Diffstat (limited to 'net')
-rw-r--r-- net/802/mrp.c | 27
-rw-r--r-- net/8021q/vlan.c | 15
-rw-r--r-- net/8021q/vlan_core.c | 9
-rw-r--r-- net/8021q/vlan_dev.c | 15
-rw-r--r-- net/9p/client.c | 132
-rw-r--r-- net/9p/trans_common.c | 10
-rw-r--r-- net/9p/trans_fd.c | 40
-rw-r--r-- net/9p/trans_rdma.c | 122
-rw-r--r-- net/9p/trans_virtio.c | 5
-rw-r--r-- net/Kconfig | 22
-rw-r--r-- net/Makefile | 1
-rw-r--r-- net/appletalk/aarp.c | 2
-rw-r--r-- net/appletalk/atalk_proc.c | 2
-rw-r--r-- net/appletalk/ddp.c | 2
-rw-r--r-- net/atm/clip.c | 8
-rw-r--r-- net/atm/mpc.c | 6
-rw-r--r-- net/ax25/af_ax25.c | 6
-rw-r--r-- net/ax25/sysctl_net_ax25.c | 2
-rw-r--r-- net/batman-adv/Makefile | 1
-rw-r--r-- net/batman-adv/bat_iv_ogm.c | 211
-rw-r--r-- net/batman-adv/bridge_loop_avoidance.c | 100
-rw-r--r-- net/batman-adv/bridge_loop_avoidance.h | 12
-rw-r--r-- net/batman-adv/distributed-arp-table.c | 82
-rw-r--r-- net/batman-adv/gateway_client.c | 40
-rw-r--r-- net/batman-adv/gateway_client.h | 4
-rw-r--r-- net/batman-adv/hard-interface.c | 98
-rw-r--r-- net/batman-adv/icmp_socket.c | 5
-rw-r--r-- net/batman-adv/main.c | 59
-rw-r--r-- net/batman-adv/main.h | 21
-rw-r--r-- net/batman-adv/network-coding.c | 22
-rw-r--r-- net/batman-adv/network-coding.h | 6
-rw-r--r-- net/batman-adv/originator.c | 6
-rw-r--r-- net/batman-adv/originator.h | 2
-rw-r--r-- net/batman-adv/ring_buffer.c | 51
-rw-r--r-- net/batman-adv/ring_buffer.h | 27
-rw-r--r-- net/batman-adv/routing.c | 82
-rw-r--r-- net/batman-adv/routing.h | 1
-rw-r--r-- net/batman-adv/send.c | 37
-rw-r--r-- net/batman-adv/send.h | 6
-rw-r--r-- net/batman-adv/soft-interface.c | 19
-rw-r--r-- net/batman-adv/sysfs.c | 9
-rw-r--r-- net/batman-adv/translation-table.c | 79
-rw-r--r-- net/batman-adv/translation-table.h | 2
-rw-r--r-- net/batman-adv/types.h | 6
-rw-r--r-- net/batman-adv/unicast.c | 27
-rw-r--r-- net/batman-adv/vis.c | 21
-rw-r--r-- net/bluetooth/hci_conn.c | 62
-rw-r--r-- net/bluetooth/hci_core.c | 284
-rw-r--r-- net/bluetooth/hci_event.c | 106
-rw-r--r-- net/bluetooth/hci_sysfs.c | 2
-rw-r--r-- net/bluetooth/hidp/core.c | 125
-rw-r--r-- net/bluetooth/hidp/hidp.h | 2
-rw-r--r-- net/bluetooth/l2cap_core.c | 202
-rw-r--r-- net/bluetooth/l2cap_sock.c | 4
-rw-r--r-- net/bluetooth/mgmt.c | 252
-rw-r--r-- net/bluetooth/rfcomm/tty.c | 278
-rw-r--r-- net/bluetooth/sco.c | 85
-rw-r--r-- net/bluetooth/smp.c | 4
-rw-r--r-- net/bridge/br_device.c | 36
-rw-r--r-- net/bridge/br_fdb.c | 15
-rw-r--r-- net/bridge/br_forward.c | 14
-rw-r--r-- net/bridge/br_if.c | 8
-rw-r--r-- net/bridge/br_input.c | 18
-rw-r--r-- net/bridge/br_mdb.c | 22
-rw-r--r-- net/bridge/br_multicast.c | 357
-rw-r--r-- net/bridge/br_netfilter.c | 4
-rw-r--r-- net/bridge/br_netlink.c | 18
-rw-r--r-- net/bridge/br_notify.c | 7
-rw-r--r-- net/bridge/br_private.h | 96
-rw-r--r-- net/bridge/br_stp.c | 23
-rw-r--r-- net/bridge/br_stp_if.c | 12
-rw-r--r-- net/bridge/br_sysfs_br.c | 28
-rw-r--r-- net/bridge/br_sysfs_if.c | 4
-rw-r--r-- net/bridge/br_vlan.c | 4
-rw-r--r-- net/bridge/netfilter/ebt_ulog.c | 6
-rw-r--r-- net/bridge/netfilter/ebtable_broute.c | 2
-rw-r--r-- net/bridge/netfilter/ebtable_filter.c | 2
-rw-r--r-- net/bridge/netfilter/ebtable_nat.c | 2
-rw-r--r-- net/bridge/netfilter/ebtables.c | 6
-rw-r--r-- net/caif/caif_dev.c | 4
-rw-r--r-- net/caif/caif_usb.c | 4
-rw-r--r-- net/caif/cfctrl.c | 3
-rw-r--r-- net/can/af_can.c | 4
-rw-r--r-- net/can/bcm.c | 4
-rw-r--r-- net/can/gw.c | 39
-rw-r--r-- net/can/raw.c | 4
-rw-r--r-- net/ceph/auth_none.c | 6
-rw-r--r-- net/ceph/messenger.c | 4
-rw-r--r-- net/ceph/osd_client.c | 103
-rw-r--r-- net/ceph/osdmap.c | 2
-rw-r--r-- net/core/datagram.c | 77
-rw-r--r-- net/core/dev.c | 709
-rw-r--r-- net/core/dev_ioctl.c | 19
-rw-r--r-- net/core/drop_monitor.c | 4
-rw-r--r-- net/core/dst.c | 2
-rw-r--r-- net/core/ethtool.c | 60
-rw-r--r-- net/core/fib_rules.c | 29
-rw-r--r-- net/core/filter.c | 2
-rw-r--r-- net/core/flow.c | 4
-rw-r--r-- net/core/flow_dissector.c | 22
-rw-r--r-- net/core/gen_estimator.c | 12
-rw-r--r-- net/core/gen_stats.c | 22
-rw-r--r-- net/core/iovec.c | 24
-rw-r--r-- net/core/link_watch.c | 3
-rw-r--r-- net/core/neighbour.c | 75
-rw-r--r-- net/core/net-procfs.c | 16
-rw-r--r-- net/core/net-sysfs.c | 165
-rw-r--r-- net/core/net_namespace.c | 2
-rw-r--r-- net/core/netpoll.c | 28
-rw-r--r-- net/core/netprio_cgroup.c | 74
-rw-r--r-- net/core/pktgen.c | 142
-rw-r--r-- net/core/rtnetlink.c | 65
-rw-r--r-- net/core/scm.c | 6
-rw-r--r-- net/core/secure_seq.c | 27
-rw-r--r-- net/core/skbuff.c | 103
-rw-r--r-- net/core/sock.c | 209
-rw-r--r-- net/core/sock_diag.c | 9
-rw-r--r-- net/core/stream.c | 2
-rw-r--r-- net/core/sysctl_net_core.c | 173
-rw-r--r-- net/core/utils.c | 22
-rw-r--r-- net/dccp/ipv6.c | 1
-rw-r--r-- net/dccp/proto.c | 4
-rw-r--r-- net/decnet/af_decnet.c | 4
-rw-r--r-- net/decnet/dn_dev.c | 6
-rw-r--r-- net/decnet/sysctl_net_decnet.c | 6
-rw-r--r-- net/dns_resolver/dns_key.c | 2
-rw-r--r-- net/dsa/slave.c | 2
-rw-r--r-- net/ethernet/eth.c | 21
-rw-r--r-- net/ieee802154/6lowpan.c | 291
-rw-r--r-- net/ieee802154/6lowpan.h | 20
-rw-r--r-- net/ieee802154/wpan-class.c | 23
-rw-r--r-- net/ipv4/Kconfig | 27
-rw-r--r-- net/ipv4/Makefile | 7
-rw-r--r-- net/ipv4/af_inet.c | 42
-rw-r--r-- net/ipv4/ah4.c | 7
-rw-r--r-- net/ipv4/arp.c | 10
-rw-r--r-- net/ipv4/devinet.c | 30
-rw-r--r-- net/ipv4/esp4.c | 9
-rw-r--r-- net/ipv4/fib_frontend.c | 4
-rw-r--r-- net/ipv4/fib_rules.c | 25
-rw-r--r-- net/ipv4/fib_semantics.c | 3
-rw-r--r-- net/ipv4/fib_trie.c | 7
-rw-r--r-- net/ipv4/gre.c | 253
-rw-r--r-- net/ipv4/gre_demux.c | 414
-rw-r--r-- net/ipv4/gre_offload.c | 130
-rw-r--r-- net/ipv4/icmp.c | 51
-rw-r--r-- net/ipv4/igmp.c | 167
-rw-r--r-- net/ipv4/inet_fragment.c | 4
-rw-r--r-- net/ipv4/inet_hashtables.c | 2
-rw-r--r-- net/ipv4/inetpeer.c | 4
-rw-r--r-- net/ipv4/ip_gre.c | 264
-rw-r--r-- net/ipv4/ip_input.c | 15
-rw-r--r-- net/ipv4/ip_output.c | 16
-rw-r--r-- net/ipv4/ip_tunnel.c | 250
-rw-r--r-- net/ipv4/ip_tunnel_core.c | 118
-rw-r--r-- net/ipv4/ip_vti.c | 538
-rw-r--r-- net/ipv4/ipcomp.c | 7
-rw-r--r-- net/ipv4/ipip.c | 22
-rw-r--r-- net/ipv4/ipmr.c | 24
-rw-r--r-- net/ipv4/netfilter/Kconfig | 15
-rw-r--r-- net/ipv4/netfilter/Makefile | 1
-rw-r--r-- net/ipv4/netfilter/arptable_filter.c | 2
-rw-r--r-- net/ipv4/netfilter/ipt_MASQUERADE.c | 9
-rw-r--r-- net/ipv4/netfilter/ipt_REJECT.c | 21
-rw-r--r-- net/ipv4/netfilter/ipt_SYNPROXY.c | 480
-rw-r--r-- net/ipv4/netfilter/ipt_ULOG.c | 18
-rw-r--r-- net/ipv4/netfilter/iptable_filter.c | 2
-rw-r--r-- net/ipv4/netfilter/iptable_mangle.c | 2
-rw-r--r-- net/ipv4/netfilter/iptable_nat.c | 2
-rw-r--r-- net/ipv4/netfilter/iptable_raw.c | 2
-rw-r--r-- net/ipv4/netfilter/iptable_security.c | 2
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 9
-rw-r--r-- net/ipv4/ping.c | 643
-rw-r--r-- net/ipv4/proc.c | 8
-rw-r--r-- net/ipv4/raw.c | 11
-rw-r--r-- net/ipv4/route.c | 168
-rw-r--r-- net/ipv4/syncookies.c | 29
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c | 54
-rw-r--r-- net/ipv4/tcp.c | 397
-rw-r--r-- net/ipv4/tcp_cubic.c | 12
-rw-r--r-- net/ipv4/tcp_fastopen.c | 13
-rw-r--r-- net/ipv4/tcp_input.c | 724
-rw-r--r-- net/ipv4/tcp_ipv4.c | 114
-rw-r--r-- net/ipv4/tcp_memcontrol.c | 22
-rw-r--r-- net/ipv4/tcp_metrics.c | 44
-rw-r--r-- net/ipv4/tcp_minisocks.c | 14
-rw-r--r-- net/ipv4/tcp_offload.c | 332
-rw-r--r-- net/ipv4/tcp_output.c | 72
-rw-r--r-- net/ipv4/tcp_probe.c | 87
-rw-r--r-- net/ipv4/udp.c | 114
-rw-r--r-- net/ipv4/udp_offload.c | 100
-rw-r--r-- net/ipv4/xfrm4_mode_tunnel.c | 2
-rw-r--r-- net/ipv4/xfrm4_output.c | 16
-rw-r--r-- net/ipv4/xfrm4_state.c | 1
-rw-r--r-- net/ipv4/xfrm4_tunnel.c | 2
-rw-r--r-- net/ipv6/Makefile | 2
-rw-r--r-- net/ipv6/addrconf.c | 593
-rw-r--r-- net/ipv6/addrconf_core.c | 51
-rw-r--r-- net/ipv6/addrlabel.c | 48
-rw-r--r-- net/ipv6/af_inet6.c | 33
-rw-r--r-- net/ipv6/ah6.c | 2
-rw-r--r-- net/ipv6/datagram.c | 27
-rw-r--r-- net/ipv6/esp6.c | 4
-rw-r--r-- net/ipv6/exthdrs.c | 6
-rw-r--r-- net/ipv6/exthdrs_core.c | 2
-rw-r--r-- net/ipv6/fib6_rules.c | 37
-rw-r--r-- net/ipv6/icmp.c | 35
-rw-r--r-- net/ipv6/ip6_fib.c | 74
-rw-r--r-- net/ipv6/ip6_gre.c | 23
-rw-r--r-- net/ipv6/ip6_input.c | 6
-rw-r--r-- net/ipv6/ip6_offload.c | 5
-rw-r--r-- net/ipv6/ip6_output.c | 110
-rw-r--r-- net/ipv6/ip6_tunnel.c | 59
-rw-r--r-- net/ipv6/ip6mr.c | 21
-rw-r--r-- net/ipv6/ipcomp6.c | 2
-rw-r--r-- net/ipv6/mcast.c | 358
-rw-r--r-- net/ipv6/mip6.c | 6
-rw-r--r-- net/ipv6/ndisc.c | 88
-rw-r--r-- net/ipv6/netfilter/Kconfig | 13
-rw-r--r-- net/ipv6/netfilter/Makefile | 3
-rw-r--r-- net/ipv6/netfilter/ip6t_MASQUERADE.c | 8
-rw-r--r-- net/ipv6/netfilter/ip6t_REJECT.c | 20
-rw-r--r-- net/ipv6/netfilter/ip6t_SYNPROXY.c | 503
-rw-r--r-- net/ipv6/netfilter/ip6table_filter.c | 2
-rw-r--r-- net/ipv6/netfilter/ip6table_mangle.c | 2
-rw-r--r-- net/ipv6/netfilter/ip6table_nat.c | 2
-rw-r--r-- net/ipv6/netfilter/ip6table_raw.c | 2
-rw-r--r-- net/ipv6/netfilter/ip6table_security.c | 2
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 9
-rw-r--r-- net/ipv6/netfilter/nf_nat_proto_icmpv6.c | 4
-rw-r--r-- net/ipv6/output_core.c | 51
-rw-r--r-- net/ipv6/ping.c | 277
-rw-r--r-- net/ipv6/proc.c | 4
-rw-r--r-- net/ipv6/raw.c | 62
-rw-r--r-- net/ipv6/reassembly.c | 5
-rw-r--r-- net/ipv6/route.c | 220
-rw-r--r-- net/ipv6/sit.c | 295
-rw-r--r-- net/ipv6/syncookies.c | 25
-rw-r--r-- net/ipv6/sysctl_net_ipv6.c | 4
-rw-r--r-- net/ipv6/tcp_ipv6.c | 17
-rw-r--r-- net/ipv6/udp.c | 66
-rw-r--r-- net/ipv6/udp_offload.c | 108
-rw-r--r-- net/ipv6/xfrm6_output.c | 21
-rw-r--r-- net/ipv6/xfrm6_state.c | 1
-rw-r--r-- net/ipx/af_ipx.c | 2
-rw-r--r-- net/ipx/ipx_proc.c | 2
-rw-r--r-- net/irda/irlan/irlan_eth.c | 31
-rw-r--r-- net/irda/irsysctl.c | 6
-rw-r--r-- net/irda/irttp.c | 50
-rw-r--r-- net/iucv/af_iucv.c | 2
-rw-r--r-- net/iucv/iucv.c | 2
-rw-r--r-- net/key/af_key.c | 20
-rw-r--r-- net/l2tp/l2tp_core.c | 114
-rw-r--r-- net/l2tp/l2tp_core.h | 5
-rw-r--r-- net/l2tp/l2tp_ppp.c | 9
-rw-r--r-- net/lapb/lapb_timer.c | 1
-rw-r--r-- net/llc/af_llc.c | 6
-rw-r--r-- net/llc/llc_conn.c | 6
-rw-r--r-- net/llc/llc_proc.c | 2
-rw-r--r-- net/llc/llc_sap.c | 4
-rw-r--r-- net/mac80211/aes_ccm.c | 6
-rw-r--r-- net/mac80211/cfg.c | 322
-rw-r--r-- net/mac80211/chan.c | 58
-rw-r--r-- net/mac80211/debugfs_netdev.c | 15
-rw-r--r-- net/mac80211/debugfs_sta.c | 9
-rw-r--r-- net/mac80211/driver-ops.h | 16
-rw-r--r-- net/mac80211/ht.c | 61
-rw-r--r-- net/mac80211/ibss.c | 474
-rw-r--r-- net/mac80211/ieee80211_i.h | 112
-rw-r--r-- net/mac80211/iface.c | 64
-rw-r--r-- net/mac80211/key.c | 178
-rw-r--r-- net/mac80211/key.h | 15
-rw-r--r-- net/mac80211/led.c | 19
-rw-r--r-- net/mac80211/led.h | 4
-rw-r--r-- net/mac80211/main.c | 27
-rw-r--r-- net/mac80211/mesh.c | 117
-rw-r--r-- net/mac80211/mesh.h | 7
-rw-r--r-- net/mac80211/mesh_plink.c | 10
-rw-r--r-- net/mac80211/mesh_ps.c | 4
-rw-r--r-- net/mac80211/mlme.c | 693
-rw-r--r-- net/mac80211/pm.c | 7
-rw-r--r-- net/mac80211/rate.c | 79
-rw-r--r-- net/mac80211/rate.h | 22
-rw-r--r-- net/mac80211/rc80211_minstrel.c | 36
-rw-r--r-- net/mac80211/rc80211_minstrel_ht.c | 30
-rw-r--r-- net/mac80211/rc80211_pid_algo.c | 1
-rw-r--r-- net/mac80211/rx.c | 570
-rw-r--r-- net/mac80211/scan.c | 81
-rw-r--r-- net/mac80211/sta_info.c | 8
-rw-r--r-- net/mac80211/sta_info.h | 9
-rw-r--r-- net/mac80211/status.c | 90
-rw-r--r-- net/mac80211/trace.h | 26
-rw-r--r-- net/mac80211/tx.c | 133
-rw-r--r-- net/mac80211/util.c | 263
-rw-r--r-- net/mac80211/vht.c | 2
-rw-r--r-- net/mac80211/wep.c | 48
-rw-r--r-- net/mac80211/wpa.c | 68
-rw-r--r-- net/mpls/Kconfig | 9
-rw-r--r-- net/mpls/Makefile | 4
-rw-r--r-- net/mpls/mpls_gso.c | 108
-rw-r--r-- net/netfilter/Kconfig | 26
-rw-r--r-- net/netfilter/Makefile | 6
-rw-r--r-- net/netfilter/core.c | 28
-rw-r--r-- net/netfilter/ipset/ip_set_core.c | 5
-rw-r--r-- net/netfilter/ipset/ip_set_getport.c | 4
-rw-r--r-- net/netfilter/ipset/ip_set_hash_gen.h | 28
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipportnet.c | 4
-rw-r--r-- net/netfilter/ipset/ip_set_hash_net.c | 4
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netiface.c | 4
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netport.c | 4
-rw-r--r-- net/netfilter/ipvs/ip_vs_conn.c | 35
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c | 19
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c | 122
-rw-r--r-- net/netfilter/ipvs/ip_vs_dh.c | 10
-rw-r--r-- net/netfilter/ipvs/ip_vs_est.c | 4
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblc.c | 86
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblcr.c | 84
-rw-r--r-- net/netfilter/ipvs/ip_vs_lc.c | 3
-rw-r--r-- net/netfilter/ipvs/ip_vs_nq.c | 11
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 883
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_tcp.c | 14
-rw-r--r-- net/netfilter/ipvs/ip_vs_rr.c | 3
-rw-r--r-- net/netfilter/ipvs/ip_vs_sed.c | 11
-rw-r--r-- net/netfilter/ipvs/ip_vs_sh.c | 114
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c | 19
-rw-r--r-- net/netfilter/ipvs/ip_vs_wlc.c | 9
-rw-r--r-- net/netfilter/ipvs/ip_vs_wrr.c | 3
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_core.c | 89
-rw-r--r-- net/netfilter/nf_conntrack_expect.c | 5
-rw-r--r-- net/netfilter/nf_conntrack_ftp.c | 73
-rw-r--r-- net/netfilter/nf_conntrack_labels.c | 6
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c | 413
-rw-r--r-- net/netfilter/nf_conntrack_proto.c | 4
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c | 54
-rw-r--r-- net/netfilter/nf_conntrack_seqadj.c | 238
-rw-r--r-- net/netfilter/nf_conntrack_standalone.c | 4
-rw-r--r-- net/netfilter/nf_log.c | 6
-rw-r--r-- net/netfilter/nf_nat_core.c | 22
-rw-r--r-- net/netfilter/nf_nat_helper.c | 232
-rw-r--r-- net/netfilter/nf_nat_proto_sctp.c | 8
-rw-r--r-- net/netfilter/nf_nat_sip.c | 6
-rw-r--r-- net/netfilter/nf_synproxy_core.c | 434
-rw-r--r-- net/netfilter/nf_tproxy_core.c | 62
-rw-r--r-- net/netfilter/nfnetlink_acct.c | 7
-rw-r--r-- net/netfilter/nfnetlink_cthelper.c | 16
-rw-r--r-- net/netfilter/nfnetlink_cttimeout.c | 13
-rw-r--r-- net/netfilter/nfnetlink_log.c | 6
-rw-r--r-- net/netfilter/nfnetlink_queue_core.c | 71
-rw-r--r-- net/netfilter/nfnetlink_queue_ct.c | 23
-rw-r--r-- net/netfilter/xt_CT.c | 10
-rw-r--r-- net/netfilter/xt_TCPMSS.c | 51
-rw-r--r-- net/netfilter/xt_TCPOPTSTRIP.c | 14
-rw-r--r-- net/netfilter/xt_TEE.c | 2
-rw-r--r-- net/netfilter/xt_TPROXY.c | 169
-rw-r--r-- net/netfilter/xt_addrtype.c | 2
-rw-r--r-- net/netfilter/xt_rateest.c | 2
-rw-r--r-- net/netfilter/xt_socket.c | 164
-rw-r--r-- net/netlabel/netlabel_cipso_v4.c | 4
-rw-r--r-- net/netlabel/netlabel_domainhash.c | 104
-rw-r--r-- net/netlabel/netlabel_domainhash.h | 46
-rw-r--r-- net/netlabel/netlabel_kapi.c | 88
-rw-r--r-- net/netlabel/netlabel_mgmt.c | 44
-rw-r--r-- net/netlabel/netlabel_unlabeled.c | 9
-rw-r--r-- net/netlink/af_netlink.c | 311
-rw-r--r-- net/netlink/af_netlink.h | 4
-rw-r--r-- net/netlink/genetlink.c | 71
-rw-r--r-- net/netrom/af_netrom.c | 2
-rw-r--r-- net/netrom/sysctl_net_netrom.c | 2
-rw-r--r-- net/nfc/core.c | 236
-rw-r--r-- net/nfc/hci/core.c | 75
-rw-r--r-- net/nfc/llcp.h | 3
-rw-r--r-- net/nfc/llcp_commands.c | 22
-rw-r--r-- net/nfc/llcp_core.c | 16
-rw-r--r-- net/nfc/llcp_sock.c | 19
-rw-r--r-- net/nfc/nci/Kconfig | 11
-rw-r--r-- net/nfc/nci/Makefile | 4
-rw-r--r-- net/nfc/nci/core.c | 37
-rw-r--r-- net/nfc/nci/data.c | 2
-rw-r--r-- net/nfc/nci/spi.c | 378
-rw-r--r-- net/nfc/netlink.c | 276
-rw-r--r-- net/nfc/nfc.h | 10
-rw-r--r-- net/openvswitch/Kconfig | 28
-rw-r--r-- net/openvswitch/Makefile | 10
-rw-r--r-- net/openvswitch/actions.c | 54
-rw-r--r-- net/openvswitch/datapath.c | 534
-rw-r--r-- net/openvswitch/datapath.h | 10
-rw-r--r-- net/openvswitch/dp_notify.c | 2
-rw-r--r-- net/openvswitch/flow.c | 1524
-rw-r--r-- net/openvswitch/flow.h | 124
-rw-r--r-- net/openvswitch/vport-gre.c | 272
-rw-r--r-- net/openvswitch/vport-internal_dev.c | 3
-rw-r--r-- net/openvswitch/vport-netdev.c | 29
-rw-r--r-- net/openvswitch/vport-netdev.h | 1
-rw-r--r-- net/openvswitch/vport-vxlan.c | 204
-rw-r--r-- net/openvswitch/vport.c | 40
-rw-r--r-- net/openvswitch/vport.h | 24
-rw-r--r-- net/packet/af_packet.c | 77
-rw-r--r-- net/phonet/pn_dev.c | 4
-rw-r--r-- net/phonet/socket.c | 2
-rw-r--r-- net/phonet/sysctl.c | 4
-rw-r--r-- net/rds/ib_sysctl.c | 2
-rw-r--r-- net/rds/iw_sysctl.c | 2
-rw-r--r-- net/rds/sysctl.c | 2
-rw-r--r-- net/rfkill/core.c | 90
-rw-r--r-- net/rfkill/rfkill-regulator.c | 8
-rw-r--r-- net/rose/af_rose.c | 6
-rw-r--r-- net/rose/sysctl_net_rose.c | 2
-rw-r--r-- net/sched/Kconfig | 14
-rw-r--r-- net/sched/Makefile | 1
-rw-r--r-- net/sched/act_mirred.c | 2
-rw-r--r-- net/sched/cls_cgroup.c | 39
-rw-r--r-- net/sched/sch_api.c | 105
-rw-r--r-- net/sched/sch_atm.c | 1
-rw-r--r-- net/sched/sch_cbq.c | 3
-rw-r--r-- net/sched/sch_choke.c | 3
-rw-r--r-- net/sched/sch_drr.c | 2
-rw-r--r-- net/sched/sch_fq.c | 817
-rw-r--r-- net/sched/sch_generic.c | 72
-rw-r--r-- net/sched/sch_hfsc.c | 2
-rw-r--r-- net/sched/sch_htb.c | 276
-rw-r--r-- net/sched/sch_mq.c | 2
-rw-r--r-- net/sched/sch_mqprio.c | 2
-rw-r--r-- net/sched/sch_netem.c | 116
-rw-r--r-- net/sched/sch_qfq.c | 214
-rw-r--r-- net/sched/sch_tbf.c | 47
-rw-r--r-- net/sctp/Kconfig | 11
-rw-r--r-- net/sctp/associola.c | 96
-rw-r--r-- net/sctp/auth.c | 8
-rw-r--r-- net/sctp/bind_addr.c | 10
-rw-r--r-- net/sctp/chunk.c | 19
-rw-r--r-- net/sctp/command.c | 8
-rw-r--r-- net/sctp/debug.c | 12
-rw-r--r-- net/sctp/endpointola.c | 33
-rw-r--r-- net/sctp/input.c | 33
-rw-r--r-- net/sctp/inqueue.c | 17
-rw-r--r-- net/sctp/ipv6.c | 79
-rw-r--r-- net/sctp/objcnt.c | 8
-rw-r--r-- net/sctp/output.c | 48
-rw-r--r-- net/sctp/outqueue.c | 228
-rw-r--r-- net/sctp/primitive.c | 8
-rw-r--r-- net/sctp/probe.c | 27
-rw-r--r-- net/sctp/proc.c | 24
-rw-r--r-- net/sctp/protocol.c | 66
-rw-r--r-- net/sctp/sm_make_chunk.c | 179
-rw-r--r-- net/sctp/sm_sideeffect.c | 115
-rw-r--r-- net/sctp/sm_statefuns.c | 93
-rw-r--r-- net/sctp/sm_statetable.c | 8
-rw-r--r-- net/sctp/socket.c | 308
-rw-r--r-- net/sctp/ssnmap.c | 8
-rw-r--r-- net/sctp/sysctl.c | 18
-rw-r--r-- net/sctp/transport.c | 63
-rw-r--r-- net/sctp/tsnmap.c | 18
-rw-r--r-- net/sctp/ulpevent.c | 18
-rw-r--r-- net/sctp/ulpqueue.c | 8
-rw-r--r-- net/socket.c | 88
-rw-r--r-- net/sunrpc/auth.c | 68
-rw-r--r-- net/sunrpc/auth_generic.c | 82
-rw-r--r-- net/sunrpc/auth_gss/auth_gss.c | 453
-rw-r--r-- net/sunrpc/auth_gss/gss_mech_switch.c | 5
-rw-r--r-- net/sunrpc/auth_gss/gss_rpc_upcall.c | 29
-rw-r--r-- net/sunrpc/auth_gss/gss_rpc_xdr.c | 50
-rw-r--r-- net/sunrpc/auth_gss/gss_rpc_xdr.h | 5
-rw-r--r-- net/sunrpc/auth_gss/svcauth_gss.c | 16
-rw-r--r-- net/sunrpc/auth_null.c | 6
-rw-r--r-- net/sunrpc/auth_unix.c | 6
-rw-r--r-- net/sunrpc/cache.c | 101
-rw-r--r-- net/sunrpc/clnt.c | 241
-rw-r--r-- net/sunrpc/netns.h | 1
-rw-r--r-- net/sunrpc/rpc_pipe.c | 224
-rw-r--r-- net/sunrpc/rpcb_clnt.c | 48
-rw-r--r-- net/sunrpc/sched.c | 23
-rw-r--r-- net/sunrpc/stats.c | 2
-rw-r--r-- net/sunrpc/svc.c | 2
-rw-r--r-- net/sunrpc/svcauth_unix.c | 6
-rw-r--r-- net/sunrpc/svcsock.c | 15
-rw-r--r-- net/sunrpc/sysctl.c | 10
-rw-r--r-- net/sunrpc/xdr.c | 9
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma.c | 8
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_marshal.c | 20
-rw-r--r-- net/sunrpc/xprtrdma/transport.c | 4
-rw-r--r-- net/sunrpc/xprtsock.c | 20
-rw-r--r-- net/sysctl_net.c | 4
-rw-r--r-- net/tipc/Makefile | 3
-rw-r--r-- net/tipc/bcast.c | 3
-rw-r--r-- net/tipc/bcast.h | 3
-rw-r--r-- net/tipc/bearer.c | 9
-rw-r--r-- net/tipc/config.c | 119
-rw-r--r-- net/tipc/core.c | 22
-rw-r--r-- net/tipc/core.h | 17
-rw-r--r-- net/tipc/discover.c | 7
-rw-r--r-- net/tipc/eth_media.c | 19
-rw-r--r-- net/tipc/ib_media.c | 25
-rw-r--r-- net/tipc/link.c | 88
-rw-r--r-- net/tipc/msg.c | 19
-rw-r--r-- net/tipc/msg.h | 8
-rw-r--r-- net/tipc/name_table.c | 10
-rw-r--r-- net/tipc/name_table.h | 11
-rw-r--r-- net/tipc/node_subscr.c | 2
-rw-r--r-- net/tipc/port.c | 320
-rw-r--r-- net/tipc/port.h | 85
-rw-r--r-- net/tipc/server.c | 605
-rw-r--r-- net/tipc/server.h | 94
-rw-r--r-- net/tipc/socket.c | 150
-rw-r--r-- net/tipc/subscr.c | 348
-rw-r--r-- net/tipc/subscr.h | 21
-rw-r--r-- net/tipc/sysctl.c | 64
-rw-r--r-- net/unix/af_unix.c | 73
-rw-r--r-- net/unix/sysctl_net_unix.c | 2
-rw-r--r-- net/vmw_vsock/af_vsock.c | 60
-rw-r--r-- net/vmw_vsock/af_vsock.h | 175
-rw-r--r-- net/vmw_vsock/vmci_transport.c | 20
-rw-r--r-- net/vmw_vsock/vmci_transport.h | 4
-rw-r--r-- net/vmw_vsock/vsock_addr.c | 3
-rw-r--r-- net/vmw_vsock/vsock_addr.h | 30
-rw-r--r-- net/wireless/chan.c | 57
-rw-r--r-- net/wireless/core.c | 280
-rw-r--r-- net/wireless/core.h | 125
-rw-r--r-- net/wireless/debugfs.c | 4
-rw-r--r-- net/wireless/ibss.c | 16
-rw-r--r-- net/wireless/mesh.c | 18
-rw-r--r-- net/wireless/mlme.c | 437
-rw-r--r-- net/wireless/nl80211.c | 1404
-rw-r--r-- net/wireless/nl80211.h | 4
-rw-r--r-- net/wireless/rdev-ops.h | 17
-rw-r--r-- net/wireless/reg.c | 143
-rw-r--r-- net/wireless/scan.c | 86
-rw-r--r-- net/wireless/sme.c | 679
-rw-r--r-- net/wireless/sysfs.c | 33
-rw-r--r-- net/wireless/trace.h | 99
-rw-r--r-- net/wireless/wext-compat.c | 22
-rw-r--r-- net/wireless/wext-sme.c | 49
-rw-r--r-- net/x25/af_x25.c | 17
-rw-r--r-- net/x25/x25_facilities.c | 4
-rw-r--r-- net/xfrm/xfrm_input.c | 5
-rw-r--r-- net/xfrm/xfrm_output.c | 30
-rw-r--r-- net/xfrm/xfrm_policy.c | 23
-rw-r--r-- net/xfrm/xfrm_proc.c | 1
-rw-r--r-- net/xfrm/xfrm_state.c | 22
540 files changed, 24942 insertions, 14110 deletions
diff --git a/net/802/mrp.c b/net/802/mrp.c
index 1eb05d80b07b..3ed616215870 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -24,6 +24,11 @@
 static unsigned int mrp_join_time __read_mostly = 200;
 module_param(mrp_join_time, uint, 0644);
 MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)");
+
+static unsigned int mrp_periodic_time __read_mostly = 1000;
+module_param(mrp_periodic_time, uint, 0644);
+MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)");
+
 MODULE_LICENSE("GPL");
 
 static const u8
@@ -595,6 +600,24 @@ static void mrp_join_timer(unsigned long data)
 	mrp_join_timer_arm(app);
 }
 
+static void mrp_periodic_timer_arm(struct mrp_applicant *app)
+{
+	mod_timer(&app->periodic_timer,
+		  jiffies + msecs_to_jiffies(mrp_periodic_time));
+}
+
+static void mrp_periodic_timer(unsigned long data)
+{
+	struct mrp_applicant *app = (struct mrp_applicant *)data;
+
+	spin_lock(&app->lock);
+	mrp_mad_event(app, MRP_EVENT_PERIODIC);
+	mrp_pdu_queue(app);
+	spin_unlock(&app->lock);
+
+	mrp_periodic_timer_arm(app);
+}
+
 static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset)
 {
 	__be16 endmark;
@@ -845,6 +868,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
 	rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
 	setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app);
 	mrp_join_timer_arm(app);
+	setup_timer(&app->periodic_timer, mrp_periodic_timer,
+		    (unsigned long)app);
+	mrp_periodic_timer_arm(app);
 	return 0;
 
 err3:
@@ -870,6 +896,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
 	 * all pending messages before the applicant is gone.
 	 */
 	del_timer_sync(&app->join_timer);
+	del_timer_sync(&app->periodic_timer);
 
 	spin_lock_bh(&app->lock);
 	mrp_mad_event(app, MRP_EVENT_TX);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 9424f3718ea7..61fc573f1142 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -210,6 +210,7 @@ out_vid_del:
 static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 {
 	struct net_device *new_dev;
+	struct vlan_dev_priv *vlan;
 	struct net *net = dev_net(real_dev);
 	struct vlan_net *vn = net_generic(net, vlan_net_id);
 	char name[IFNAMSIZ];
@@ -260,11 +261,12 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	new_dev->mtu = real_dev->mtu;
 	new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT);
 
-	vlan_dev_priv(new_dev)->vlan_proto = htons(ETH_P_8021Q);
-	vlan_dev_priv(new_dev)->vlan_id = vlan_id;
-	vlan_dev_priv(new_dev)->real_dev = real_dev;
-	vlan_dev_priv(new_dev)->dent = NULL;
-	vlan_dev_priv(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
+	vlan = vlan_dev_priv(new_dev);
+	vlan->vlan_proto = htons(ETH_P_8021Q);
+	vlan->vlan_id = vlan_id;
+	vlan->real_dev = real_dev;
+	vlan->dent = NULL;
+	vlan->flags = VLAN_FLAG_REORDER_HDR;
 
 	new_dev->rtnl_link_ops = &vlan_link_ops;
 	err = register_vlan_dev(new_dev);
@@ -341,7 +343,7 @@ static void __vlan_device_event(struct net_device *dev, unsigned long event)
 static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			     void *ptr)
 {
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct vlan_group *grp;
 	struct vlan_info *vlan_info;
 	int i, flgs;
@@ -459,6 +461,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_NOTIFY_PEERS:
 	case NETDEV_BONDING_FAILOVER:
+	case NETDEV_RESEND_IGMP:
 		/* Propagate to vlan devices */
 		vlan_group_for_each_dev(grp, i, vlandev)
 			call_netdevice_notifiers(event, vlandev);
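Note on the pattern: the `dev = ptr` to netdev_notifier_info_to_dev(ptr) conversion recurs throughout this series (aarp.c, ddp.c, clip.c, mpc.c and af_ax25.c below): netdevice notifier callbacks no longer receive a bare struct net_device pointer, but a wrapping struct netdev_notifier_info that must be unpacked with the helper. A minimal sketch of a notifier written to the new convention (my_device_event and my_notifier are hypothetical names):

	#include <linux/netdevice.h>
	#include <linux/notifier.h>

	static int my_device_event(struct notifier_block *this,
				   unsigned long event, void *ptr)
	{
		/* ptr is a struct netdev_notifier_info *, not the device */
		struct net_device *dev = netdev_notifier_info_to_dev(ptr);

		switch (event) {
		case NETDEV_UP:
			/* react to dev coming up */
			break;
		}
		return NOTIFY_DONE;
	}

	static struct notifier_block my_notifier = {
		.notifier_call = my_device_event,
	};

	/* registered as usual with register_netdevice_notifier(&my_notifier) */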
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 8a15eaadc4bd..6ee48aac776f 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -9,7 +9,7 @@ bool vlan_do_receive(struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
 	__be16 vlan_proto = skb->vlan_proto;
-	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
+	u16 vlan_id = vlan_tx_tag_get_id(skb);
 	struct net_device *vlan_dev;
 	struct vlan_pcpu_stats *rx_stats;
 
@@ -91,7 +91,12 @@ EXPORT_SYMBOL(__vlan_find_dev_deep);
 
 struct net_device *vlan_dev_real_dev(const struct net_device *dev)
 {
-	return vlan_dev_priv(dev)->real_dev;
+	struct net_device *ret = vlan_dev_priv(dev)->real_dev;
+
+	while (is_vlan_dev(ret))
+		ret = vlan_dev_priv(ret)->real_dev;
+
+	return ret;
 }
 EXPORT_SYMBOL(vlan_dev_real_dev);
 
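Note on the pattern: with 802.1ad (Q-in-Q), vlan devices can be stacked on other vlan devices, so vlan_dev_real_dev() now loops until it reaches the bottom-most non-vlan device rather than returning its immediate parent. A sketch of the resulting behaviour, with hypothetical device names:

	/* stacking: eth0 <- eth0.100 (802.1ad) <- eth0.100.200 (802.1Q) */
	struct net_device *lower = vlan_dev_real_dev(vlan200_dev);
	/* before this patch: lower == eth0.100 (still a vlan device)
	 * after this patch:  lower == eth0 (the real underlying device)
	 */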
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 3a8c8fd63c88..09bf1c38805b 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -73,6 +73,8 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
 {
 	struct vlan_priority_tci_mapping *mp;
 
+	smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */
+
 	mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)];
 	while (mp) {
 		if (mp->priority == skb->priority) {
@@ -105,10 +107,10 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 	u16 vlan_tci = 0;
 	int rc;
 
-	if (!(vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+	if (!(vlan->flags & VLAN_FLAG_REORDER_HDR)) {
 		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
 
-		vlan_tci = vlan_dev_priv(dev)->vlan_id;
+		vlan_tci = vlan->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
 		vhdr->h_vlan_TCI = htons(vlan_tci);
 
@@ -131,7 +133,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		saddr = dev->dev_addr;
 
 	/* Now make the underlying real hard header */
-	dev = vlan_dev_priv(dev)->real_dev;
+	dev = vlan->real_dev;
 	rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen);
 	if (rc > 0)
 		rc += vhdrlen;
@@ -249,6 +251,11 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 	np->next = mp;
 	np->priority = skb_prio;
 	np->vlan_qos = vlan_qos;
+	/* Before inserting this element in hash table, make sure all its fields
+	 * are committed to memory.
+	 * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask()
+	 */
+	smp_wmb();
 	vlan->egress_priority_map[skb_prio & 0xF] = np;
 	if (vlan_qos)
 		vlan->nr_egress_mappings++;
@@ -575,7 +582,7 @@ static int vlan_dev_init(struct net_device *dev)
 	dev->dev_id = real_dev->dev_id;
 
 	if (is_zero_ether_addr(dev->dev_addr))
-		memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len);
+		eth_hw_addr_inherit(dev, real_dev);
 	if (is_zero_ether_addr(dev->broadcast))
 		memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len);
 
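Note on the pattern: the smp_wmb()/smp_rmb() pair added above is the usual lockless publish protocol: the writer fills in every field of the new element, issues the write barrier, and only then links the element into the table; the lockless reader issues the read barrier before walking it. A reduced kernel-context sketch of that ordering (my_node, my_table and my_publish are hypothetical; modern code would typically express the same thing with rcu_assign_pointer()/rcu_dereference()):

	struct my_node {
		int key;
		int value;
		struct my_node *next;
	};

	static struct my_node *my_table[16];

	/* writer: assumed serialized against other writers by a lock */
	static void my_publish(struct my_node *np, int key, int value)
	{
		np->key = key;
		np->value = value;
		np->next = my_table[key & 0xF];
		/* commit all fields before the node becomes reachable */
		smp_wmb();
		my_table[key & 0xF] = np;
	}

	/* lockless reader */
	static int my_lookup(int key)
	{
		struct my_node *np;

		smp_rmb();	/* pairs with smp_wmb() in my_publish() */
		for (np = my_table[key & 0xF]; np; np = np->next)
			if (np->key == key)
				return np->value;
		return -1;
	}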
diff --git a/net/9p/client.c b/net/9p/client.c
index 8eb75425e6e6..ee8fd6bd4035 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -127,7 +127,7 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 	char *s;
 	int ret = 0;
 
-	clnt->proto_version = p9_proto_2000u;
+	clnt->proto_version = p9_proto_2000L;
 	clnt->msize = 8192;
 
 	if (!opts)
@@ -204,6 +204,17 @@ free_and_return:
 	return ret;
 }
 
+struct p9_fcall *p9_fcall_alloc(int alloc_msize)
+{
+	struct p9_fcall *fc;
+	fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
+	if (!fc)
+		return NULL;
+	fc->capacity = alloc_msize;
+	fc->sdata = (char *) fc + sizeof(struct p9_fcall);
+	return fc;
+}
+
 /**
  * p9_tag_alloc - lookup/allocate a request by tag
  * @c: client session to lookup tag within
@@ -256,39 +267,36 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
 	col = tag % P9_ROW_MAXTAG;
 
 	req = &c->reqs[row][col];
-	if (!req->tc) {
+	if (!req->wq) {
 		req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS);
-		if (!req->wq) {
-			pr_err("Couldn't grow tag array\n");
-			return ERR_PTR(-ENOMEM);
-		}
+		if (!req->wq)
+			goto grow_failed;
 		init_waitqueue_head(req->wq);
-		req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
-				  GFP_NOFS);
-		req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
-				  GFP_NOFS);
-		if ((!req->tc) || (!req->rc)) {
-			pr_err("Couldn't grow tag array\n");
-			kfree(req->tc);
-			kfree(req->rc);
-			kfree(req->wq);
-			req->tc = req->rc = NULL;
-			req->wq = NULL;
-			return ERR_PTR(-ENOMEM);
-		}
-		req->tc->capacity = alloc_msize;
-		req->rc->capacity = alloc_msize;
-		req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
-		req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
 	}
 
+	if (!req->tc)
+		req->tc = p9_fcall_alloc(alloc_msize);
+	if (!req->rc)
+		req->rc = p9_fcall_alloc(alloc_msize);
+	if (!req->tc || !req->rc)
+		goto grow_failed;
+
 	p9pdu_reset(req->tc);
 	p9pdu_reset(req->rc);
 
 	req->tc->tag = tag-1;
 	req->status = REQ_STATUS_ALLOC;
 
-	return &c->reqs[row][col];
+	return req;
+
+grow_failed:
+	pr_err("Couldn't grow tag array\n");
+	kfree(req->tc);
+	kfree(req->rc);
+	kfree(req->wq);
+	req->tc = req->rc = NULL;
+	req->wq = NULL;
+	return ERR_PTR(-ENOMEM);
 }
 
 /**
@@ -562,36 +570,19 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
 
 	if (!p9_is_proto_dotl(c)) {
 		/* Error is reported in string format */
-		uint16_t len;
-		/* 7 = header size for RERROR, 2 is the size of string len; */
-		int inline_len = in_hdrlen - (7 + 2);
+		int len;
+		/* 7 = header size for RERROR; */
+		int inline_len = in_hdrlen - 7;
 
-		/* Read the size of error string */
-		err = p9pdu_readf(req->rc, c->proto_version, "w", &len);
-		if (err)
-			goto out_err;
-
-		ename = kmalloc(len + 1, GFP_NOFS);
-		if (!ename) {
-			err = -ENOMEM;
+		len = req->rc->size - req->rc->offset;
+		if (len > (P9_ZC_HDR_SZ - 7)) {
+			err = -EFAULT;
 			goto out_err;
 		}
-		if (len <= inline_len) {
-			/* We have error in protocol buffer itself */
-			if (pdu_read(req->rc, ename, len)) {
-				err = -EFAULT;
-				goto out_free;
-
-			}
-		} else {
-			/*
-			 * Part of the data is in user space buffer.
-			 */
-			if (pdu_read(req->rc, ename, inline_len)) {
-				err = -EFAULT;
-				goto out_free;
-
-			}
+
+		ename = &req->rc->sdata[req->rc->offset];
+		if (len > inline_len) {
+			/* We have error in external buffer */
 			if (kern_buf) {
 				memcpy(ename + inline_len, uidata,
 				       len - inline_len);
@@ -600,19 +591,19 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
 						     uidata, len - inline_len);
 				if (err) {
 					err = -EFAULT;
-					goto out_free;
+					goto out_err;
 				}
 			}
 		}
-		ename[len] = 0;
-		if (p9_is_proto_dotu(c)) {
-			/* For dotu we also have error code */
-			err = p9pdu_readf(req->rc,
-					  c->proto_version, "d", &ecode);
-			if (err)
-				goto out_free;
-			err = -ecode;
-		}
+		ename = NULL;
+		err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+				  &ename, &ecode);
+		if (err)
+			goto out_err;
+
+		if (p9_is_proto_dotu(c))
+			err = -ecode;
+
 		if (!err || !IS_ERR_VALUE(err)) {
 			err = p9_errstr2errno(ename, strlen(ename));
 
@@ -628,8 +619,6 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
 	}
 	return err;
 
-out_free:
-	kfree(ename);
 out_err:
 	p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err);
 	return err;
@@ -667,12 +656,15 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
 		return PTR_ERR(req);
 
 
-	/* if we haven't received a response for oldreq,
-	   remove it from the list. */
-	spin_lock(&c->lock);
-	if (oldreq->status == REQ_STATUS_FLSH)
+	/*
+	 * if we haven't received a response for oldreq,
+	 * remove it from the list
+	 */
+	if (oldreq->status == REQ_STATUS_FLSH) {
+		spin_lock(&c->lock);
 		list_del(&oldreq->req_list);
-	spin_unlock(&c->lock);
+		spin_unlock(&c->lock);
+	}
 
 	p9_free_req(c, req);
 	return 0;
@@ -995,6 +987,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 {
 	int err;
 	struct p9_client *clnt;
+	char *client_id;
 
 	err = 0;
 	clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL);
@@ -1003,6 +996,10 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 
 	clnt->trans_mod = NULL;
 	clnt->trans = NULL;
+
+	client_id = utsname()->nodename;
+	memcpy(clnt->name, client_id, strlen(client_id) + 1);
+
 	spin_lock_init(&clnt->lock);
 	INIT_LIST_HEAD(&clnt->fidlist);
 
@@ -1015,6 +1012,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 		goto destroy_tagpool;
 
 	if (!clnt->trans_mod)
+		clnt->trans_mod = v9fs_get_trans_by_name("virtio");
+
+	if (!clnt->trans_mod)
 		clnt->trans_mod = v9fs_get_default_trans();
 
 	if (clnt->trans_mod == NULL) {
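Note on the pattern: p9_fcall_alloc() above deduplicates the open-coded buffer setup by placing the header struct and its payload in one kmalloc(), with sdata pointing just past the struct, so a single kfree() releases both. A reduced sketch of the same "struct plus trailing buffer" idiom (my_buf and my_buf_alloc are hypothetical names):

	#include <linux/slab.h>

	struct my_buf {
		size_t capacity;
		char *data;	/* points into the same allocation */
	};

	static struct my_buf *my_buf_alloc(size_t size)
	{
		struct my_buf *b;

		/* one allocation for header + payload, as in p9_fcall_alloc() */
		b = kmalloc(sizeof(*b) + size, GFP_NOFS);
		if (!b)
			return NULL;
		b->capacity = size;
		b->data = (char *)b + sizeof(*b);
		return b;
	}

	/* a single kfree(b) frees both the header and the payload */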
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index de8df957867d..2ee3879161b1 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -24,11 +24,11 @@
  */
 void p9_release_pages(struct page **pages, int nr_pages)
 {
-	int i = 0;
-	while (pages[i] && nr_pages--) {
-		put_page(pages[i]);
-		i++;
-	}
+	int i;
+
+	for (i = 0; i < nr_pages; i++)
+		if (pages[i])
+			put_page(pages[i]);
 }
 EXPORT_SYMBOL(p9_release_pages);
 
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 02efb25c2957..3ffda1b3799b 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -63,6 +63,7 @@ struct p9_fd_opts {
 	int rfd;
 	int wfd;
 	u16 port;
+	int privport;
 };
 
 /**
@@ -87,12 +88,15 @@ struct p9_trans_fd {
 enum {
 	/* Options that take integer arguments */
 	Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
+	/* Options that take no arguments */
+	Opt_privport,
 };
 
 static const match_table_t tokens = {
 	{Opt_port, "port=%u"},
 	{Opt_rfdno, "rfdno=%u"},
 	{Opt_wfdno, "wfdno=%u"},
+	{Opt_privport, "privport"},
 	{Opt_err, NULL},
 };
 
@@ -161,6 +165,9 @@ static DEFINE_SPINLOCK(p9_poll_lock);
 static LIST_HEAD(p9_poll_pending_list);
 static DECLARE_WORK(p9_poll_work, p9_poll_workfn);
 
+static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT;
+static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT;
+
 static void p9_mux_poll_stop(struct p9_conn *m)
 {
 	unsigned long flags;
@@ -741,7 +748,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 		if (!*p)
 			continue;
 		token = match_token(p, tokens, args);
-		if (token != Opt_err) {
+		if ((token != Opt_err) && (token != Opt_privport)) {
 			r = match_int(&args[0], &option);
 			if (r < 0) {
 				p9_debug(P9_DEBUG_ERROR,
@@ -759,6 +766,9 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 		case Opt_wfdno:
 			opts->wfd = option;
 			break;
+		case Opt_privport:
+			opts->privport = 1;
+			break;
 		default:
 			continue;
 		}
@@ -898,6 +908,24 @@ static inline int valid_ipaddr4(const char *buf)
 	return 0;
 }
 
+static int p9_bind_privport(struct socket *sock)
+{
+	struct sockaddr_in cl;
+	int port, err = -EINVAL;
+
+	memset(&cl, 0, sizeof(cl));
+	cl.sin_family = AF_INET;
+	cl.sin_addr.s_addr = INADDR_ANY;
+	for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) {
+		cl.sin_port = htons((ushort)port);
+		err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl));
+		if (err != -EADDRINUSE)
+			break;
+	}
+	return err;
+}
+
+
 static int
 p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 {
@@ -926,6 +954,16 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 		return err;
 	}
 
+	if (opts.privport) {
+		err = p9_bind_privport(csocket);
+		if (err < 0) {
+			pr_err("%s (%d): problem binding to privport\n",
+			       __func__, task_pid_nr(current));
+			sock_release(csocket);
+			return err;
+		}
+	}
+
 	err = csocket->ops->connect(csocket,
 				    (struct sockaddr *)&sin_server,
 				    sizeof(struct sockaddr_in), 0);
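Note on the pattern: p9_bind_privport() scans the reserved port range downward until kernel_bind() either succeeds or fails with something other than EADDRINUSE; the behaviour is opted into per mount (e.g. -o trans=tcp,privport). A userspace analogue of the same descending bind loop, assuming the customary 665..1023 reserved range (the kernel constants P9_DEF_MIN_RESVPORT/P9_DEF_MAX_RESVPORT are defined in a header not shown in this diff):

	#include <arpa/inet.h>
	#include <errno.h>
	#include <netinet/in.h>
	#include <string.h>
	#include <sys/socket.h>

	/* Bind sfd to a free reserved port, scanning from high to low. */
	static int bind_privport(int sfd)
	{
		struct sockaddr_in cl;
		int port, err = -1;

		memset(&cl, 0, sizeof(cl));
		cl.sin_family = AF_INET;
		cl.sin_addr.s_addr = htonl(INADDR_ANY);
		for (port = 1023; port >= 665; port--) {
			cl.sin_port = htons((unsigned short)port);
			err = bind(sfd, (struct sockaddr *)&cl, sizeof(cl));
			/* keep trying only while the port is already taken */
			if (err == 0 || errno != EADDRINUSE)
				break;
		}
		return err;
	}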
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 2c69ddd691a1..8f68df5d2973 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -57,9 +57,7 @@
 #define P9_RDMA_IRD		0
 #define P9_RDMA_ORD		0
 #define P9_RDMA_TIMEOUT		30000		/* 30 seconds */
-#define P9_RDMA_MAXSIZE		(4*4096)	/* Min SGE is 4, so we can
-						 * safely advertise a maxsize
-						 * of 64k */
+#define P9_RDMA_MAXSIZE		(1024*1024)	/* 1MB */
 
 /**
  * struct p9_trans_rdma - RDMA transport instance
@@ -75,7 +73,9 @@
  * @sq_depth: The depth of the Send Queue
  * @sq_sem: Semaphore for the SQ
  * @rq_depth: The depth of the Receive Queue.
- * @rq_count: Count of requests in the Receive Queue.
+ * @rq_sem: Semaphore for the RQ
+ * @excess_rc : Amount of posted Receive Contexts without a pending request.
+ *		See rdma_request()
  * @addr: The remote peer's address
  * @req_lock: Protects the active request list
  * @cm_done: Completion event for connection management tracking
@@ -100,7 +100,8 @@ struct p9_trans_rdma {
 	int sq_depth;
 	struct semaphore sq_sem;
 	int rq_depth;
-	atomic_t rq_count;
+	struct semaphore rq_sem;
+	atomic_t excess_rc;
 	struct sockaddr_in addr;
 	spinlock_t req_lock;
 
@@ -296,6 +297,13 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
 	if (!req)
 		goto err_out;
 
+	/* Check that we have not yet received a reply for this request.
+	 */
+	if (unlikely(req->rc)) {
+		pr_err("Duplicate reply for request %d", tag);
+		goto err_out;
+	}
+
 	req->rc = c->rc;
 	req->status = REQ_STATUS_RCVD;
 	p9_client_cb(client, req);
@@ -336,8 +344,8 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
 
 		switch (c->wc_op) {
 		case IB_WC_RECV:
-			atomic_dec(&rdma->rq_count);
 			handle_recv(client, rdma, c, wc.status, wc.byte_len);
+			up(&rdma->rq_sem);
 			break;
 
 		case IB_WC_SEND:
@@ -421,32 +429,33 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	struct p9_rdma_context *c = NULL;
 	struct p9_rdma_context *rpl_context = NULL;
 
+	/* When an error occurs between posting the recv and the send,
+	 * there will be a receive context posted without a pending request.
+	 * Since there is no way to "un-post" it, we remember it and skip
+	 * post_recv() for the next request.
+	 * So here,
+	 * see if we are this `next request' and need to absorb an excess rc.
+	 * If yes, then drop and free our own, and do not recv_post().
+	 **/
+	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
+		if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
+			/* Got one ! */
+			kfree(req->rc);
+			req->rc = NULL;
+			goto dont_need_post_recv;
+		} else {
+			/* We raced and lost. */
+			atomic_inc(&rdma->excess_rc);
+		}
+	}
+
 	/* Allocate an fcall for the reply */
 	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
 	if (!rpl_context) {
 		err = -ENOMEM;
-		goto err_close;
-	}
-
-	/*
-	 * If the request has a buffer, steal it, otherwise
-	 * allocate a new one.  Typically, requests should already
-	 * have receive buffers allocated and just swap them around
-	 */
-	if (!req->rc) {
-		req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
-				  GFP_NOFS);
-		if (req->rc) {
-			req->rc->sdata = (char *) req->rc +
-						sizeof(struct p9_fcall);
-			req->rc->capacity = client->msize;
-		}
+		goto recv_error;
 	}
 	rpl_context->rc = req->rc;
-	if (!rpl_context->rc) {
-		err = -ENOMEM;
-		goto err_free2;
-	}
 
 	/*
 	 * Post a receive buffer for this request. We need to ensure
@@ -455,29 +464,35 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	 * outstanding request, so we must keep a count to avoid
 	 * overflowing the RQ.
 	 */
-	if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) {
-		err = post_recv(client, rpl_context);
-		if (err)
-			goto err_free1;
-	} else
-		atomic_dec(&rdma->rq_count);
+	if (down_interruptible(&rdma->rq_sem)) {
+		err = -EINTR;
+		goto recv_error;
+	}
 
+	err = post_recv(client, rpl_context);
+	if (err) {
+		p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
+		goto recv_error;
+	}
 	/* remove posted receive buffer from request structure */
 	req->rc = NULL;
 
+dont_need_post_recv:
 	/* Post the request */
 	c = kmalloc(sizeof *c, GFP_NOFS);
 	if (!c) {
 		err = -ENOMEM;
-		goto err_free1;
+		goto send_error;
 	}
 	c->req = req;
 
 	c->busa = ib_dma_map_single(rdma->cm_id->device,
 				    c->req->tc->sdata, c->req->tc->size,
 				    DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
-		goto error;
+	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
+		err = -EIO;
+		goto send_error;
+	}
 
 	sge.addr = c->busa;
 	sge.length = c->req->tc->size;
@@ -491,22 +506,32 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	wr.sg_list = &sge;
 	wr.num_sge = 1;
 
-	if (down_interruptible(&rdma->sq_sem))
-		goto error;
+	if (down_interruptible(&rdma->sq_sem)) {
+		err = -EINTR;
+		goto send_error;
+	}
 
-	return ib_post_send(rdma->qp, &wr, &bad_wr);
+	err = ib_post_send(rdma->qp, &wr, &bad_wr);
+	if (err)
+		goto send_error;
 
- error:
+	/* Success */
+	return 0;
+
+ /* Handle errors that happened during or while preparing the send: */
+send_error:
 	kfree(c);
-	kfree(rpl_context->rc);
-	kfree(rpl_context);
-	p9_debug(P9_DEBUG_ERROR, "EIO\n");
-	return -EIO;
- err_free1:
-	kfree(rpl_context->rc);
- err_free2:
+	p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
+
+	/* Ach.
+	 *  We did recv_post(), but not send. We have one recv_post in excess.
+	 */
+	atomic_inc(&rdma->excess_rc);
+	return err;
+
+ /* Handle errors that happened during or while preparing post_recv(): */
+recv_error:
 	kfree(rpl_context);
- err_close:
 	spin_lock_irqsave(&rdma->req_lock, flags);
 	if (rdma->state < P9_RDMA_CLOSING) {
 		rdma->state = P9_RDMA_CLOSING;
@@ -551,7 +576,8 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
 	spin_lock_init(&rdma->req_lock);
 	init_completion(&rdma->cm_done);
 	sema_init(&rdma->sq_sem, rdma->sq_depth);
-	atomic_set(&rdma->rq_count, 0);
+	sema_init(&rdma->rq_sem, rdma->rq_depth);
+	atomic_set(&rdma->excess_rc, 0);
 
 	return rdma;
 }
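Note on the pattern: the rq_count atomic becomes a counting semaphore initialized to the receive queue depth, so a sender blocks in down_interruptible() when the RQ is full instead of silently skipping the post, and the completion handler releases one slot with up() per consumed buffer; excess_rc then accounts for buffers that were posted but whose matching send failed. A minimal sketch of semaphore-based depth limiting (my_queue and friends are hypothetical names):

	#include <linux/semaphore.h>

	struct my_queue {
		struct semaphore slots;	/* initialized to the queue depth */
	};

	static void my_queue_init(struct my_queue *q, int depth)
	{
		sema_init(&q->slots, depth);
	}

	static int my_post(struct my_queue *q)
	{
		/* block until a slot is free; -EINTR if interrupted */
		if (down_interruptible(&q->slots))
			return -EINTR;
		/* ... post one buffer to the hardware queue ... */
		return 0;
	}

	static void my_complete(struct my_queue *q)
	{
		/* one buffer consumed: release its slot */
		up(&q->slots);
	}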
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index e1c26b101830..990afab2be1b 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -577,6 +577,10 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	mutex_lock(&virtio_9p_lock);
 	list_add_tail(&chan->chan_list, &virtio_chan_list);
 	mutex_unlock(&virtio_9p_lock);
+
+	/* Let udev rules use the new mount_tag attribute. */
+	kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
+
 	return 0;
 
 out_free_tag:
@@ -654,6 +658,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
 	list_del(&chan->chan_list);
 	mutex_unlock(&virtio_9p_lock);
 	sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
+	kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
 	kfree(chan->tag);
 	kfree(chan->vc_wq);
 	kfree(chan);
diff --git a/net/Kconfig b/net/Kconfig
index 2ddc9046868e..b50dacc072f0 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -5,6 +5,7 @@
 menuconfig NET
 	bool "Networking support"
 	select NLATTR
+	select GENERIC_NET_UTILS
 	---help---
 	  Unless you really know what you are doing, you should say Y here.
 	  The reason is that some programs need kernel networking support even
@@ -218,6 +219,7 @@ source "net/batman-adv/Kconfig"
 source "net/openvswitch/Kconfig"
 source "net/vmw_vsock/Kconfig"
 source "net/netlink/Kconfig"
+source "net/mpls/Kconfig"
 
 config RPS
 	boolean
@@ -226,7 +228,7 @@ config RPS
 
 config RFS_ACCEL
 	boolean
-	depends on RPS && GENERIC_HARDIRQS
+	depends on RPS
 	select CPU_RMAP
 	default y
 
@@ -242,6 +244,10 @@ config NETPRIO_CGROUP
 	  Cgroup subsystem for use in assigning processes to network priorities on
 	  a per-interface basis
 
+config NET_RX_BUSY_POLL
+	boolean
+	default y
+
 config BQL
 	boolean
 	depends on SYSFS
@@ -259,11 +265,23 @@ config BPF_JIT
 	  packet sniffing (libpcap/tcpdump). Note : Admin should enable
 	  this feature changing /proc/sys/net/core/bpf_jit_enable
 
+config NET_FLOW_LIMIT
+	boolean
+	depends on RPS
+	default y
+	---help---
+	  The network stack has to drop packets when a receive processing CPU's
+	  backlog reaches netdev_max_backlog. If a few out of many active flows
+	  generate the vast majority of load, drop their traffic earlier to
+	  maintain capacity for the other flows. This feature provides servers
+	  with many clients some protection against DoS by a single (spoofed)
+	  flow that greatly exceeds average workload.
+
 menu "Network testing"
 
 config NET_PKTGEN
 	tristate "Packet Generator (USE WITH CAUTION)"
-	depends on PROC_FS
+	depends on INET && PROC_FS
 	---help---
 	  This module will inject preconfigured packets, at a configurable
 	  rate, out of a given interface.  It is used for network interface
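Note on the idea behind NET_FLOW_LIMIT: once a CPU's backlog is near saturation, packets belonging to a flow that dominates recent arrivals are dropped early so the remaining capacity serves the other flows. A deliberately simplified toy illustration of that policy, not the kernel's actual sd_flow_limit implementation (which keeps a per-softirq hash table of recent flow counts):

	#include <string.h>

	#define FL_BUCKETS 64

	static unsigned int fl_count[FL_BUCKETS];
	static unsigned int fl_total;

	/* Toy policy: drop when the backlog is over half-full and this
	 * flow's bucket holds more than half of the recent arrivals. */
	static int flow_limit_drop(unsigned int flow_hash,
				   unsigned int backlog,
				   unsigned int max_backlog)
	{
		unsigned int *bucket = &fl_count[flow_hash % FL_BUCKETS];

		if (++fl_total >= 256) {	/* periodically age the history */
			memset(fl_count, 0, sizeof(fl_count));
			fl_total = 1;
		}
		(*bucket)++;

		return backlog > (max_backlog >> 1) && *bucket > (fl_total >> 1);
	}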
diff --git a/net/Makefile b/net/Makefile
index 091e7b04f301..9492e8cb64e9 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -70,3 +70,4 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/
 obj-$(CONFIG_NFC)		+= nfc/
 obj-$(CONFIG_OPENVSWITCH)	+= openvswitch/
 obj-$(CONFIG_VSOCKETS)		+= vmw_vsock/
+obj-$(CONFIG_NET_MPLS_GSO)	+= mpls/
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 173a2e82f486..690356fa52b9 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -332,7 +332,7 @@ static void aarp_expire_timeout(unsigned long unused)
 static int aarp_device_event(struct notifier_block *this, unsigned long event,
 			     void *ptr)
 {
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	int ct;
 
 	if (!net_eq(dev_net(dev), &init_net))
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index c30f3a0717fb..af46bc49e1e9 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -178,7 +178,7 @@ static int atalk_seq_socket_show(struct seq_file *seq, void *v)
 	at = at_sk(s);
 
 	seq_printf(seq, "%02X %04X:%02X:%02X %04X:%02X:%02X %08X:%08X "
-			"%02X %d\n",
+			"%02X %u\n",
 		   s->sk_type, ntohs(at->src_net), at->src_node, at->src_port,
 		   ntohs(at->dest_net), at->dest_node, at->dest_port,
 		   sk_wmem_alloc_get(s),
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index ef12839a7cfe..7fee50d637f9 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -644,7 +644,7 @@ static inline void atalk_dev_down(struct net_device *dev)
 static int ddp_device_event(struct notifier_block *this, unsigned long event,
 			    void *ptr)
 {
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
 	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 8ae3a7879335..8215f7cb170b 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -539,9 +539,9 @@ static int clip_create(int number)
 }
 
 static int clip_device_event(struct notifier_block *this, unsigned long event,
-			     void *arg)
+			     void *ptr)
 {
-	struct net_device *dev = arg;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
 	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
@@ -575,6 +575,7 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event,
 			   void *ifa)
 {
 	struct in_device *in_dev;
+	struct netdev_notifier_info info;
 
 	in_dev = ((struct in_ifaddr *)ifa)->ifa_dev;
 	/*
@@ -583,7 +584,8 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event,
 	 */
 	if (event != NETDEV_UP)
 		return NOTIFY_DONE;
-	return clip_device_event(this, NETDEV_CHANGE, in_dev->dev);
+	netdev_notifier_info_init(&info, in_dev->dev);
+	return clip_device_event(this, NETDEV_CHANGE, &info);
 }
 
 static struct notifier_block clip_dev_notifier = {
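clip_inet_event() shows the other half of the conversion: code that used to invoke a notifier callback directly with a net_device pointer now has to wrap the device first. A hedged sketch of that calling pattern (the function name here is illustrative):

static int example_call_notifier(struct notifier_block *nb,
				 struct net_device *dev)
{
	struct netdev_notifier_info info;

	/* fill info.dev so the callee's netdev_notifier_info_to_dev() works */
	netdev_notifier_info_init(&info, dev);
	return nb->notifier_call(nb, NETDEV_CHANGE, &info);
}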
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index d4cc1be5c364..3af12755cd04 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -998,14 +998,12 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc)
 }
 
 static int mpoa_event_listener(struct notifier_block *mpoa_notifier,
-			       unsigned long event, void *dev_ptr)
+			       unsigned long event, void *ptr)
 {
-	struct net_device *dev;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct mpoa_client *mpc;
 	struct lec_priv *priv;
 
-	dev = dev_ptr;
-
 	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index e277e38f736b..4b4d2b779ec1 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -111,9 +111,9 @@ again:
  * Handle device status changes.
  */
 static int ax25_device_event(struct notifier_block *this, unsigned long event,
-	void *ptr)
+			     void *ptr)
 {
-	struct net_device *dev = (struct net_device *)ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
 	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
@@ -1974,7 +1974,7 @@ static struct packet_type ax25_packet_type __read_mostly = {
 };
 
 static struct notifier_block ax25_dev_notifier = {
-	.notifier_call =ax25_device_event,
+	.notifier_call = ax25_device_event,
 };
 
 static int __init ax25_init(void)
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
index d5744b752511..919a5ce47515 100644
--- a/net/ax25/sysctl_net_ax25.c
+++ b/net/ax25/sysctl_net_ax25.c
@@ -29,7 +29,7 @@ static int min_proto[1], max_proto[] = { AX25_PROTO_MAX };
 static int min_ds_timeout[1], max_ds_timeout[] = {65535000};
 #endif
 
-static const ctl_table ax25_param_table[] = {
+static const struct ctl_table ax25_param_table[] = {
 	{
 		.procname = "ip_default_mode",
 		.maxlen = sizeof(int),
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index acbac2a9c62f..489bb36f1b94 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -32,7 +32,6 @@ batman-adv-y += icmp_socket.o
 batman-adv-y += main.o
 batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o
 batman-adv-y += originator.o
-batman-adv-y += ring_buffer.o
 batman-adv-y += routing.o
 batman-adv-y += send.o
 batman-adv-y += soft-interface.o
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 071f288b77a8..0a8a80cd4bf1 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -19,7 +19,6 @@
 
 #include "main.h"
 #include "translation-table.h"
-#include "ring_buffer.h"
 #include "originator.h"
 #include "routing.h"
 #include "gateway_common.h"
@@ -29,16 +28,74 @@
 #include "bat_algo.h"
 #include "network-coding.h"
 
+
+/**
+ * batadv_dup_status - duplicate status
+ * @BATADV_NO_DUP: the packet is no duplicate
+ * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the
+ *  neighbor)
+ * @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor
+ * @BATADV_PROTECTED: originator is currently protected (after reboot)
+ */
+enum batadv_dup_status {
+	BATADV_NO_DUP = 0,
+	BATADV_ORIG_DUP,
+	BATADV_NEIGH_DUP,
+	BATADV_PROTECTED,
+};
+
+/**
+ * batadv_ring_buffer_set - update the ring buffer with the given value
+ * @lq_recv: pointer to the ring buffer
+ * @lq_index: index to store the value at
+ * @value: value to store in the ring buffer
+ */
+static void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index,
+				   uint8_t value)
+{
+	lq_recv[*lq_index] = value;
+	*lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE;
+}
+
+/**
+ * batadv_ring_buffer_avg - compute the average of all non-zero values stored
+ * in the given ring buffer
+ * @lq_recv: pointer to the ring buffer
+ *
+ * Returns computed average value.
+ */
+static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[])
+{
+	const uint8_t *ptr;
+	uint16_t count = 0, i = 0, sum = 0;
+
+	ptr = lq_recv;
+
+	while (i < BATADV_TQ_GLOBAL_WINDOW_SIZE) {
+		if (*ptr != 0) {
+			count++;
+			sum += *ptr;
+		}
+
+		i++;
+		ptr++;
+	}
+
+	if (count == 0)
+		return 0;
+
+	return (uint8_t)(sum / count);
+}
+
 static struct batadv_neigh_node *
 batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
 			const uint8_t *neigh_addr,
 			struct batadv_orig_node *orig_node,
-			struct batadv_orig_node *orig_neigh, __be32 seqno)
+			struct batadv_orig_node *orig_neigh)
 {
 	struct batadv_neigh_node *neigh_node;
 
-	neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr,
-					   ntohl(seqno));
+	neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr);
 	if (!neigh_node)
 		goto out;
 
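The two ring-buffer helpers moved here from the removed ring_buffer.c skip empty slots when averaging, which keeps a link's TQ estimate from being dragged down before the window has filled. A small self-contained illustration of that behaviour (user-space C, with WINDOW_SIZE standing in for BATADV_TQ_GLOBAL_WINDOW_SIZE):

#include <stdint.h>
#include <stdio.h>

#define WINDOW_SIZE 5	/* stands in for BATADV_TQ_GLOBAL_WINDOW_SIZE */

int main(void)
{
	/* two OGMs received so far, three slots still empty */
	uint8_t lq_recv[WINDOW_SIZE] = { 200, 100, 0, 0, 0 };
	unsigned int sum = 0, count = 0;
	int i;

	for (i = 0; i < WINDOW_SIZE; i++) {
		if (lq_recv[i] != 0) {
			sum += lq_recv[i];
			count++;
		}
	}

	/* prints "avg = 150": (200 + 100) / 2, not (200 + 100) / 5 = 60 */
	printf("avg = %u\n", count ? sum / count : 0);
	return 0;
}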
@@ -413,18 +470,17 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
 	else
 		skb_size = packet_len;
 
-	skb_size += ETH_HLEN + NET_IP_ALIGN;
+	skb_size += ETH_HLEN;
 
-	forw_packet_aggr->skb = dev_alloc_skb(skb_size);
+	forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size);
 	if (!forw_packet_aggr->skb) {
 		if (!own_packet)
 			atomic_inc(&bat_priv->batman_queue_left);
 		kfree(forw_packet_aggr);
 		goto out;
 	}
-	skb_reserve(forw_packet_aggr->skb, ETH_HLEN + NET_IP_ALIGN);
-
-	INIT_HLIST_NODE(&forw_packet_aggr->list);
+	forw_packet_aggr->skb->priority = TC_PRIO_CONTROL;
+	skb_reserve(forw_packet_aggr->skb, ETH_HLEN);
 
 	skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
 	forw_packet_aggr->packet_len = packet_len;
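The allocation change works because netdev_alloc_skb_ip_align() already accounts for NET_IP_ALIGN internally, so the explicit arithmetic and the extra reserve can go away; setting skb->priority to TC_PRIO_CONTROL marks OGMs as control traffic for queueing. Roughly, as a sketch of the helper's effect (not kernel source):

struct sk_buff *skb = netdev_alloc_skb(NULL, skb_size + NET_IP_ALIGN);

if (skb)
	skb_reserve(skb, NET_IP_ALIGN);	/* what the _ip_align variant adds */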
@@ -590,6 +646,41 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node,
 					if_incoming, 0, batadv_iv_ogm_fwd_send_time());
 }
 
+/**
+ * batadv_iv_ogm_slide_own_bcast_window - bitshift own OGM broadcast windows for
+ * the given interface
+ * @hard_iface: the interface for which the windows have to be shifted
+ */
+static void
+batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
+{
+	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+	struct batadv_hashtable *hash = bat_priv->orig_hash;
+	struct hlist_head *head;
+	struct batadv_orig_node *orig_node;
+	unsigned long *word;
+	uint32_t i;
+	size_t word_index;
+	uint8_t *w;
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+			spin_lock_bh(&orig_node->ogm_cnt_lock);
+			word_index = hard_iface->if_num * BATADV_NUM_WORDS;
+			word = &(orig_node->bcast_own[word_index]);
+
+			batadv_bit_get_packet(bat_priv, word, 1, 0);
+			w = &orig_node->bcast_own_sum[hard_iface->if_num];
+			*w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE);
+			spin_unlock_bh(&orig_node->ogm_cnt_lock);
+		}
+		rcu_read_unlock();
+	}
+}
+
 static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
 {
 	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
@@ -634,7 +725,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
 		batadv_ogm_packet->gw_flags = BATADV_NO_FLAGS;
 	}
 
-	batadv_slide_own_bcast_window(hard_iface);
+	batadv_iv_ogm_slide_own_bcast_window(hard_iface);
 	batadv_iv_ogm_queue_add(bat_priv, hard_iface->bat_iv.ogm_buff,
 				hard_iface->bat_iv.ogm_buff_len, hard_iface, 1,
 				batadv_iv_ogm_emit_send_time(bat_priv));
@@ -650,7 +741,7 @@
 			  const struct batadv_ogm_packet *batadv_ogm_packet,
 			  struct batadv_hard_iface *if_incoming,
 			  const unsigned char *tt_buff,
-			  int is_duplicate)
+			  enum batadv_dup_status dup_status)
 {
 	struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL;
 	struct batadv_neigh_node *router = NULL;
@@ -670,13 +761,13 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
 		if (batadv_compare_eth(neigh_addr, ethhdr->h_source) &&
 		    tmp_neigh_node->if_incoming == if_incoming &&
 		    atomic_inc_not_zero(&tmp_neigh_node->refcount)) {
-			if (neigh_node)
+			if (WARN(neigh_node, "too many matching neigh_nodes"))
 				batadv_neigh_node_free_ref(neigh_node);
 			neigh_node = tmp_neigh_node;
 			continue;
 		}
 
-		if (is_duplicate)
+		if (dup_status != BATADV_NO_DUP)
 			continue;
 
 		spin_lock_bh(&tmp_neigh_node->lq_update_lock);
@@ -696,8 +787,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
 
 		neigh_node = batadv_iv_ogm_neigh_new(if_incoming,
 						     ethhdr->h_source,
-						     orig_node, orig_tmp,
-						     batadv_ogm_packet->seqno);
+						     orig_node, orig_tmp);
 
 		batadv_orig_node_free_ref(orig_tmp);
 		if (!neigh_node)
@@ -718,7 +808,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
 	neigh_node->tq_avg = batadv_ring_buffer_avg(neigh_node->tq_recv);
 	spin_unlock_bh(&neigh_node->lq_update_lock);
 
-	if (!is_duplicate) {
+	if (dup_status == BATADV_NO_DUP) {
 		orig_node->last_ttl = batadv_ogm_packet->header.ttl;
 		neigh_node->last_ttl = batadv_ogm_packet->header.ttl;
 	}
@@ -829,8 +919,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
 		neigh_node = batadv_iv_ogm_neigh_new(if_incoming,
 						     orig_neigh_node->orig,
 						     orig_neigh_node,
-						     orig_neigh_node,
-						     batadv_ogm_packet->seqno);
+						     orig_neigh_node);
 
 		if (!neigh_node)
 			goto out;
@@ -902,15 +991,16 @@ out:
 	return ret;
 }
 
-/* processes a batman packet for all interfaces, adjusts the sequence number and
- * finds out whether it is a duplicate.
- * returns:
- *   1 the packet is a duplicate
- *   0 the packet has not yet been received
- *  -1 the packet is old and has been received while the seqno window
- *     was protected. Caller should drop it.
+/**
+ * batadv_iv_ogm_update_seqnos - process a batman packet for all interfaces,
+ * adjust the sequence number and find out whether it is a duplicate
+ * @ethhdr: ethernet header of the packet
+ * @batadv_ogm_packet: OGM packet to be considered
+ * @if_incoming: interface on which the OGM packet was received
+ *
+ * Returns duplicate status as enum batadv_dup_status
  */
-static int
+static enum batadv_dup_status
 batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
 			    const struct batadv_ogm_packet *batadv_ogm_packet,
 			    const struct batadv_hard_iface *if_incoming)
@@ -918,17 +1008,18 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
 	struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
 	struct batadv_orig_node *orig_node;
 	struct batadv_neigh_node *tmp_neigh_node;
-	int is_duplicate = 0;
+	int is_dup;
 	int32_t seq_diff;
 	int need_update = 0;
-	int set_mark, ret = -1;
+	int set_mark;
+	enum batadv_dup_status ret = BATADV_NO_DUP;
 	uint32_t seqno = ntohl(batadv_ogm_packet->seqno);
 	uint8_t *neigh_addr;
 	uint8_t packet_count;
 
 	orig_node = batadv_get_orig_node(bat_priv, batadv_ogm_packet->orig);
 	if (!orig_node)
-		return 0;
+		return BATADV_NO_DUP;
 
 	spin_lock_bh(&orig_node->ogm_cnt_lock);
 	seq_diff = seqno - orig_node->last_real_seqno;
@@ -936,22 +1027,29 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
 	/* signalize caller that the packet is to be dropped. */
 	if (!hlist_empty(&orig_node->neigh_list) &&
 	    batadv_window_protected(bat_priv, seq_diff,
-				    &orig_node->batman_seqno_reset))
+				    &orig_node->batman_seqno_reset)) {
+		ret = BATADV_PROTECTED;
 		goto out;
+	}
 
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(tmp_neigh_node,
 				 &orig_node->neigh_list, list) {
-		is_duplicate |= batadv_test_bit(tmp_neigh_node->real_bits,
-						orig_node->last_real_seqno,
-						seqno);
-
 		neigh_addr = tmp_neigh_node->addr;
+		is_dup = batadv_test_bit(tmp_neigh_node->real_bits,
+					 orig_node->last_real_seqno,
+					 seqno);
+
 		if (batadv_compare_eth(neigh_addr, ethhdr->h_source) &&
-		    tmp_neigh_node->if_incoming == if_incoming)
+		    tmp_neigh_node->if_incoming == if_incoming) {
 			set_mark = 1;
-		else
+			if (is_dup)
+				ret = BATADV_NEIGH_DUP;
+		} else {
 			set_mark = 0;
+			if (is_dup && (ret != BATADV_NEIGH_DUP))
+				ret = BATADV_ORIG_DUP;
+		}
 
 		/* if the window moved, set the update flag. */
 		need_update |= batadv_bit_get_packet(bat_priv,
@@ -971,8 +1069,6 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
 		orig_node->last_real_seqno = seqno;
 	}
 
-	ret = is_duplicate;
-
 out:
 	spin_unlock_bh(&orig_node->ogm_cnt_lock);
 	batadv_orig_node_free_ref(orig_node);
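With batadv_iv_ogm_update_seqnos() returning enum batadv_dup_status instead of the old -1/0/1 int, callers can tell a duplicate seen via this neighbor apart from one only known to the originator. An illustrative caller-side dispatch (not taken from the patch):

switch (dup_status) {
case BATADV_PROTECTED:
	/* formerly -1: seqno window protected, drop the packet */
	break;
case BATADV_NO_DUP:
	/* formerly 0: first reception, safe to update the routes */
	break;
case BATADV_NEIGH_DUP:
case BATADV_ORIG_DUP:
	/* formerly both 1; now split by where the duplicate was seen */
	break;
}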
@@ -991,10 +1087,11 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
 	struct batadv_neigh_node *orig_neigh_router = NULL;
 	int has_directlink_flag;
 	int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0;
-	int is_broadcast = 0, is_bidirect;
+	int is_bidirect;
 	bool is_single_hop_neigh = false;
 	bool is_from_best_next_hop = false;
-	int is_duplicate, sameseq, simlar_ttl;
+	int sameseq, similar_ttl;
+	enum batadv_dup_status dup_status;
 	uint32_t if_incoming_seqno;
 	uint8_t *prev_sender;
 
@@ -1054,19 +1151,9 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
 		if (batadv_compare_eth(batadv_ogm_packet->prev_sender,
 				       hard_iface->net_dev->dev_addr))
 			is_my_oldorig = 1;
-
-		if (is_broadcast_ether_addr(ethhdr->h_source))
-			is_broadcast = 1;
 	}
 	rcu_read_unlock();
 
-	if (batadv_ogm_packet->header.version != BATADV_COMPAT_VERSION) {
-		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-			   "Drop packet: incompatible batman version (%i)\n",
-			   batadv_ogm_packet->header.version);
-		return;
-	}
-
 	if (is_my_addr) {
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 			   "Drop packet: received my own broadcast (sender: %pM)\n",
@@ -1074,13 +1161,6 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
 		return;
 	}
 
-	if (is_broadcast) {
-		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-			   "Drop packet: ignoring all packets with broadcast source addr (sender: %pM)\n",
-			   ethhdr->h_source);
-		return;
-	}
-
 	if (is_my_orig) {
 		unsigned long *word;
 		int offset;
@@ -1138,10 +1218,10 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
 	if (!orig_node)
 		return;
 
-	is_duplicate = batadv_iv_ogm_update_seqnos(ethhdr, batadv_ogm_packet,
-						   if_incoming);
+	dup_status = batadv_iv_ogm_update_seqnos(ethhdr, batadv_ogm_packet,
+						 if_incoming);
 
-	if (is_duplicate == -1) {
+	if (dup_status == BATADV_PROTECTED) {
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 			   "Drop packet: packet within seqno protection time (sender: %pM)\n",
 			   ethhdr->h_source);
@@ -1211,11 +1291,12 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
 	 * seqno and similar ttl as the non-duplicate
 	 */
 	sameseq = orig_node->last_real_seqno == ntohl(batadv_ogm_packet->seqno);
-	simlar_ttl = orig_node->last_ttl - 3 <= batadv_ogm_packet->header.ttl;
-	if (is_bidirect && (!is_duplicate || (sameseq && simlar_ttl)))
+	similar_ttl = orig_node->last_ttl - 3 <= batadv_ogm_packet->header.ttl;
+	if (is_bidirect && ((dup_status == BATADV_NO_DUP) ||
+			    (sameseq && similar_ttl)))
 		batadv_iv_ogm_orig_update(bat_priv, orig_node, ethhdr,
 					  batadv_ogm_packet, if_incoming,
-					  tt_buff, is_duplicate);
+					  tt_buff, dup_status);
 
 	/* is single hop (direct) neighbor */
 	if (is_single_hop_neigh) {
@@ -1236,7 +1317,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
 		goto out_neigh;
 	}
 
-	if (is_duplicate) {
+	if (dup_status == BATADV_NEIGH_DUP) {
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 			   "Drop packet: duplicate packet received\n");
 		goto out_neigh;
@@ -1288,7 +1369,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
 			   skb->len + ETH_HLEN);
 
 	packet_len = skb_headlen(skb);
-	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+	ethhdr = eth_hdr(skb);
 	packet_buff = skb->data;
 	batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff;
 
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 379061c72549..264de88db320 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -180,7 +180,7 @@ static struct batadv_bla_claim
  */
 static struct batadv_bla_backbone_gw *
 batadv_backbone_hash_find(struct batadv_priv *bat_priv,
-			  uint8_t *addr, short vid)
+			  uint8_t *addr, unsigned short vid)
 {
 	struct batadv_hashtable *hash = bat_priv->bla.backbone_hash;
 	struct hlist_head *head;
@@ -257,7 +257,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
  * @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...)
  */
 static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
-				  short vid, int claimtype)
+				  unsigned short vid, int claimtype)
 {
 	struct sk_buff *skb;
 	struct ethhdr *ethhdr;
@@ -307,7 +307,8 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
 		 */
 		memcpy(ethhdr->h_source, mac, ETH_ALEN);
 		batadv_dbg(BATADV_DBG_BLA, bat_priv,
-			   "bla_send_claim(): CLAIM %pM on vid %d\n", mac, vid);
+			   "bla_send_claim(): CLAIM %pM on vid %d\n", mac,
+			   BATADV_PRINT_VID(vid));
 		break;
 	case BATADV_CLAIM_TYPE_UNCLAIM:
 		/* unclaim frame
@@ -316,7 +317,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
 		memcpy(hw_src, mac, ETH_ALEN);
 		batadv_dbg(BATADV_DBG_BLA, bat_priv,
 			   "bla_send_claim(): UNCLAIM %pM on vid %d\n", mac,
-			   vid);
+			   BATADV_PRINT_VID(vid));
 		break;
 	case BATADV_CLAIM_TYPE_ANNOUNCE:
 		/* announcement frame
@@ -325,7 +326,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
 		memcpy(hw_src, mac, ETH_ALEN);
 		batadv_dbg(BATADV_DBG_BLA, bat_priv,
 			   "bla_send_claim(): ANNOUNCE of %pM on vid %d\n",
-			   ethhdr->h_source, vid);
+			   ethhdr->h_source, BATADV_PRINT_VID(vid));
 		break;
 	case BATADV_CLAIM_TYPE_REQUEST:
 		/* request frame
@@ -335,13 +336,15 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
 		memcpy(hw_src, mac, ETH_ALEN);
 		memcpy(ethhdr->h_dest, mac, ETH_ALEN);
 		batadv_dbg(BATADV_DBG_BLA, bat_priv,
-			   "bla_send_claim(): REQUEST of %pM to %pMon vid %d\n",
-			   ethhdr->h_source, ethhdr->h_dest, vid);
+			   "bla_send_claim(): REQUEST of %pM to %pM on vid %d\n",
+			   ethhdr->h_source, ethhdr->h_dest,
+			   BATADV_PRINT_VID(vid));
 		break;
 	}
 
-	if (vid != -1)
-		skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), vid);
+	if (vid & BATADV_VLAN_HAS_TAG)
+		skb = vlan_insert_tag(skb, htons(ETH_P_8021Q),
+				      vid & VLAN_VID_MASK);
 
 	skb_reset_mac_header(skb);
 	skb->protocol = eth_type_trans(skb, soft_iface);
@@ -367,7 +370,7 @@ out:
  */
 static struct batadv_bla_backbone_gw *
 batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig,
-			   short vid, bool own_backbone)
+			   unsigned short vid, bool own_backbone)
 {
 	struct batadv_bla_backbone_gw *entry;
 	struct batadv_orig_node *orig_node;
@@ -380,7 +383,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig,
 
 	batadv_dbg(BATADV_DBG_BLA, bat_priv,
 		   "bla_get_backbone_gw(): not found (%pM, %d), creating new entry\n",
-		   orig, vid);
+		   orig, BATADV_PRINT_VID(vid));
 
 	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 	if (!entry)
@@ -434,7 +437,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig,
 static void
 batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv,
 				  struct batadv_hard_iface *primary_if,
-				  short vid)
+				  unsigned short vid)
 {
 	struct batadv_bla_backbone_gw *backbone_gw;
 
@@ -456,7 +459,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv,
  */
 static void batadv_bla_answer_request(struct batadv_priv *bat_priv,
 				      struct batadv_hard_iface *primary_if,
-				      short vid)
+				      unsigned short vid)
 {
 	struct hlist_head *head;
 	struct batadv_hashtable *hash;
@@ -547,7 +550,7 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv,
  * @backbone_gw: the backbone gateway which claims it
  */
 static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
-				 const uint8_t *mac, const short vid,
+				 const uint8_t *mac, const unsigned short vid,
 				 struct batadv_bla_backbone_gw *backbone_gw)
 {
 	struct batadv_bla_claim *claim;
@@ -572,7 +575,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
 	atomic_set(&claim->refcount, 2);
 	batadv_dbg(BATADV_DBG_BLA, bat_priv,
 		   "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n",
-		   mac, vid);
+		   mac, BATADV_PRINT_VID(vid));
 	hash_added = batadv_hash_add(bat_priv->bla.claim_hash,
 				     batadv_compare_claim,
 				     batadv_choose_claim, claim,
@@ -591,7 +594,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
 
 		batadv_dbg(BATADV_DBG_BLA, bat_priv,
 			   "bla_add_claim(): changing ownership for %pM, vid %d\n",
-			   mac, vid);
+			   mac, BATADV_PRINT_VID(vid));
 
 		claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
 		batadv_backbone_gw_free_ref(claim->backbone_gw);
@@ -611,7 +614,7 @@ claim_free_ref:
  * given mac address and vid.
  */
 static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
-				 const uint8_t *mac, const short vid)
+				 const uint8_t *mac, const unsigned short vid)
 {
 	struct batadv_bla_claim search_claim, *claim;
 
@@ -622,7 +625,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
 		return;
 
 	batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n",
-		   mac, vid);
+		   mac, BATADV_PRINT_VID(vid));
 
 	batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim,
 			   batadv_choose_claim, claim);
@@ -637,7 +640,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
 /* check for ANNOUNCE frame, return 1 if handled */
 static int batadv_handle_announce(struct batadv_priv *bat_priv,
 				  uint8_t *an_addr, uint8_t *backbone_addr,
-				  short vid)
+				  unsigned short vid)
 {
 	struct batadv_bla_backbone_gw *backbone_gw;
 	uint16_t crc;
@@ -658,12 +661,13 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv,
 
 	batadv_dbg(BATADV_DBG_BLA, bat_priv,
 		   "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n",
-		   vid, backbone_gw->orig, crc);
+		   BATADV_PRINT_VID(vid), backbone_gw->orig, crc);
 
 	if (backbone_gw->crc != crc) {
 		batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv,
 			   "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n",
-			   backbone_gw->orig, backbone_gw->vid,
+			   backbone_gw->orig,
+			   BATADV_PRINT_VID(backbone_gw->vid),
 			   backbone_gw->crc, crc);
 
 		batadv_bla_send_request(backbone_gw);
@@ -685,7 +689,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv,
 static int batadv_handle_request(struct batadv_priv *bat_priv,
 				 struct batadv_hard_iface *primary_if,
 				 uint8_t *backbone_addr,
-				 struct ethhdr *ethhdr, short vid)
+				 struct ethhdr *ethhdr, unsigned short vid)
 {
 	/* check for REQUEST frame */
 	if (!batadv_compare_eth(backbone_addr, ethhdr->h_dest))
@@ -699,7 +703,7 @@ static int batadv_handle_request(struct batadv_priv *bat_priv,
 
 	batadv_dbg(BATADV_DBG_BLA, bat_priv,
 		   "handle_request(): REQUEST vid %d (sent by %pM)...\n",
-		   vid, ethhdr->h_source);
+		   BATADV_PRINT_VID(vid), ethhdr->h_source);
 
 	batadv_bla_answer_request(bat_priv, primary_if, vid);
 	return 1;
@@ -709,7 +713,7 @@ static int batadv_handle_request(struct batadv_priv *bat_priv,
 static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
 				 struct batadv_hard_iface *primary_if,
 				 uint8_t *backbone_addr,
-				 uint8_t *claim_addr, short vid)
+				 uint8_t *claim_addr, unsigned short vid)
 {
 	struct batadv_bla_backbone_gw *backbone_gw;
 
@@ -727,7 +731,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
 	/* this must be an UNCLAIM frame */
 	batadv_dbg(BATADV_DBG_BLA, bat_priv,
 		   "handle_unclaim(): UNCLAIM %pM on vid %d (sent by %pM)...\n",
-		   claim_addr, vid, backbone_gw->orig);
+		   claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig);
 
 	batadv_bla_del_claim(bat_priv, claim_addr, vid);
 	batadv_backbone_gw_free_ref(backbone_gw);
@@ -738,7 +742,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
 static int batadv_handle_claim(struct batadv_priv *bat_priv,
 			       struct batadv_hard_iface *primary_if,
 			       uint8_t *backbone_addr, uint8_t *claim_addr,
-			       short vid)
+			       unsigned short vid)
 {
 	struct batadv_bla_backbone_gw *backbone_gw;
 
@@ -861,14 +865,15 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
 	struct batadv_bla_claim_dst *bla_dst;
 	uint16_t proto;
 	int headlen;
-	short vid = -1;
+	unsigned short vid = BATADV_NO_FLAGS;
 	int ret;
 
-	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+	ethhdr = eth_hdr(skb);
 
 	if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) {
 		vhdr = (struct vlan_ethhdr *)ethhdr;
 		vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
+		vid |= BATADV_VLAN_HAS_TAG;
 		proto = ntohs(vhdr->h_vlan_encapsulated_proto);
 		headlen = sizeof(*vhdr);
 	} else {
@@ -885,7 +890,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
 		return 0;
 
 	/* pskb_may_pull() may have modified the pointers, get ethhdr again */
-	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+	ethhdr = eth_hdr(skb);
 	arphdr = (struct arphdr *)((uint8_t *)ethhdr + headlen);
 
 	/* Check whether the ARP frame carries a valid
@@ -910,7 +915,8 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
 	if (ret == 1)
 		batadv_dbg(BATADV_DBG_BLA, bat_priv,
 			   "bla_process_claim(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
-			   ethhdr->h_source, vid, hw_src, hw_dst);
+			   ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src,
+			   hw_dst);
 
 	if (ret < 2)
 		return ret;
@@ -945,7 +951,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
 
 	batadv_dbg(BATADV_DBG_BLA, bat_priv,
 		   "bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
-		   ethhdr->h_source, vid, hw_src, hw_dst);
+		   ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst);
 	return 1;
 }
 
951 957
@@ -1067,6 +1073,10 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
 	group = htons(crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN));
 	bat_priv->bla.claim_dest.group = group;
 
+	/* purge everything when bridge loop avoidance is turned off */
+	if (!atomic_read(&bat_priv->bridge_loop_avoidance))
+		oldif = NULL;
+
 	if (!oldif) {
 		batadv_bla_purge_claims(bat_priv, NULL, 1);
 		batadv_bla_purge_backbone_gw(bat_priv, 1);
@@ -1358,7 +1368,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb,
 	struct ethhdr *ethhdr;
 	struct vlan_ethhdr *vhdr;
 	struct batadv_bla_backbone_gw *backbone_gw;
-	short vid = -1;
+	unsigned short vid = BATADV_NO_FLAGS;
 
 	if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance))
 		return 0;
@@ -1375,6 +1385,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb,
 
 		vhdr = (struct vlan_ethhdr *)(skb->data + hdr_size);
 		vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
+		vid |= BATADV_VLAN_HAS_TAG;
 	}
 
 	/* see if this originator is a backbone gw for this VLAN */
@@ -1424,15 +1435,15 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
  * returns 1, otherwise it returns 0 and the caller shall further
 * process the skb.
 */
-int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid,
-		  bool is_bcast)
+int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
+		  unsigned short vid, bool is_bcast)
 {
 	struct ethhdr *ethhdr;
 	struct batadv_bla_claim search_claim, *claim = NULL;
 	struct batadv_hard_iface *primary_if;
 	int ret;
 
-	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+	ethhdr = eth_hdr(skb);
 
 	primary_if = batadv_primary_if_get_selected(bat_priv);
 	if (!primary_if)
@@ -1518,8 +1529,11 @@ out:
  * in these cases, the skb is further handled by this function and
 * returns 1, otherwise it returns 0 and the caller shall further
 * process the skb.
+ *
+ * This call might reallocate skb data.
 */
-int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid)
+int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
+		  unsigned short vid)
 {
 	struct ethhdr *ethhdr;
 	struct batadv_bla_claim search_claim, *claim = NULL;
@@ -1539,7 +1553,7 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid)
 	if (batadv_bla_process_claim(bat_priv, primary_if, skb))
 		goto handled;
 
-	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+	ethhdr = eth_hdr(skb);
 
 	if (unlikely(atomic_read(&bat_priv->bla.num_requests)))
 		/* don't allow broadcasts while requests are in flight */
@@ -1623,8 +1637,8 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
 		hlist_for_each_entry_rcu(claim, head, hash_entry) {
 			is_own = batadv_compare_eth(claim->backbone_gw->orig,
 						    primary_addr);
-			seq_printf(seq, " * %pM on % 5d by %pM [%c] (%#.4x)\n",
-				   claim->addr, claim->vid,
+			seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n",
+				   claim->addr, BATADV_PRINT_VID(claim->vid),
 				   claim->backbone_gw->orig,
 				   (is_own ? 'x' : ' '),
 				   claim->backbone_gw->crc);
@@ -1676,10 +1690,10 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
 			if (is_own)
 				continue;
 
-			seq_printf(seq,
-				   " * %pM on % 5d % 4i.%03is (%#.4x)\n",
-				   backbone_gw->orig, backbone_gw->vid,
-				   secs, msecs, backbone_gw->crc);
+			seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n",
+				   backbone_gw->orig,
+				   BATADV_PRINT_VID(backbone_gw->vid), secs,
+				   msecs, backbone_gw->crc);
 		}
 		rcu_read_unlock();
 	}
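All the vid logging above goes through BATADV_PRINT_VID() because the VLAN id is now an unsigned short that carries a flag bit in addition to the 12-bit id. A plausible definition of the macro (the real one lives in net/batman-adv/main.h, which is not part of this excerpt):

/* print the 12-bit VLAN id when BATADV_VLAN_HAS_TAG is set, -1 otherwise */
#define BATADV_PRINT_VID(vid) ((vid) & BATADV_VLAN_HAS_TAG ? \
			       (int)((vid) & VLAN_VID_MASK) : -1)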
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index dea2fbc5d98d..4b102e71e5bd 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -21,9 +21,10 @@
 #define _NET_BATMAN_ADV_BLA_H_
 
 #ifdef CONFIG_BATMAN_ADV_BLA
-int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid,
-		  bool is_bcast);
-int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid);
+int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
+		  unsigned short vid, bool is_bcast);
+int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
+		  unsigned short vid);
 int batadv_bla_is_backbone_gw(struct sk_buff *skb,
 			      struct batadv_orig_node *orig_node, int hdr_size);
 int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset);
@@ -42,13 +43,14 @@ void batadv_bla_free(struct batadv_priv *bat_priv);
 #else /* ifdef CONFIG_BATMAN_ADV_BLA */
 
 static inline int batadv_bla_rx(struct batadv_priv *bat_priv,
-				struct sk_buff *skb, short vid, bool is_bcast)
+				struct sk_buff *skb, unsigned short vid,
+				bool is_bcast)
 {
 	return 0;
 }
 
 static inline int batadv_bla_tx(struct batadv_priv *bat_priv,
-				struct sk_buff *skb, short vid)
+				struct sk_buff *skb, unsigned short vid)
 {
 	return 0;
 }
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 239992021b1d..06345d401588 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -45,9 +45,9 @@ static void batadv_dat_start_timer(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_dat_entry_free_ref - decrements the dat_entry refcounter and possibly
+ * batadv_dat_entry_free_ref - decrement the dat_entry refcounter and possibly
  * free it
- * @dat_entry: the oentry to free
+ * @dat_entry: the entry to free
  */
 static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry)
 {
@@ -56,10 +56,10 @@ static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry)
 }
 
 /**
- * batadv_dat_to_purge - checks whether a dat_entry has to be purged or not
+ * batadv_dat_to_purge - check whether a dat_entry has to be purged or not
  * @dat_entry: the entry to check
 *
- * Returns true if the entry has to be purged now, false otherwise
+ * Returns true if the entry has to be purged now, false otherwise.
 */
 static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry)
 {
@@ -75,8 +75,8 @@ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry)
  * returns a boolean value: true is the entry has to be deleted,
 * false otherwise
 *
- * Loops over each entry in the DAT local storage and delete it if and only if
- * the to_purge function passed as argument returns true
+ * Loops over each entry in the DAT local storage and deletes it if and only if
+ * the to_purge function passed as argument returns true.
 */
 static void __batadv_dat_purge(struct batadv_priv *bat_priv,
 			       bool (*to_purge)(struct batadv_dat_entry *))
@@ -97,7 +97,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv,
 		spin_lock_bh(list_lock);
 		hlist_for_each_entry_safe(dat_entry, node_tmp, head,
 					  hash_entry) {
-			/* if an helper function has been passed as parameter,
+			/* if a helper function has been passed as parameter,
 			 * ask it if the entry has to be purged or not
 			 */
 			if (to_purge && !to_purge(dat_entry))
@@ -134,7 +134,7 @@ static void batadv_dat_purge(struct work_struct *work)
  * @node: node in the local table
 * @data2: second object to compare the node to
 *
- * Returns 1 if the two entry are the same, 0 otherwise
+ * Returns 1 if the two entries are the same, 0 otherwise.
 */
 static int batadv_compare_dat(const struct hlist_node *node, const void *data2)
 {
@@ -149,7 +149,7 @@ static int batadv_compare_dat(const struct hlist_node *node, const void *data2)
  * @skb: ARP packet
 * @hdr_size: size of the possible header before the ARP packet
 *
- * Returns the value of the hw_src field in the ARP packet
+ * Returns the value of the hw_src field in the ARP packet.
 */
 static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
 {
@@ -166,7 +166,7 @@ static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
  * @skb: ARP packet
 * @hdr_size: size of the possible header before the ARP packet
 *
- * Returns the value of the ip_src field in the ARP packet
+ * Returns the value of the ip_src field in the ARP packet.
 */
 static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size)
 {
@@ -178,7 +178,7 @@ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size)
  * @skb: ARP packet
 * @hdr_size: size of the possible header before the ARP packet
 *
- * Returns the value of the hw_dst field in the ARP packet
+ * Returns the value of the hw_dst field in the ARP packet.
 */
 static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
 {
@@ -190,7 +190,7 @@ static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
  * @skb: ARP packet
 * @hdr_size: size of the possible header before the ARP packet
 *
- * Returns the value of the ip_dst field in the ARP packet
+ * Returns the value of the ip_dst field in the ARP packet.
 */
 static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
 {
@@ -202,7 +202,7 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
  * @data: data to hash
 * @size: size of the hash table
 *
- * Returns the selected index in the hash table for the given data
+ * Returns the selected index in the hash table for the given data.
 */
 static uint32_t batadv_hash_dat(const void *data, uint32_t size)
 {
@@ -224,12 +224,12 @@ static uint32_t batadv_hash_dat(const void *data, uint32_t size)
 }
 
 /**
- * batadv_dat_entry_hash_find - looks for a given dat_entry in the local hash
+ * batadv_dat_entry_hash_find - look for a given dat_entry in the local hash
  * table
 * @bat_priv: the bat priv with all the soft interface information
 * @ip: search key
 *
- * Returns the dat_entry if found, NULL otherwise
+ * Returns the dat_entry if found, NULL otherwise.
 */
 static struct batadv_dat_entry *
 batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip)
@@ -343,9 +343,6 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
 	if (hdr_size == 0)
 		return;
 
-	/* if the ARP packet is encapsulated in a batman packet, let's print
-	 * some debug messages
-	 */
 	unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
 
 	switch (unicast_4addr_packet->u.header.packet_type) {
@@ -409,7 +406,8 @@
  * @candidate: orig_node under evaluation
 * @max_orig_node: last selected candidate
 *
- * Returns true if the node has been elected as next candidate or false othrwise
+ * Returns true if the node has been elected as next candidate or false
+ * otherwise.
 */
 static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
 					 int select, batadv_dat_addr_t tmp_max,
@@ -472,7 +470,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
 	 */
 	cands[select].type = BATADV_DAT_CANDIDATE_NOT_FOUND;
 
-	/* iterate over the originator list and find the node with closest
+	/* iterate over the originator list and find the node with the closest
 	 * dat_address which has not been selected yet
 	 */
 	for (i = 0; i < hash->size; i++) {
@@ -480,7 +478,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
 
 		rcu_read_lock();
 		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
-			/* the dht space is a ring and addresses are unsigned */
+			/* the dht space is a ring using unsigned addresses */
 			tmp_max = BATADV_DAT_ADDR_MAX - orig_node->dat_addr +
 				  ip_key;
 
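A worked example of the ring metric in batadv_choose_next_candidate(), assuming a 16-bit batadv_dat_addr_t (so BATADV_DAT_ADDR_MAX == 65535); the computation deliberately relies on unsigned wrap-around:

/* with ip_key == 10:
 *   dat_addr == 20    -> tmp_max = 65535 - 20    + 10 = 65525
 *   dat_addr == 65530 -> tmp_max = 65535 - 65530 + 10 = 15
 * the distance is therefore well defined wherever key and address sit
 * on the ring, and candidates are compared by tmp_max instead of raw ids.
 */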
@@ -512,7 +510,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
512} 510}
513 511
514/** 512/**
515 * batadv_dat_select_candidates - selects the nodes which the DHT message has to 513 * batadv_dat_select_candidates - select the nodes which the DHT message has to
516 * be sent to 514 * be sent to
517 * @bat_priv: the bat priv with all the soft interface information 515 * @bat_priv: the bat priv with all the soft interface information
518 * @ip_dst: ipv4 to look up in the DHT 516 * @ip_dst: ipv4 to look up in the DHT
@@ -521,7 +519,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
521 * closest values (from the LEFT, with wrap around if needed) then the hash 519 * closest values (from the LEFT, with wrap around if needed) then the hash
522 * value of the key. ip_dst is the key. 520 * value of the key. ip_dst is the key.
523 * 521 *
524 * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM 522 * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM.
525 */ 523 */
526static struct batadv_dat_candidate * 524static struct batadv_dat_candidate *
527batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) 525batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
@@ -558,10 +556,11 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
558 * @ip: the DHT key 556 * @ip: the DHT key
559 * @packet_subtype: unicast4addr packet subtype to use 557 * @packet_subtype: unicast4addr packet subtype to use
560 * 558 *
561 * In this function the skb is copied by means of pskb_copy() and is sent as 559 * This function copies the skb with pskb_copy() and sends it as a unicast
562 * unicast packet to each of the selected candidates 560 * packet to each of the selected candidates.
563 * 561 *
564 * Returns true if the packet is sent to at least one candidate, false otherwise 562 * Returns true if the packet is sent to at least one candidate, false
563 * otherwise.
565 */ 564 */
566static bool batadv_dat_send_data(struct batadv_priv *bat_priv, 565static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
567 struct sk_buff *skb, __be32 ip, 566 struct sk_buff *skb, __be32 ip,
@@ -727,7 +726,7 @@ out:
727 * @skb: packet to analyse 726 * @skb: packet to analyse
728 * @hdr_size: size of the possible header before the ARP packet in the skb 727 * @hdr_size: size of the possible header before the ARP packet in the skb
729 * 728 *
730 * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise 729 * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise.
731 */ 730 */
732static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, 731static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv,
733 struct sk_buff *skb, int hdr_size) 732 struct sk_buff *skb, int hdr_size)
@@ -754,9 +753,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv,
754 753
755 arphdr = (struct arphdr *)(skb->data + hdr_size + ETH_HLEN); 754 arphdr = (struct arphdr *)(skb->data + hdr_size + ETH_HLEN);
756 755
757 /* Check whether the ARP packet carries a valid 756 /* check whether the ARP packet carries a valid IP information */
758 * IP information
759 */
760 if (arphdr->ar_hrd != htons(ARPHRD_ETHER)) 757 if (arphdr->ar_hrd != htons(ARPHRD_ETHER))
761 goto out; 758 goto out;
762 759
@@ -784,7 +781,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv,
784 if (is_zero_ether_addr(hw_src) || is_multicast_ether_addr(hw_src)) 781 if (is_zero_ether_addr(hw_src) || is_multicast_ether_addr(hw_src))
785 goto out; 782 goto out;
786 783
787 /* we don't care about the destination MAC address in ARP requests */ 784 /* don't care about the destination MAC address in ARP requests */
788 if (arphdr->ar_op != htons(ARPOP_REQUEST)) { 785 if (arphdr->ar_op != htons(ARPOP_REQUEST)) {
789 hw_dst = batadv_arp_hw_dst(skb, hdr_size); 786 hw_dst = batadv_arp_hw_dst(skb, hdr_size);
790 if (is_zero_ether_addr(hw_dst) || 787 if (is_zero_ether_addr(hw_dst) ||
@@ -804,8 +801,8 @@ out:
804 * @skb: packet to check 801 * @skb: packet to check
805 * 802 *
806 * Returns true if the message has been sent to the dht candidates, false 803 * Returns true if the message has been sent to the dht candidates, false
807 * otherwise. In case of true the message has to be enqueued to permit the 804 * otherwise. In case of a positive return value the message has to be enqueued
808 * fallback 805 * to permit the fallback.
809 */ 806 */
810bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, 807bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
811 struct sk_buff *skb) 808 struct sk_buff *skb)
@@ -867,7 +864,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
867 batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n"); 864 batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n");
868 ret = true; 865 ret = true;
869 } else { 866 } else {
870 /* Send the request on the DHT */ 867 /* Send the request to the DHT */
871 ret = batadv_dat_send_data(bat_priv, skb, ip_dst, 868 ret = batadv_dat_send_data(bat_priv, skb, ip_dst,
872 BATADV_P_DAT_DHT_GET); 869 BATADV_P_DAT_DHT_GET);
873 } 870 }
@@ -884,7 +881,7 @@ out:
884 * @skb: packet to check 881 * @skb: packet to check
885 * @hdr_size: size of the encapsulation header 882 * @hdr_size: size of the encapsulation header
886 * 883 *
887 * Returns true if the request has been answered, false otherwise 884 * Returns true if the request has been answered, false otherwise.
888 */ 885 */
889bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, 886bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
890 struct sk_buff *skb, int hdr_size) 887 struct sk_buff *skb, int hdr_size)
@@ -924,10 +921,9 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
924 if (!skb_new) 921 if (!skb_new)
925 goto out; 922 goto out;
926 923
927 /* to preserve backwards compatibility, here the node has to answer 924 /* To preserve backwards compatibility, the node has to choose the outgoing
928 * using the same packet type it received for the request. This is due 925 * format based on the incoming request packet type. The assumption is
929 * to that if a node is not using the 4addr packet format it may not 926 * that a node not using the 4addr packet format doesn't support it.
930 * support it.
931 */ 927 */
932 if (hdr_size == sizeof(struct batadv_unicast_4addr_packet)) 928 if (hdr_size == sizeof(struct batadv_unicast_4addr_packet))
933 err = batadv_unicast_4addr_send_skb(bat_priv, skb_new, 929 err = batadv_unicast_4addr_send_skb(bat_priv, skb_new,
@@ -977,7 +973,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
977 batadv_dat_entry_add(bat_priv, ip_dst, hw_dst); 973 batadv_dat_entry_add(bat_priv, ip_dst, hw_dst);
978 974
979 /* Send the ARP reply to the candidates for both the IP addresses that 975 /* Send the ARP reply to the candidates for both the IP addresses that
980 * the node got within the ARP reply 976 * the node obtained from the ARP reply
981 */ 977 */
982 batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); 978 batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT);
983 batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); 979 batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT);
@@ -987,7 +983,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
987 * DAT storage only 983 * DAT storage only
988 * @bat_priv: the bat priv with all the soft interface information 984 * @bat_priv: the bat priv with all the soft interface information
989 * @skb: packet to check 985 * @skb: packet to check
990 * @hdr_size: siaze of the encapsulation header 986 * @hdr_size: size of the encapsulation header
991 */ 987 */
992bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, 988bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
993 struct sk_buff *skb, int hdr_size) 989 struct sk_buff *skb, int hdr_size)
@@ -1031,11 +1027,11 @@ out:
1031 1027
1032/** 1028/**
1033 * batadv_dat_drop_broadcast_packet - check if an ARP request has to be dropped 1029 * batadv_dat_drop_broadcast_packet - check if an ARP request has to be dropped
1034 * (because the node has already got the reply via DAT) or not 1030 * (because the node has already obtained the reply via DAT) or not
1035 * @bat_priv: the bat priv with all the soft interface information 1031 * @bat_priv: the bat priv with all the soft interface information
1036 * @forw_packet: the broadcast packet 1032 * @forw_packet: the broadcast packet
1037 * 1033 *
1038 * Returns true if the node can drop the packet, false otherwise 1034 * Returns true if the node can drop the packet, false otherwise.
1039 */ 1035 */
1040bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, 1036bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
1041 struct batadv_forw_packet *forw_packet) 1037 struct batadv_forw_packet *forw_packet)
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index f105219f4a4b..1ce4b8763ef2 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -190,6 +190,33 @@ next:
190 return curr_gw; 190 return curr_gw;
191} 191}
192 192
193/**
194 * batadv_gw_check_client_stop - check if client mode has been switched off
195 * @bat_priv: the bat priv with all the soft interface information
196 *
197 * This function assumes the caller has checked that the gw state *is actually
198 * changing*; it is therefore not supposed to be called when there is no
199 * state change.
200 */
201void batadv_gw_check_client_stop(struct batadv_priv *bat_priv)
202{
203 struct batadv_gw_node *curr_gw;
204
205 if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT)
206 return;
207
208 curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
209 if (!curr_gw)
210 return;
211
212 /* if batman-adv is switching the gw client mode off and a gateway was
213 * already selected, send a DEL uevent
214 */
215 batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL, NULL);
216
217 batadv_gw_node_free_ref(curr_gw);
218}
219
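
For reference, the sysfs hunk further below invokes the new helper in this order, so the DEL uevent goes out while the node is still in client mode (all names from this patch):

batadv_gw_deselect(bat_priv);
/* uevent must be thrown while the old gw state is still visible */
batadv_gw_check_client_stop(bat_priv);
atomic_set(&bat_priv->gw_mode, (unsigned int)gw_mode_tmp);
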
193void batadv_gw_election(struct batadv_priv *bat_priv) 220void batadv_gw_election(struct batadv_priv *bat_priv)
194{ 221{
195 struct batadv_gw_node *curr_gw = NULL, *next_gw = NULL; 222 struct batadv_gw_node *curr_gw = NULL, *next_gw = NULL;
@@ -508,6 +535,7 @@ out:
508 return 0; 535 return 0;
509} 536}
510 537
538/* this call might reallocate skb data */
511static bool batadv_is_type_dhcprequest(struct sk_buff *skb, int header_len) 539static bool batadv_is_type_dhcprequest(struct sk_buff *skb, int header_len)
512{ 540{
513 int ret = false; 541 int ret = false;
@@ -568,6 +596,7 @@ out:
568 return ret; 596 return ret;
569} 597}
570 598
599/* this call might reallocate skb data */
571bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) 600bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len)
572{ 601{
573 struct ethhdr *ethhdr; 602 struct ethhdr *ethhdr;
@@ -619,6 +648,12 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len)
619 648
620 if (!pskb_may_pull(skb, *header_len + sizeof(*udphdr))) 649 if (!pskb_may_pull(skb, *header_len + sizeof(*udphdr)))
621 return false; 650 return false;
651
652 /* skb->data might have been reallocated by pskb_may_pull() */
653 ethhdr = (struct ethhdr *)skb->data;
654 if (ntohs(ethhdr->h_proto) == ETH_P_8021Q)
655 ethhdr = (struct ethhdr *)(skb->data + VLAN_HLEN);
656
622 udphdr = (struct udphdr *)(skb->data + *header_len); 657 udphdr = (struct udphdr *)(skb->data + *header_len);
623 *header_len += sizeof(*udphdr); 658 *header_len += sizeof(*udphdr);
624 659
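
The pattern above is generic: pskb_may_pull() may reallocate the skb head, so any pointer taken into skb->data beforehand must be re-read afterwards. A minimal sketch of that idiom (function name and port check illustrative only):

static bool example_pull_udp(struct sk_buff *skb, unsigned int header_len)
{
        struct ethhdr *ethhdr;
        struct udphdr *udphdr;

        if (!pskb_may_pull(skb, header_len + sizeof(*udphdr)))
                return false;

        /* skb->data may point into a freshly allocated head now */
        ethhdr = (struct ethhdr *)skb->data;
        udphdr = (struct udphdr *)(skb->data + header_len);

        /* e.g. DHCP server port, unicast destination */
        return udphdr->dest == htons(67) &&
               !is_multicast_ether_addr(ethhdr->h_dest);
}
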
@@ -634,12 +669,14 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len)
634 return true; 669 return true;
635} 670}
636 671
672/* this call might reallocate skb data */
637bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, 673bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
638 struct sk_buff *skb, struct ethhdr *ethhdr) 674 struct sk_buff *skb)
639{ 675{
640 struct batadv_neigh_node *neigh_curr = NULL, *neigh_old = NULL; 676 struct batadv_neigh_node *neigh_curr = NULL, *neigh_old = NULL;
641 struct batadv_orig_node *orig_dst_node = NULL; 677 struct batadv_orig_node *orig_dst_node = NULL;
642 struct batadv_gw_node *curr_gw = NULL; 678 struct batadv_gw_node *curr_gw = NULL;
679 struct ethhdr *ethhdr;
643 bool ret, out_of_range = false; 680 bool ret, out_of_range = false;
644 unsigned int header_len = 0; 681 unsigned int header_len = 0;
645 uint8_t curr_tq_avg; 682 uint8_t curr_tq_avg;
@@ -648,6 +685,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
648 if (!ret) 685 if (!ret)
649 goto out; 686 goto out;
650 687
688 ethhdr = (struct ethhdr *)skb->data;
651 orig_dst_node = batadv_transtable_search(bat_priv, ethhdr->h_source, 689 orig_dst_node = batadv_transtable_search(bat_priv, ethhdr->h_source,
652 ethhdr->h_dest); 690 ethhdr->h_dest);
653 if (!orig_dst_node) 691 if (!orig_dst_node)
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 039902dca4a6..ceef4ebe8bcd 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -20,6 +20,7 @@
20#ifndef _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ 20#ifndef _NET_BATMAN_ADV_GATEWAY_CLIENT_H_
21#define _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ 21#define _NET_BATMAN_ADV_GATEWAY_CLIENT_H_
22 22
23void batadv_gw_check_client_stop(struct batadv_priv *bat_priv);
23void batadv_gw_deselect(struct batadv_priv *bat_priv); 24void batadv_gw_deselect(struct batadv_priv *bat_priv);
24void batadv_gw_election(struct batadv_priv *bat_priv); 25void batadv_gw_election(struct batadv_priv *bat_priv);
25struct batadv_orig_node * 26struct batadv_orig_node *
@@ -34,7 +35,6 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv,
34void batadv_gw_node_purge(struct batadv_priv *bat_priv); 35void batadv_gw_node_purge(struct batadv_priv *bat_priv);
35int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset); 36int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset);
36bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len); 37bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len);
37bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, 38bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb);
38 struct sk_buff *skb, struct ethhdr *ethhdr);
39 39
40#endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */ 40#endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 522243aff2f3..c478e6bcf89b 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -117,6 +117,58 @@ static int batadv_is_valid_iface(const struct net_device *net_dev)
117 return 1; 117 return 1;
118} 118}
119 119
120/**
121 * batadv_is_wifi_netdev - check if the given net_device struct is a wifi
122 * interface
123 * @net_device: the device to check
124 *
125 * Returns true if the net device is an 802.11 wireless device, false otherwise.
126 */
127static bool batadv_is_wifi_netdev(struct net_device *net_device)
128{
129#ifdef CONFIG_WIRELESS_EXT
130 /* pre-cfg80211 drivers have to implement WEXT, so it is possible to
131 * check for wireless_handlers != NULL
132 */
133 if (net_device->wireless_handlers)
134 return true;
135#endif
136
137 /* cfg80211 drivers have to set ieee80211_ptr */
138 if (net_device->ieee80211_ptr)
139 return true;
140
141 return false;
142}
143
144/**
145 * batadv_is_wifi_iface - check if the given interface represented by ifindex
146 * is a wifi interface
147 * @ifindex: interface index to check
148 *
149 * Returns true if the interface represented by ifindex is an 802.11 wireless
150 * device, false otherwise.
151 */
152bool batadv_is_wifi_iface(int ifindex)
153{
154 struct net_device *net_device = NULL;
155 bool ret = false;
156
157 if (ifindex == BATADV_NULL_IFINDEX)
158 goto out;
159
160 net_device = dev_get_by_index(&init_net, ifindex);
161 if (!net_device)
162 goto out;
163
164 ret = batadv_is_wifi_netdev(net_device);
165
166out:
167 if (net_device)
168 dev_put(net_device);
169 return ret;
170}
171
120static struct batadv_hard_iface * 172static struct batadv_hard_iface *
121batadv_hardif_get_active(const struct net_device *soft_iface) 173batadv_hardif_get_active(const struct net_device *soft_iface)
122{ 174{
@@ -525,7 +577,7 @@ batadv_hardif_add_interface(struct net_device *net_dev)
525 577
526 dev_hold(net_dev); 578 dev_hold(net_dev);
527 579
528 hard_iface = kmalloc(sizeof(*hard_iface), GFP_ATOMIC); 580 hard_iface = kzalloc(sizeof(*hard_iface), GFP_ATOMIC);
529 if (!hard_iface) 581 if (!hard_iface)
530 goto release_dev; 582 goto release_dev;
531 583
@@ -541,18 +593,16 @@ batadv_hardif_add_interface(struct net_device *net_dev)
541 INIT_WORK(&hard_iface->cleanup_work, 593 INIT_WORK(&hard_iface->cleanup_work,
542 batadv_hardif_remove_interface_finish); 594 batadv_hardif_remove_interface_finish);
543 595
596 hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT;
597 if (batadv_is_wifi_netdev(net_dev))
598 hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
599
544 /* extra reference for return */ 600 /* extra reference for return */
545 atomic_set(&hard_iface->refcount, 2); 601 atomic_set(&hard_iface->refcount, 2);
546 602
547 batadv_check_known_mac_addr(hard_iface->net_dev); 603 batadv_check_known_mac_addr(hard_iface->net_dev);
548 list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); 604 list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list);
549 605
550 /* This can't be called via a bat_priv callback because
551 * we have no bat_priv yet.
552 */
553 atomic_set(&hard_iface->bat_iv.ogm_seqno, 1);
554 hard_iface->bat_iv.ogm_buff = NULL;
555
556 return hard_iface; 606 return hard_iface;
557 607
558free_if: 608free_if:
@@ -595,7 +645,7 @@ void batadv_hardif_remove_interfaces(void)
595static int batadv_hard_if_event(struct notifier_block *this, 645static int batadv_hard_if_event(struct notifier_block *this,
596 unsigned long event, void *ptr) 646 unsigned long event, void *ptr)
597{ 647{
598 struct net_device *net_dev = ptr; 648 struct net_device *net_dev = netdev_notifier_info_to_dev(ptr);
599 struct batadv_hard_iface *hard_iface; 649 struct batadv_hard_iface *hard_iface;
600 struct batadv_hard_iface *primary_if = NULL; 650 struct batadv_hard_iface *primary_if = NULL;
601 struct batadv_priv *bat_priv; 651 struct batadv_priv *bat_priv;
@@ -657,38 +707,6 @@ out:
657 return NOTIFY_DONE; 707 return NOTIFY_DONE;
658} 708}
659 709
660/* This function returns true if the interface represented by ifindex is a
661 * 802.11 wireless device
662 */
663bool batadv_is_wifi_iface(int ifindex)
664{
665 struct net_device *net_device = NULL;
666 bool ret = false;
667
668 if (ifindex == BATADV_NULL_IFINDEX)
669 goto out;
670
671 net_device = dev_get_by_index(&init_net, ifindex);
672 if (!net_device)
673 goto out;
674
675#ifdef CONFIG_WIRELESS_EXT
676 /* pre-cfg80211 drivers have to implement WEXT, so it is possible to
677 * check for wireless_handlers != NULL
678 */
679 if (net_device->wireless_handlers)
680 ret = true;
681 else
682#endif
683 /* cfg80211 drivers have to set ieee80211_ptr */
684 if (net_device->ieee80211_ptr)
685 ret = true;
686out:
687 if (net_device)
688 dev_put(net_device);
689 return ret;
690}
691
692struct notifier_block batadv_hard_if_notifier = { 710struct notifier_block batadv_hard_if_notifier = {
693 .notifier_call = batadv_hard_if_event, 711 .notifier_call = batadv_hard_if_event,
694}; 712};
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 0ba6c899b2d3..5a99bb4b6b82 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -177,13 +177,14 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
177 if (len >= sizeof(struct batadv_icmp_packet_rr)) 177 if (len >= sizeof(struct batadv_icmp_packet_rr))
178 packet_len = sizeof(struct batadv_icmp_packet_rr); 178 packet_len = sizeof(struct batadv_icmp_packet_rr);
179 179
180 skb = dev_alloc_skb(packet_len + ETH_HLEN + NET_IP_ALIGN); 180 skb = netdev_alloc_skb_ip_align(NULL, packet_len + ETH_HLEN);
181 if (!skb) { 181 if (!skb) {
182 len = -ENOMEM; 182 len = -ENOMEM;
183 goto out; 183 goto out;
184 } 184 }
185 185
186 skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); 186 skb->priority = TC_PRIO_CONTROL;
187 skb_reserve(skb, ETH_HLEN);
187 icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len); 188 icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len);
188 189
189 if (copy_from_user(icmp_packet, buff, packet_len)) { 190 if (copy_from_user(icmp_packet, buff, packet_len)) {
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 51aafd669cbb..c72d1bcdcf49 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -19,6 +19,10 @@
19 19
20#include <linux/crc32c.h> 20#include <linux/crc32c.h>
21#include <linux/highmem.h> 21#include <linux/highmem.h>
22#include <linux/if_vlan.h>
23#include <net/ip.h>
24#include <net/ipv6.h>
25#include <net/dsfield.h>
22#include "main.h" 26#include "main.h"
23#include "sysfs.h" 27#include "sysfs.h"
24#include "debugfs.h" 28#include "debugfs.h"
@@ -249,6 +253,60 @@ out:
249 return primary_if; 253 return primary_if;
250} 254}
251 255
256/**
257 * batadv_skb_set_priority - sets skb priority according to packet content
258 * @skb: the packet to be sent
259 * @offset: offset to the packet content
260 *
261 * This function sets a value between 256 and 263 (802.1d priority), which
262 * can be interpreted by cfg80211 or other drivers.
263 */
264void batadv_skb_set_priority(struct sk_buff *skb, int offset)
265{
266 struct iphdr ip_hdr_tmp, *ip_hdr;
267 struct ipv6hdr ip6_hdr_tmp, *ip6_hdr;
268 struct ethhdr ethhdr_tmp, *ethhdr;
269 struct vlan_ethhdr *vhdr, vhdr_tmp;
270 u32 prio;
271
272 /* already set, do nothing */
273 if (skb->priority >= 256 && skb->priority <= 263)
274 return;
275
276 ethhdr = skb_header_pointer(skb, offset, sizeof(*ethhdr), &ethhdr_tmp);
277 if (!ethhdr)
278 return;
279
280 switch (ethhdr->h_proto) {
281 case htons(ETH_P_8021Q):
282 vhdr = skb_header_pointer(skb, offset + sizeof(*vhdr),
283 sizeof(*vhdr), &vhdr_tmp);
284 if (!vhdr)
285 return;
286 prio = ntohs(vhdr->h_vlan_TCI) & VLAN_PRIO_MASK;
287 prio = prio >> VLAN_PRIO_SHIFT;
288 break;
289 case htons(ETH_P_IP):
290 ip_hdr = skb_header_pointer(skb, offset + sizeof(*ethhdr),
291 sizeof(*ip_hdr), &ip_hdr_tmp);
292 if (!ip_hdr)
293 return;
294 prio = (ipv4_get_dsfield(ip_hdr) & 0xfc) >> 5;
295 break;
296 case htons(ETH_P_IPV6):
297 ip6_hdr = skb_header_pointer(skb, offset + sizeof(*ethhdr),
298 sizeof(*ip6_hdr), &ip6_hdr_tmp);
299 if (!ip6_hdr)
300 return;
301 prio = (ipv6_get_dsfield(ip6_hdr) & 0xfc) >> 5;
302 break;
303 default:
304 return;
305 }
306
307 skb->priority = prio + 256;
308}
309
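
A worked instance of the DS-field branch above, with the arithmetic re-run as plain userspace C (input value illustrative):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint8_t dsfield = 0xb8;         /* e.g. DSCP EF (46) shifted left by 2 */
        uint32_t prio = (dsfield & 0xfc) >> 5;  /* top three bits -> 5 */

        printf("skb->priority = %u\n", prio + 256);     /* prints 261 */
        return 0;
}
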
252static int batadv_recv_unhandled_packet(struct sk_buff *skb, 310static int batadv_recv_unhandled_packet(struct sk_buff *skb,
253 struct batadv_hard_iface *recv_if) 311 struct batadv_hard_iface *recv_if)
254{ 312{
@@ -473,7 +531,6 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr)
473 crc = crc32c(crc, data, len); 531 crc = crc32c(crc, data, len);
474 consumed += len; 532 consumed += len;
475 } 533 }
476 skb_abort_seq_read(&st);
477 534
478 return htonl(crc); 535 return htonl(crc);
479} 536}
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 59a0d6af15c8..24675523930f 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -26,7 +26,7 @@
26#define BATADV_DRIVER_DEVICE "batman-adv" 26#define BATADV_DRIVER_DEVICE "batman-adv"
27 27
28#ifndef BATADV_SOURCE_VERSION 28#ifndef BATADV_SOURCE_VERSION
29#define BATADV_SOURCE_VERSION "2013.2.0" 29#define BATADV_SOURCE_VERSION "2013.4.0"
30#endif 30#endif
31 31
32/* B.A.T.M.A.N. parameters */ 32/* B.A.T.M.A.N. parameters */
@@ -76,6 +76,11 @@
76 76
77#define BATADV_LOG_BUF_LEN 8192 /* has to be a power of 2 */ 77#define BATADV_LOG_BUF_LEN 8192 /* has to be a power of 2 */
78 78
79/* number of packets to send for broadcasts on different interface types */
80#define BATADV_NUM_BCASTS_DEFAULT 1
81#define BATADV_NUM_BCASTS_WIRELESS 3
82#define BATADV_NUM_BCASTS_MAX 3
83
79/* msecs after which an ARP_REQUEST is sent in broadcast as fallback */ 84/* msecs after which an ARP_REQUEST is sent in broadcast as fallback */
80#define ARP_REQ_DELAY 250 85#define ARP_REQ_DELAY 250
81/* numbers of originator to contact for any PUT/GET DHT operation */ 86/* numbers of originator to contact for any PUT/GET DHT operation */
@@ -157,6 +162,17 @@ enum batadv_uev_type {
157#include <linux/seq_file.h> 162#include <linux/seq_file.h>
158#include "types.h" 163#include "types.h"
159 164
165/**
166 * batadv_vlan_flags - flags for the four MSB of any vlan ID field
167 * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not
168 */
169enum batadv_vlan_flags {
170 BATADV_VLAN_HAS_TAG = BIT(15),
171};
172
173#define BATADV_PRINT_VID(vid) (vid & BATADV_VLAN_HAS_TAG ? \
174 (int)(vid & VLAN_VID_MASK) : -1)
175
160extern char batadv_routing_algo[]; 176extern char batadv_routing_algo[];
161extern struct list_head batadv_hardif_list; 177extern struct list_head batadv_hardif_list;
162 178
@@ -168,6 +184,7 @@ void batadv_mesh_free(struct net_device *soft_iface);
168int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr); 184int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr);
169struct batadv_hard_iface * 185struct batadv_hard_iface *
170batadv_seq_print_text_primary_if_get(struct seq_file *seq); 186batadv_seq_print_text_primary_if_get(struct seq_file *seq);
187void batadv_skb_set_priority(struct sk_buff *skb, int offset);
171int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, 188int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
172 struct packet_type *ptype, 189 struct packet_type *ptype,
173 struct net_device *orig_dev); 190 struct net_device *orig_dev);
@@ -237,7 +254,7 @@ static inline void batadv_dbg(int type __always_unused,
237 254
238/* returns 1 if they are the same ethernet addr 255/* returns 1 if they are the same ethernet addr
239 * 256 *
240 * note: can't use compare_ether_addr() as it requires aligned memory 257 * note: can't use ether_addr_equal() as it requires aligned memory
241 */ 258 */
242static inline int batadv_compare_eth(const void *data1, const void *data2) 259static inline int batadv_compare_eth(const void *data1, const void *data2)
243{ 260{
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index e84629ece9b7..a487d46e0aec 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1245,7 +1245,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
1245 return; 1245 return;
1246 1246
1247 /* Set the mac header as if we actually sent the packet uncoded */ 1247 /* Set the mac header as if we actually sent the packet uncoded */
1248 ethhdr = (struct ethhdr *)skb_mac_header(skb); 1248 ethhdr = eth_hdr(skb);
1249 memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN); 1249 memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN);
1250 memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN); 1250 memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN);
1251 1251
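
These conversions are purely mechanical: eth_hdr() is the long-standing helper from include/linux/if_ether.h and expands to exactly the cast it replaces:

static inline struct ethhdr *eth_hdr(const struct sk_buff *skb)
{
        return (struct ethhdr *)skb_mac_header(skb);
}
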
@@ -1359,18 +1359,17 @@ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb,
1359 * buffer 1359 * buffer
1360 * @skb: data skb to forward 1360 * @skb: data skb to forward
1361 * @neigh_node: next hop to forward packet to 1361 * @neigh_node: next hop to forward packet to
1362 * @ethhdr: pointer to the ethernet header inside the skb
1363 * 1362 *
1364 * Returns true if the skb was consumed (encoded packet sent) or false otherwise 1363 * Returns true if the skb was consumed (encoded packet sent) or false otherwise
1365 */ 1364 */
1366bool batadv_nc_skb_forward(struct sk_buff *skb, 1365bool batadv_nc_skb_forward(struct sk_buff *skb,
1367 struct batadv_neigh_node *neigh_node, 1366 struct batadv_neigh_node *neigh_node)
1368 struct ethhdr *ethhdr)
1369{ 1367{
1370 const struct net_device *netdev = neigh_node->if_incoming->soft_iface; 1368 const struct net_device *netdev = neigh_node->if_incoming->soft_iface;
1371 struct batadv_priv *bat_priv = netdev_priv(netdev); 1369 struct batadv_priv *bat_priv = netdev_priv(netdev);
1372 struct batadv_unicast_packet *packet; 1370 struct batadv_unicast_packet *packet;
1373 struct batadv_nc_path *nc_path; 1371 struct batadv_nc_path *nc_path;
1372 struct ethhdr *ethhdr = eth_hdr(skb);
1374 __be32 packet_id; 1373 __be32 packet_id;
1375 u8 *payload; 1374 u8 *payload;
1376 1375
@@ -1423,7 +1422,7 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
1423{ 1422{
1424 struct batadv_unicast_packet *packet; 1423 struct batadv_unicast_packet *packet;
1425 struct batadv_nc_path *nc_path; 1424 struct batadv_nc_path *nc_path;
1426 struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); 1425 struct ethhdr *ethhdr = eth_hdr(skb);
1427 __be32 packet_id; 1426 __be32 packet_id;
1428 u8 *payload; 1427 u8 *payload;
1429 1428
@@ -1482,7 +1481,7 @@ out:
1482void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, 1481void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
1483 struct sk_buff *skb) 1482 struct sk_buff *skb)
1484{ 1483{
1485 struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); 1484 struct ethhdr *ethhdr = eth_hdr(skb);
1486 1485
1487 if (batadv_is_my_mac(bat_priv, ethhdr->h_dest)) 1486 if (batadv_is_my_mac(bat_priv, ethhdr->h_dest))
1488 return; 1487 return;
@@ -1533,7 +1532,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
1533 skb_reset_network_header(skb); 1532 skb_reset_network_header(skb);
1534 1533
1535 /* Reconstruct original mac header */ 1534 /* Reconstruct original mac header */
1536 ethhdr = (struct ethhdr *)skb_mac_header(skb); 1535 ethhdr = eth_hdr(skb);
1537 memcpy(ethhdr, &ethhdr_tmp, sizeof(*ethhdr)); 1536 memcpy(ethhdr, &ethhdr_tmp, sizeof(*ethhdr));
1538 1537
1539 /* Select the correct unicast header information based on the location 1538 /* Select the correct unicast header information based on the location
@@ -1677,7 +1676,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
1677 return NET_RX_DROP; 1676 return NET_RX_DROP;
1678 1677
1679 coded_packet = (struct batadv_coded_packet *)skb->data; 1678 coded_packet = (struct batadv_coded_packet *)skb->data;
1680 ethhdr = (struct ethhdr *)skb_mac_header(skb); 1679 ethhdr = eth_hdr(skb);
1681 1680
1682 /* Verify frame is destined for us */ 1681 /* Verify frame is destined for us */
1683 if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) && 1682 if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) &&
@@ -1763,6 +1762,13 @@ int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset)
1763 /* For each orig_node in this bin */ 1762 /* For each orig_node in this bin */
1764 rcu_read_lock(); 1763 rcu_read_lock();
1765 hlist_for_each_entry_rcu(orig_node, head, hash_entry) { 1764 hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
1765 /* no need to print the orig node if it does not have
1766 * network coding neighbors
1767 */
1768 if (list_empty(&orig_node->in_coding_list) &&
1769 list_empty(&orig_node->out_coding_list))
1770 continue;
1771
1766 seq_printf(seq, "Node: %pM\n", orig_node->orig); 1772 seq_printf(seq, "Node: %pM\n", orig_node->orig);
1767 1773
1768 seq_puts(seq, " Ingoing: "); 1774 seq_puts(seq, " Ingoing: ");
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 4fa6d0caddbd..85a4ec81ad50 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -36,8 +36,7 @@ void batadv_nc_purge_orig(struct batadv_priv *bat_priv,
36void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv); 36void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv);
37void batadv_nc_init_orig(struct batadv_orig_node *orig_node); 37void batadv_nc_init_orig(struct batadv_orig_node *orig_node);
38bool batadv_nc_skb_forward(struct sk_buff *skb, 38bool batadv_nc_skb_forward(struct sk_buff *skb,
39 struct batadv_neigh_node *neigh_node, 39 struct batadv_neigh_node *neigh_node);
40 struct ethhdr *ethhdr);
41void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, 40void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
42 struct sk_buff *skb); 41 struct sk_buff *skb);
43void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, 42void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
@@ -87,8 +86,7 @@ static inline void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
87} 86}
88 87
89static inline bool batadv_nc_skb_forward(struct sk_buff *skb, 88static inline bool batadv_nc_skb_forward(struct sk_buff *skb,
90 struct batadv_neigh_node *neigh_node, 89 struct batadv_neigh_node *neigh_node)
91 struct ethhdr *ethhdr)
92{ 90{
93 return false; 91 return false;
94} 92}
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index fad1a2093e15..f50553a7de62 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -92,7 +92,7 @@ batadv_orig_node_get_router(struct batadv_orig_node *orig_node)
92 92
93struct batadv_neigh_node * 93struct batadv_neigh_node *
94batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, 94batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
95 const uint8_t *neigh_addr, uint32_t seqno) 95 const uint8_t *neigh_addr)
96{ 96{
97 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); 97 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
98 struct batadv_neigh_node *neigh_node; 98 struct batadv_neigh_node *neigh_node;
@@ -110,8 +110,8 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
110 atomic_set(&neigh_node->refcount, 2); 110 atomic_set(&neigh_node->refcount, 2);
111 111
112 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 112 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
113 "Creating new neighbor %pM, initial seqno %d\n", 113 "Creating new neighbor %pM on interface %s\n", neigh_addr,
114 neigh_addr, seqno); 114 hard_iface->net_dev->name);
115 115
116out: 116out:
117 return neigh_node; 117 return neigh_node;
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 734e5a3d8a5b..7887b84a9af4 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -31,7 +31,7 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
31 const uint8_t *addr); 31 const uint8_t *addr);
32struct batadv_neigh_node * 32struct batadv_neigh_node *
33batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, 33batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
34 const uint8_t *neigh_addr, uint32_t seqno); 34 const uint8_t *neigh_addr);
35void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node); 35void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node);
36struct batadv_neigh_node * 36struct batadv_neigh_node *
37batadv_orig_node_get_router(struct batadv_orig_node *orig_node); 37batadv_orig_node_get_router(struct batadv_orig_node *orig_node);
diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c
deleted file mode 100644
index ccab0bbdbb59..000000000000
--- a/net/batman-adv/ring_buffer.c
+++ /dev/null
@@ -1,51 +0,0 @@
1/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
2 *
3 * Marek Lindner
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#include "main.h"
21#include "ring_buffer.h"
22
23void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index,
24 uint8_t value)
25{
26 lq_recv[*lq_index] = value;
27 *lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE;
28}
29
30uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[])
31{
32 const uint8_t *ptr;
33 uint16_t count = 0, i = 0, sum = 0;
34
35 ptr = lq_recv;
36
37 while (i < BATADV_TQ_GLOBAL_WINDOW_SIZE) {
38 if (*ptr != 0) {
39 count++;
40 sum += *ptr;
41 }
42
43 i++;
44 ptr++;
45 }
46
47 if (count == 0)
48 return 0;
49
50 return (uint8_t)(sum / count);
51}
diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h
deleted file mode 100644
index 3f92ae248e83..000000000000
--- a/net/batman-adv/ring_buffer.h
+++ /dev/null
@@ -1,27 +0,0 @@
1/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
2 *
3 * Marek Lindner
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#ifndef _NET_BATMAN_ADV_RING_BUFFER_H_
21#define _NET_BATMAN_ADV_RING_BUFFER_H_
22
23void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index,
24 uint8_t value);
25uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]);
26
27#endif /* _NET_BATMAN_ADV_RING_BUFFER_H_ */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index b27a4d792d15..0439395d7ba5 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -34,35 +34,6 @@
34static int batadv_route_unicast_packet(struct sk_buff *skb, 34static int batadv_route_unicast_packet(struct sk_buff *skb,
35 struct batadv_hard_iface *recv_if); 35 struct batadv_hard_iface *recv_if);
36 36
37void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
38{
39 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
40 struct batadv_hashtable *hash = bat_priv->orig_hash;
41 struct hlist_head *head;
42 struct batadv_orig_node *orig_node;
43 unsigned long *word;
44 uint32_t i;
45 size_t word_index;
46 uint8_t *w;
47
48 for (i = 0; i < hash->size; i++) {
49 head = &hash->table[i];
50
51 rcu_read_lock();
52 hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
53 spin_lock_bh(&orig_node->ogm_cnt_lock);
54 word_index = hard_iface->if_num * BATADV_NUM_WORDS;
55 word = &(orig_node->bcast_own[word_index]);
56
57 batadv_bit_get_packet(bat_priv, word, 1, 0);
58 w = &orig_node->bcast_own_sum[hard_iface->if_num];
59 *w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE);
60 spin_unlock_bh(&orig_node->ogm_cnt_lock);
61 }
62 rcu_read_unlock();
63 }
64}
65
66static void _batadv_update_route(struct batadv_priv *bat_priv, 37static void _batadv_update_route(struct batadv_priv *bat_priv,
67 struct batadv_orig_node *orig_node, 38 struct batadv_orig_node *orig_node,
68 struct batadv_neigh_node *neigh_node) 39 struct batadv_neigh_node *neigh_node)
@@ -256,7 +227,7 @@ bool batadv_check_management_packet(struct sk_buff *skb,
256 if (unlikely(!pskb_may_pull(skb, header_len))) 227 if (unlikely(!pskb_may_pull(skb, header_len)))
257 return false; 228 return false;
258 229
259 ethhdr = (struct ethhdr *)skb_mac_header(skb); 230 ethhdr = eth_hdr(skb);
260 231
261 /* packet with broadcast indication but unicast recipient */ 232 /* packet with broadcast indication but unicast recipient */
262 if (!is_broadcast_ether_addr(ethhdr->h_dest)) 233 if (!is_broadcast_ether_addr(ethhdr->h_dest))
@@ -314,7 +285,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
314 icmp_packet->msg_type = BATADV_ECHO_REPLY; 285 icmp_packet->msg_type = BATADV_ECHO_REPLY;
315 icmp_packet->header.ttl = BATADV_TTL; 286 icmp_packet->header.ttl = BATADV_TTL;
316 287
317 if (batadv_send_skb_to_orig(skb, orig_node, NULL)) 288 if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
318 ret = NET_RX_SUCCESS; 289 ret = NET_RX_SUCCESS;
319 290
320out: 291out:
@@ -362,7 +333,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
362 icmp_packet->msg_type = BATADV_TTL_EXCEEDED; 333 icmp_packet->msg_type = BATADV_TTL_EXCEEDED;
363 icmp_packet->header.ttl = BATADV_TTL; 334 icmp_packet->header.ttl = BATADV_TTL;
364 335
365 if (batadv_send_skb_to_orig(skb, orig_node, NULL)) 336 if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
366 ret = NET_RX_SUCCESS; 337 ret = NET_RX_SUCCESS;
367 338
368out: 339out:
@@ -392,7 +363,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
392 if (unlikely(!pskb_may_pull(skb, hdr_size))) 363 if (unlikely(!pskb_may_pull(skb, hdr_size)))
393 goto out; 364 goto out;
394 365
395 ethhdr = (struct ethhdr *)skb_mac_header(skb); 366 ethhdr = eth_hdr(skb);
396 367
397 /* packet with unicast indication but broadcast recipient */ 368 /* packet with unicast indication but broadcast recipient */
398 if (is_broadcast_ether_addr(ethhdr->h_dest)) 369 if (is_broadcast_ether_addr(ethhdr->h_dest))
@@ -439,7 +410,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
439 icmp_packet->header.ttl--; 410 icmp_packet->header.ttl--;
440 411
441 /* route it */ 412 /* route it */
442 if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) 413 if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP)
443 ret = NET_RX_SUCCESS; 414 ret = NET_RX_SUCCESS;
444 415
445out: 416out:
@@ -569,7 +540,7 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
569 if (unlikely(!pskb_may_pull(skb, hdr_size))) 540 if (unlikely(!pskb_may_pull(skb, hdr_size)))
570 return -ENODATA; 541 return -ENODATA;
571 542
572 ethhdr = (struct ethhdr *)skb_mac_header(skb); 543 ethhdr = eth_hdr(skb);
573 544
574 /* packet with unicast indication but broadcast recipient */ 545 /* packet with unicast indication but broadcast recipient */
575 if (is_broadcast_ether_addr(ethhdr->h_dest)) 546 if (is_broadcast_ether_addr(ethhdr->h_dest))
@@ -803,8 +774,8 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
803 struct batadv_orig_node *orig_node = NULL; 774 struct batadv_orig_node *orig_node = NULL;
804 struct batadv_neigh_node *neigh_node = NULL; 775 struct batadv_neigh_node *neigh_node = NULL;
805 struct batadv_unicast_packet *unicast_packet; 776 struct batadv_unicast_packet *unicast_packet;
806 struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); 777 struct ethhdr *ethhdr = eth_hdr(skb);
807 int ret = NET_RX_DROP; 778 int res, hdr_len, ret = NET_RX_DROP;
808 struct sk_buff *new_skb; 779 struct sk_buff *new_skb;
809 780
810 unicast_packet = (struct batadv_unicast_packet *)skb->data; 781 unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -864,16 +835,35 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
864 /* decrement ttl */ 835 /* decrement ttl */
865 unicast_packet->header.ttl--; 836 unicast_packet->header.ttl--;
866 837
867 /* network code packet if possible */ 838 switch (unicast_packet->header.packet_type) {
868 if (batadv_nc_skb_forward(skb, neigh_node, ethhdr)) { 839 case BATADV_UNICAST_4ADDR:
869 ret = NET_RX_SUCCESS; 840 hdr_len = sizeof(struct batadv_unicast_4addr_packet);
870 } else if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) { 841 break;
871 ret = NET_RX_SUCCESS; 842 case BATADV_UNICAST:
843 hdr_len = sizeof(struct batadv_unicast_packet);
844 break;
845 default:
846 /* other packet types not supported - yet */
847 hdr_len = -1;
848 break;
849 }
850
851 if (hdr_len > 0)
852 batadv_skb_set_priority(skb, hdr_len);
853
854 res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
872 855
873 /* Update stats counter */ 856 /* translate transmit result into receive result */
857 if (res == NET_XMIT_SUCCESS) {
858 /* skb was transmitted and consumed */
874 batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); 859 batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
875 batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, 860 batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
876 skb->len + ETH_HLEN); 861 skb->len + ETH_HLEN);
862
863 ret = NET_RX_SUCCESS;
864 } else if (res == NET_XMIT_POLICED) {
865 /* skb was buffered and consumed */
866 ret = NET_RX_SUCCESS;
877 } 867 }
878 868
879out: 869out:
@@ -1165,7 +1155,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
1165 if (unlikely(!pskb_may_pull(skb, hdr_size))) 1155 if (unlikely(!pskb_may_pull(skb, hdr_size)))
1166 goto out; 1156 goto out;
1167 1157
1168 ethhdr = (struct ethhdr *)skb_mac_header(skb); 1158 ethhdr = eth_hdr(skb);
1169 1159
1170 /* packet with broadcast indication but unicast recipient */ 1160 /* packet with broadcast indication but unicast recipient */
1171 if (!is_broadcast_ether_addr(ethhdr->h_dest)) 1161 if (!is_broadcast_ether_addr(ethhdr->h_dest))
@@ -1219,6 +1209,8 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
1219 if (batadv_bla_check_bcast_duplist(bat_priv, skb)) 1209 if (batadv_bla_check_bcast_duplist(bat_priv, skb))
1220 goto out; 1210 goto out;
1221 1211
1212 batadv_skb_set_priority(skb, sizeof(struct batadv_bcast_packet));
1213
1222 /* rebroadcast packet */ 1214 /* rebroadcast packet */
1223 batadv_add_bcast_packet_to_list(bat_priv, skb, 1); 1215 batadv_add_bcast_packet_to_list(bat_priv, skb, 1);
1224 1216
@@ -1265,7 +1257,7 @@ int batadv_recv_vis_packet(struct sk_buff *skb,
1265 return NET_RX_DROP; 1257 return NET_RX_DROP;
1266 1258
1267 vis_packet = (struct batadv_vis_packet *)skb->data; 1259 vis_packet = (struct batadv_vis_packet *)skb->data;
1268 ethhdr = (struct ethhdr *)skb_mac_header(skb); 1260 ethhdr = eth_hdr(skb);
1269 1261
1270 /* not for me */ 1262 /* not for me */
1271 if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) 1263 if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest))
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 99eeafaba407..72a29bde2010 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -20,7 +20,6 @@
20#ifndef _NET_BATMAN_ADV_ROUTING_H_ 20#ifndef _NET_BATMAN_ADV_ROUTING_H_
21#define _NET_BATMAN_ADV_ROUTING_H_ 21#define _NET_BATMAN_ADV_ROUTING_H_
22 22
23void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface);
24bool batadv_check_management_packet(struct sk_buff *skb, 23bool batadv_check_management_packet(struct sk_buff *skb,
25 struct batadv_hard_iface *hard_iface, 24 struct batadv_hard_iface *hard_iface,
26 int header_len); 25 int header_len);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 263cfd1ccee7..0266edd0fa7f 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -61,13 +61,12 @@ int batadv_send_skb_packet(struct sk_buff *skb,
61 61
62 skb_reset_mac_header(skb); 62 skb_reset_mac_header(skb);
63 63
64 ethhdr = (struct ethhdr *)skb_mac_header(skb); 64 ethhdr = eth_hdr(skb);
65 memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN); 65 memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN);
66 memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN); 66 memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN);
67 ethhdr->h_proto = __constant_htons(ETH_P_BATMAN); 67 ethhdr->h_proto = __constant_htons(ETH_P_BATMAN);
68 68
69 skb_set_network_header(skb, ETH_HLEN); 69 skb_set_network_header(skb, ETH_HLEN);
70 skb->priority = TC_PRIO_CONTROL;
71 skb->protocol = __constant_htons(ETH_P_BATMAN); 70 skb->protocol = __constant_htons(ETH_P_BATMAN);
72 71
73 skb->dev = hard_iface->net_dev; 72 skb->dev = hard_iface->net_dev;
@@ -96,26 +95,37 @@ send_skb_err:
96 * host, NULL can be passed as recv_if and no interface alternating is 95 * host, NULL can be passed as recv_if and no interface alternating is
97 * attempted. 96 * attempted.
98 * 97 *
99 * Returns TRUE on success; FALSE otherwise. 98 * Returns NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or
99 * NET_XMIT_POLICED if the skb is buffered for later transmission.
100 */ 100 */
101bool batadv_send_skb_to_orig(struct sk_buff *skb, 101int batadv_send_skb_to_orig(struct sk_buff *skb,
102 struct batadv_orig_node *orig_node, 102 struct batadv_orig_node *orig_node,
103 struct batadv_hard_iface *recv_if) 103 struct batadv_hard_iface *recv_if)
104{ 104{
105 struct batadv_priv *bat_priv = orig_node->bat_priv; 105 struct batadv_priv *bat_priv = orig_node->bat_priv;
106 struct batadv_neigh_node *neigh_node; 106 struct batadv_neigh_node *neigh_node;
107 int ret = NET_XMIT_DROP;
107 108
108 /* batadv_find_router() increases neigh_nodes refcount if found. */ 109 /* batadv_find_router() increases neigh_nodes refcount if found. */
109 neigh_node = batadv_find_router(bat_priv, orig_node, recv_if); 110 neigh_node = batadv_find_router(bat_priv, orig_node, recv_if);
110 if (!neigh_node) 111 if (!neigh_node)
111 return false; 112 return ret;
112 113
113 /* route it */ 114 /* try to network code the packet, if it is received on an interface
114 batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); 115 * (i.e. being forwarded). If the packet originates from this node or if
116 * network coding fails, then send the packet as usual.
117 */
118 if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) {
119 ret = NET_XMIT_POLICED;
120 } else {
121 batadv_send_skb_packet(skb, neigh_node->if_incoming,
122 neigh_node->addr);
123 ret = NET_XMIT_SUCCESS;
124 }
115 125
116 batadv_neigh_node_free_ref(neigh_node); 126 batadv_neigh_node_free_ref(neigh_node);
117 127
118 return true; 128 return ret;
119} 129}
120 130
121void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface) 131void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface)
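
Callers translate the new transmit codes back into receive results; a condensed sketch of the convention the routing.c hunk above applies (wrapper name hypothetical):

static int example_xmit_to_rx(struct sk_buff *skb,
                              struct batadv_orig_node *orig_node,
                              struct batadv_hard_iface *recv_if)
{
        int res = batadv_send_skb_to_orig(skb, orig_node, recv_if);

        /* both "transmitted" and "buffered for coding" consume the skb */
        if (res == NET_XMIT_SUCCESS || res == NET_XMIT_POLICED)
                return NET_RX_SUCCESS;

        return NET_RX_DROP;
}
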
@@ -152,8 +162,6 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
152 struct batadv_forw_packet *forw_packet, 162 struct batadv_forw_packet *forw_packet,
153 unsigned long send_time) 163 unsigned long send_time)
154{ 164{
155 INIT_HLIST_NODE(&forw_packet->list);
156
157 /* add new packet to packet list */ 165 /* add new packet to packet list */
158 spin_lock_bh(&bat_priv->forw_bcast_list_lock); 166 spin_lock_bh(&bat_priv->forw_bcast_list_lock);
159 hlist_add_head(&forw_packet->list, &bat_priv->forw_bcast_list); 167 hlist_add_head(&forw_packet->list, &bat_priv->forw_bcast_list);
@@ -260,6 +268,9 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
260 if (hard_iface->soft_iface != soft_iface) 268 if (hard_iface->soft_iface != soft_iface)
261 continue; 269 continue;
262 270
271 if (forw_packet->num_packets >= hard_iface->num_bcasts)
272 continue;
273
263 /* send a copy of the saved skb */ 274 /* send a copy of the saved skb */
264 skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC); 275 skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC);
265 if (skb1) 276 if (skb1)
@@ -271,7 +282,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
271 forw_packet->num_packets++; 282 forw_packet->num_packets++;
272 283
273 /* if we still have some more bcasts to send */ 284 /* if we still have some more bcasts to send */
274 if (forw_packet->num_packets < 3) { 285 if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) {
275 _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, 286 _batadv_add_bcast_packet_to_list(bat_priv, forw_packet,
276 msecs_to_jiffies(5)); 287 msecs_to_jiffies(5));
277 return; 288 return;
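
Taken together with the hard-interface hunk earlier, the rebroadcast count is now bounded per interface type rather than hard-coded to three; a compact restatement of the resulting behaviour (constants from this patch):

/* wired: num_bcasts == BATADV_NUM_BCASTS_DEFAULT  (1 copy)
 * wifi:  num_bcasts == BATADV_NUM_BCASTS_WIRELESS (3 copies)
 * the scheduler skips an interface once enough copies went out,
 * and never re-queues beyond BATADV_NUM_BCASTS_MAX overall
 */
if (forw_packet->num_packets >= hard_iface->num_bcasts)
        continue;
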
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 38e662f619ac..e7b17880fca4 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -23,9 +23,9 @@
23int batadv_send_skb_packet(struct sk_buff *skb, 23int batadv_send_skb_packet(struct sk_buff *skb,
24 struct batadv_hard_iface *hard_iface, 24 struct batadv_hard_iface *hard_iface,
25 const uint8_t *dst_addr); 25 const uint8_t *dst_addr);
26bool batadv_send_skb_to_orig(struct sk_buff *skb, 26int batadv_send_skb_to_orig(struct sk_buff *skb,
27 struct batadv_orig_node *orig_node, 27 struct batadv_orig_node *orig_node,
28 struct batadv_hard_iface *recv_if); 28 struct batadv_hard_iface *recv_if);
29void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface); 29void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface);
30int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, 30int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
31 const struct sk_buff *skb, 31 const struct sk_buff *skb,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 819dfb006cdf..813db4e64602 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -154,7 +154,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
154 0x00, 0x00}; 154 0x00, 0x00};
155 unsigned int header_len = 0; 155 unsigned int header_len = 0;
156 int data_len = skb->len, ret; 156 int data_len = skb->len, ret;
157 short vid __maybe_unused = -1; 157 unsigned short vid __maybe_unused = BATADV_NO_FLAGS;
158 bool do_bcast = false; 158 bool do_bcast = false;
159 uint32_t seqno; 159 uint32_t seqno;
160 unsigned long brd_delay = 1; 160 unsigned long brd_delay = 1;
@@ -168,6 +168,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
168 case ETH_P_8021Q: 168 case ETH_P_8021Q:
169 vhdr = (struct vlan_ethhdr *)skb->data; 169 vhdr = (struct vlan_ethhdr *)skb->data;
170 vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; 170 vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
171 vid |= BATADV_VLAN_HAS_TAG;
171 172
172 if (vhdr->h_vlan_encapsulated_proto != ethertype) 173 if (vhdr->h_vlan_encapsulated_proto != ethertype)
173 break; 174 break;
@@ -180,6 +181,9 @@ static int batadv_interface_tx(struct sk_buff *skb,
180 if (batadv_bla_tx(bat_priv, skb, vid)) 181 if (batadv_bla_tx(bat_priv, skb, vid))
181 goto dropped; 182 goto dropped;
182 183
184 /* skb->data might have been reallocated by batadv_bla_tx() */
185 ethhdr = (struct ethhdr *)skb->data;
186
183 /* Register the client MAC in the transtable */ 187 /* Register the client MAC in the transtable */
184 if (!is_multicast_ether_addr(ethhdr->h_source)) 188 if (!is_multicast_ether_addr(ethhdr->h_source))
185 batadv_tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif); 189 batadv_tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif);
@@ -220,8 +224,14 @@ static int batadv_interface_tx(struct sk_buff *skb,
220 default: 224 default:
221 break; 225 break;
222 } 226 }
227
228 /* reminder: ethhdr might have become unusable from here on
229 * (batadv_gw_is_dhcp_target() might have reallocated skb data)
230 */
223 } 231 }
224 232
233 batadv_skb_set_priority(skb, 0);
234
225 /* ethernet packet should be broadcasted */ 235 /* ethernet packet should be broadcasted */
226 if (do_bcast) { 236 if (do_bcast) {
227 primary_if = batadv_primary_if_get_selected(bat_priv); 237 primary_if = batadv_primary_if_get_selected(bat_priv);
@@ -266,7 +276,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
266 /* unicast packet */ 276 /* unicast packet */
267 } else { 277 } else {
268 if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_OFF) { 278 if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_OFF) {
269 ret = batadv_gw_out_of_range(bat_priv, skb, ethhdr); 279 ret = batadv_gw_out_of_range(bat_priv, skb);
270 if (ret) 280 if (ret)
271 goto dropped; 281 goto dropped;
272 } 282 }
@@ -303,7 +313,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
303 struct ethhdr *ethhdr; 313 struct ethhdr *ethhdr;
304 struct vlan_ethhdr *vhdr; 314 struct vlan_ethhdr *vhdr;
305 struct batadv_header *batadv_header = (struct batadv_header *)skb->data; 315 struct batadv_header *batadv_header = (struct batadv_header *)skb->data;
306 short vid __maybe_unused = -1; 316 unsigned short vid __maybe_unused = BATADV_NO_FLAGS;
307 __be16 ethertype = __constant_htons(ETH_P_BATMAN); 317 __be16 ethertype = __constant_htons(ETH_P_BATMAN);
308 bool is_bcast; 318 bool is_bcast;
309 319
@@ -316,12 +326,13 @@ void batadv_interface_rx(struct net_device *soft_iface,
316 skb_pull_rcsum(skb, hdr_size); 326 skb_pull_rcsum(skb, hdr_size);
317 skb_reset_mac_header(skb); 327 skb_reset_mac_header(skb);
318 328
319 ethhdr = (struct ethhdr *)skb_mac_header(skb); 329 ethhdr = eth_hdr(skb);
320 330
321 switch (ntohs(ethhdr->h_proto)) { 331 switch (ntohs(ethhdr->h_proto)) {
322 case ETH_P_8021Q: 332 case ETH_P_8021Q:
323 vhdr = (struct vlan_ethhdr *)skb->data; 333 vhdr = (struct vlan_ethhdr *)skb->data;
324 vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; 334 vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
335 vid |= BATADV_VLAN_HAS_TAG;
325 336
326 if (vhdr->h_vlan_encapsulated_proto != ethertype) 337 if (vhdr->h_vlan_encapsulated_proto != ethertype)
327 break; 338 break;
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 15a22efa9a67..4114b961bc2c 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -385,6 +385,10 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj,
385 curr_gw_mode_str, buff); 385 curr_gw_mode_str, buff);
386 386
387 batadv_gw_deselect(bat_priv); 387 batadv_gw_deselect(bat_priv);
388 /* always call batadv_gw_check_client_stop() before changing the gateway
389 * state
390 */
391 batadv_gw_check_client_stop(bat_priv);
388 atomic_set(&bat_priv->gw_mode, (unsigned int)gw_mode_tmp); 392 atomic_set(&bat_priv->gw_mode, (unsigned int)gw_mode_tmp);
389 return count; 393 return count;
390} 394}
@@ -582,10 +586,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
582 (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0)) 586 (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0))
583 goto out; 587 goto out;
584 588
585 if (!rtnl_trylock()) { 589 rtnl_lock();
586 ret = -ERESTARTSYS;
587 goto out;
588 }
589 590
590 if (status_tmp == BATADV_IF_NOT_IN_USE) { 591 if (status_tmp == BATADV_IF_NOT_IN_USE) {
591 batadv_hardif_disable_interface(hard_iface, 592 batadv_hardif_disable_interface(hard_iface,
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 9e8748575845..34510f38708f 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -163,10 +163,19 @@ batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry)
163 call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); 163 call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
164} 164}
165 165
166/**
167 * batadv_tt_local_event - store a local TT event (ADD/DEL)
168 * @bat_priv: the bat priv with all the soft interface information
169 * @tt_local_entry: the TT entry involved in the event
170 * @event_flags: flags to store in the event structure
171 */
166static void batadv_tt_local_event(struct batadv_priv *bat_priv, 172static void batadv_tt_local_event(struct batadv_priv *bat_priv,
167 const uint8_t *addr, uint8_t flags) 173 struct batadv_tt_local_entry *tt_local_entry,
174 uint8_t event_flags)
168{ 175{
169 struct batadv_tt_change_node *tt_change_node, *entry, *safe; 176 struct batadv_tt_change_node *tt_change_node, *entry, *safe;
177 struct batadv_tt_common_entry *common = &tt_local_entry->common;
178 uint8_t flags = common->flags | event_flags;
170 bool event_removed = false; 179 bool event_removed = false;
171 bool del_op_requested, del_op_entry; 180 bool del_op_requested, del_op_entry;
172 181
@@ -176,7 +185,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv,
176 return; 185 return;
177 186
178 tt_change_node->change.flags = flags; 187 tt_change_node->change.flags = flags;
179 memcpy(tt_change_node->change.addr, addr, ETH_ALEN); 188 memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN);
180 189
181 del_op_requested = flags & BATADV_TT_CLIENT_DEL; 190 del_op_requested = flags & BATADV_TT_CLIENT_DEL;
182 191
@@ -184,7 +193,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv,
184 spin_lock_bh(&bat_priv->tt.changes_list_lock); 193 spin_lock_bh(&bat_priv->tt.changes_list_lock);
185 list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, 194 list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list,
186 list) { 195 list) {
187 if (!batadv_compare_eth(entry->change.addr, addr)) 196 if (!batadv_compare_eth(entry->change.addr, common->addr))
188 continue; 197 continue;
189 198
190 /* DEL+ADD in the same orig interval have no effect and can be 199 /* DEL+ADD in the same orig interval have no effect and can be
@@ -332,7 +341,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
332 } 341 }
333 342
334add_event: 343add_event:
335 batadv_tt_local_event(bat_priv, addr, tt_local->common.flags); 344 batadv_tt_local_event(bat_priv, tt_local, BATADV_NO_FLAGS);
336 345
337check_roaming: 346check_roaming:
338 /* Check whether it is a roaming, but don't do anything if the roaming 347 /* Check whether it is a roaming, but don't do anything if the roaming
@@ -529,8 +538,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
529 struct batadv_tt_local_entry *tt_local_entry, 538 struct batadv_tt_local_entry *tt_local_entry,
530 uint16_t flags, const char *message) 539 uint16_t flags, const char *message)
531{ 540{
532 batadv_tt_local_event(bat_priv, tt_local_entry->common.addr, 541 batadv_tt_local_event(bat_priv, tt_local_entry, flags);
533 tt_local_entry->common.flags | flags);
534 542
535 /* The local client has to be marked as "pending to be removed" but has 543 /* The local client has to be marked as "pending to be removed" but has
536 * to be kept in the table in order to send it in a full table 544 * to be kept in the table in order to send it in a full table
@@ -584,8 +592,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
584 /* if this client has been added right now, it is possible to 592 /* if this client has been added right now, it is possible to
585 * immediately purge it 593 * immediately purge it
586 */ 594 */
587 batadv_tt_local_event(bat_priv, tt_local_entry->common.addr, 595 batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL);
588 curr_flags | BATADV_TT_CLIENT_DEL);
589 hlist_del_rcu(&tt_local_entry->common.hash_entry); 596 hlist_del_rcu(&tt_local_entry->common.hash_entry);
590 batadv_tt_local_entry_free_ref(tt_local_entry); 597 batadv_tt_local_entry_free_ref(tt_local_entry);
591 598
@@ -791,10 +798,25 @@ out:
791 batadv_tt_orig_list_entry_free_ref(orig_entry); 798 batadv_tt_orig_list_entry_free_ref(orig_entry);
792} 799}
793 800
794/* caller must hold orig_node refcount */ 801/**
802 * batadv_tt_global_add - add a new TT global entry or update an existing one
803 * @bat_priv: the bat priv with all the soft interface information
804 * @orig_node: the originator announcing the client
805 * @tt_addr: the mac address of the non-mesh client
806 * @flags: TT flags that have to be set for this non-mesh client
807 * @ttvn: the tt version number under which this non-mesh client was announced
808 *
809 * Add a new TT global entry for the given originator. If the entry already
810 * exists, add a new reference to the given originator (a global entry can have
811 * references to multiple originators) and adjust the flags attribute to reflect
812 * the function argument.
813 * If a TT local entry exists for this non-mesh client remove it.
814 *
815 * The caller must hold orig_node refcount.
816 */
795int batadv_tt_global_add(struct batadv_priv *bat_priv, 817int batadv_tt_global_add(struct batadv_priv *bat_priv,
796 struct batadv_orig_node *orig_node, 818 struct batadv_orig_node *orig_node,
797 const unsigned char *tt_addr, uint8_t flags, 819 const unsigned char *tt_addr, uint16_t flags,
798 uint8_t ttvn) 820 uint8_t ttvn)
799{ 821{
800 struct batadv_tt_global_entry *tt_global_entry; 822 struct batadv_tt_global_entry *tt_global_entry;
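Widening flags from uint8_t to uint16_t is more than cosmetic: the TT client flags span 16 bits, and (going by packet.h) the wire-visible flags occupy the low byte while locally used ones such as BATADV_TT_CLIENT_NOPURGE live in the high byte. A two-line sketch of what the narrower type silently loses:

uint16_t flags = BATADV_TT_CLIENT_NOPURGE;  /* a high-byte flag, e.g. BIT(8) */
uint8_t narrow = flags;                     /* truncates to 0: flag lost */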
@@ -1600,11 +1622,12 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
1600 tt_tot = tt_len / sizeof(struct batadv_tt_change); 1622 tt_tot = tt_len / sizeof(struct batadv_tt_change);
1601 1623
1602 len = tt_query_size + tt_len; 1624 len = tt_query_size + tt_len;
1603 skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); 1625 skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
1604 if (!skb) 1626 if (!skb)
1605 goto out; 1627 goto out;
1606 1628
1607 skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); 1629 skb->priority = TC_PRIO_CONTROL;
1630 skb_reserve(skb, ETH_HLEN);
1608 tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len); 1631 tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len);
1609 tt_response->ttvn = ttvn; 1632 tt_response->ttvn = ttvn;
1610 1633
@@ -1665,11 +1688,12 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
1665 if (!tt_req_node) 1688 if (!tt_req_node)
1666 goto out; 1689 goto out;
1667 1690
1668 skb = dev_alloc_skb(sizeof(*tt_request) + ETH_HLEN + NET_IP_ALIGN); 1691 skb = netdev_alloc_skb_ip_align(NULL, sizeof(*tt_request) + ETH_HLEN);
1669 if (!skb) 1692 if (!skb)
1670 goto out; 1693 goto out;
1671 1694
1672 skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); 1695 skb->priority = TC_PRIO_CONTROL;
1696 skb_reserve(skb, ETH_HLEN);
1673 1697
1674 tt_req_len = sizeof(*tt_request); 1698 tt_req_len = sizeof(*tt_request);
1675 tt_request = (struct batadv_tt_query_packet *)skb_put(skb, tt_req_len); 1699 tt_request = (struct batadv_tt_query_packet *)skb_put(skb, tt_req_len);
@@ -1691,7 +1715,7 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
1691 1715
1692 batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_TX); 1716 batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_TX);
1693 1717
1694 if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL)) 1718 if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL) != NET_XMIT_DROP)
1695 ret = 0; 1719 ret = 0;
1696 1720
1697out: 1721out:
@@ -1715,7 +1739,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
1715 struct batadv_orig_node *req_dst_orig_node; 1739 struct batadv_orig_node *req_dst_orig_node;
1716 struct batadv_orig_node *res_dst_orig_node = NULL; 1740 struct batadv_orig_node *res_dst_orig_node = NULL;
1717 uint8_t orig_ttvn, req_ttvn, ttvn; 1741 uint8_t orig_ttvn, req_ttvn, ttvn;
1718 int ret = false; 1742 int res, ret = false;
1719 unsigned char *tt_buff; 1743 unsigned char *tt_buff;
1720 bool full_table; 1744 bool full_table;
1721 uint16_t tt_len, tt_tot; 1745 uint16_t tt_len, tt_tot;
@@ -1762,11 +1786,12 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
1762 tt_tot = tt_len / sizeof(struct batadv_tt_change); 1786 tt_tot = tt_len / sizeof(struct batadv_tt_change);
1763 1787
1764 len = sizeof(*tt_response) + tt_len; 1788 len = sizeof(*tt_response) + tt_len;
1765 skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); 1789 skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
1766 if (!skb) 1790 if (!skb)
1767 goto unlock; 1791 goto unlock;
1768 1792
1769 skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); 1793 skb->priority = TC_PRIO_CONTROL;
1794 skb_reserve(skb, ETH_HLEN);
1770 packet_pos = skb_put(skb, len); 1795 packet_pos = skb_put(skb, len);
1771 tt_response = (struct batadv_tt_query_packet *)packet_pos; 1796 tt_response = (struct batadv_tt_query_packet *)packet_pos;
1772 tt_response->ttvn = req_ttvn; 1797 tt_response->ttvn = req_ttvn;
@@ -1810,8 +1835,10 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
1810 1835
1811 batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); 1836 batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX);
1812 1837
1813 if (batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL)) 1838 res = batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL);
1839 if (res != NET_XMIT_DROP)
1814 ret = true; 1840 ret = true;
1841
1815 goto out; 1842 goto out;
1816 1843
1817unlock: 1844unlock:
@@ -1878,11 +1905,12 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
1878 tt_tot = tt_len / sizeof(struct batadv_tt_change); 1905 tt_tot = tt_len / sizeof(struct batadv_tt_change);
1879 1906
1880 len = sizeof(*tt_response) + tt_len; 1907 len = sizeof(*tt_response) + tt_len;
1881 skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); 1908 skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
1882 if (!skb) 1909 if (!skb)
1883 goto unlock; 1910 goto unlock;
1884 1911
1885 skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); 1912 skb->priority = TC_PRIO_CONTROL;
1913 skb_reserve(skb, ETH_HLEN);
1886 packet_pos = skb_put(skb, len); 1914 packet_pos = skb_put(skb, len);
1887 tt_response = (struct batadv_tt_query_packet *)packet_pos; 1915 tt_response = (struct batadv_tt_query_packet *)packet_pos;
1888 tt_response->ttvn = req_ttvn; 1916 tt_response->ttvn = req_ttvn;
@@ -1925,7 +1953,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
1925 1953
1926 batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); 1954 batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX);
1927 1955
1928 if (batadv_send_skb_to_orig(skb, orig_node, NULL)) 1956 if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
1929 ret = true; 1957 ret = true;
1930 goto out; 1958 goto out;
1931 1959
@@ -2212,11 +2240,12 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
2212 if (!batadv_tt_check_roam_count(bat_priv, client)) 2240 if (!batadv_tt_check_roam_count(bat_priv, client))
2213 goto out; 2241 goto out;
2214 2242
2215 skb = dev_alloc_skb(sizeof(*roam_adv_packet) + ETH_HLEN + NET_IP_ALIGN); 2243 skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
2216 if (!skb) 2244 if (!skb)
2217 goto out; 2245 goto out;
2218 2246
2219 skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); 2247 skb->priority = TC_PRIO_CONTROL;
2248 skb_reserve(skb, ETH_HLEN);
2220 2249
2221 roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len); 2250 roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len);
2222 2251
@@ -2238,7 +2267,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
2238 2267
2239 batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX); 2268 batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX);
2240 2269
2241 if (batadv_send_skb_to_orig(skb, orig_node, NULL)) 2270 if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
2242 ret = 0; 2271 ret = 0;
2243 2272
2244out: 2273out:
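Every allocation site in this file now shares one pattern: netdev_alloc_skb_ip_align() reserves the NET_IP_ALIGN headroom internally (which is why the explicit NET_IP_ALIGN terms disappear), NULL is a valid device argument, and TC_PRIO_CONTROL lets these control packets queue ahead of bulk traffic. Condensed, with len standing for the packet length:

skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
if (!skb)
	goto out;

skb->priority = TC_PRIO_CONTROL;   /* control traffic beats bulk data */
skb_reserve(skb, ETH_HLEN);        /* headroom for the ethernet header */
packet_pos = skb_put(skb, len);    /* linear payload area */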
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index ab8e683b402f..659a3bb759ce 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -33,7 +33,7 @@ void batadv_tt_global_add_orig(struct batadv_priv *bat_priv,
33 const unsigned char *tt_buff, int tt_buff_len); 33 const unsigned char *tt_buff, int tt_buff_len);
34int batadv_tt_global_add(struct batadv_priv *bat_priv, 34int batadv_tt_global_add(struct batadv_priv *bat_priv,
35 struct batadv_orig_node *orig_node, 35 struct batadv_orig_node *orig_node,
36 const unsigned char *addr, uint8_t flags, 36 const unsigned char *addr, uint16_t flags,
37 uint8_t ttvn); 37 uint8_t ttvn);
38int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset); 38int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset);
39void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, 39void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index aba8364c3689..b2c94e139319 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -61,6 +61,7 @@ struct batadv_hard_iface_bat_iv {
61 * @if_status: status of the interface for batman-adv 61 * @if_status: status of the interface for batman-adv
62 * @net_dev: pointer to the net_device 62 * @net_dev: pointer to the net_device
63 * @frag_seqno: last fragment sequence number sent by this interface 63 * @frag_seqno: last fragment sequence number sent by this interface
64 * @num_bcasts: number of payload re-broadcasts on this interface (ARQ)
64 * @hardif_obj: kobject of the per interface sysfs "mesh" directory 65 * @hardif_obj: kobject of the per interface sysfs "mesh" directory
65 * @refcount: number of contexts the object is used 66 * @refcount: number of contexts the object is used
66 * @batman_adv_ptype: packet type describing packets that should be processed by 67 * @batman_adv_ptype: packet type describing packets that should be processed by
@@ -76,6 +77,7 @@ struct batadv_hard_iface {
76 char if_status; 77 char if_status;
77 struct net_device *net_dev; 78 struct net_device *net_dev;
78 atomic_t frag_seqno; 79 atomic_t frag_seqno;
80 uint8_t num_bcasts;
79 struct kobject *hardif_obj; 81 struct kobject *hardif_obj;
80 atomic_t refcount; 82 atomic_t refcount;
81 struct packet_type batman_adv_ptype; 83 struct packet_type batman_adv_ptype;
@@ -640,7 +642,7 @@ struct batadv_socket_packet {
640#ifdef CONFIG_BATMAN_ADV_BLA 642#ifdef CONFIG_BATMAN_ADV_BLA
641struct batadv_bla_backbone_gw { 643struct batadv_bla_backbone_gw {
642 uint8_t orig[ETH_ALEN]; 644 uint8_t orig[ETH_ALEN];
643 short vid; 645 unsigned short vid;
644 struct hlist_node hash_entry; 646 struct hlist_node hash_entry;
645 struct batadv_priv *bat_priv; 647 struct batadv_priv *bat_priv;
646 unsigned long lasttime; 648 unsigned long lasttime;
@@ -663,7 +665,7 @@ struct batadv_bla_backbone_gw {
663 */ 665 */
664struct batadv_bla_claim { 666struct batadv_bla_claim {
665 uint8_t addr[ETH_ALEN]; 667 uint8_t addr[ETH_ALEN];
666 short vid; 668 unsigned short vid;
667 struct batadv_bla_backbone_gw *backbone_gw; 669 struct batadv_bla_backbone_gw *backbone_gw;
668 unsigned long lasttime; 670 unsigned long lasttime;
669 struct hlist_node hash_entry; 671 struct hlist_node hash_entry;
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index 0bb3b5982f94..48b31d33ce6b 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -242,6 +242,8 @@ int batadv_frag_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv,
242 frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len); 242 frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len);
243 if (!frag_skb) 243 if (!frag_skb)
244 goto dropped; 244 goto dropped;
245
246 skb->priority = TC_PRIO_CONTROL;
245 skb_reserve(frag_skb, ucf_hdr_len); 247 skb_reserve(frag_skb, ucf_hdr_len);
246 248
247 unicast_packet = (struct batadv_unicast_packet *)skb->data; 249 unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -326,7 +328,9 @@ static bool batadv_unicast_push_and_fill_skb(struct sk_buff *skb, int hdr_size,
326 * @skb: the skb containing the payload to encapsulate 328 * @skb: the skb containing the payload to encapsulate
327 * @orig_node: the destination node 329 * @orig_node: the destination node
328 * 330 *
329 * Returns false if the payload could not be encapsulated or true otherwise 331 * Returns false if the payload could not be encapsulated or true otherwise.
332 *
333 * This call might reallocate skb data.
330 */ 334 */
331static bool batadv_unicast_prepare_skb(struct sk_buff *skb, 335static bool batadv_unicast_prepare_skb(struct sk_buff *skb,
332 struct batadv_orig_node *orig_node) 336 struct batadv_orig_node *orig_node)
@@ -343,7 +347,9 @@ static bool batadv_unicast_prepare_skb(struct sk_buff *skb,
343 * @orig_node: the destination node 347 * @orig_node: the destination node
344 * @packet_subtype: the batman 4addr packet subtype to use 348 * @packet_subtype: the batman 4addr packet subtype to use
345 * 349 *
346 * Returns false if the payload could not be encapsulated or true otherwise 350 * Returns false if the payload could not be encapsulated or true otherwise.
351 *
352 * This call might reallocate skb data.
347 */ 353 */
348bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv, 354bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv,
349 struct sk_buff *skb, 355 struct sk_buff *skb,
@@ -401,7 +407,7 @@ int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv,
401 struct batadv_neigh_node *neigh_node; 407 struct batadv_neigh_node *neigh_node;
402 int data_len = skb->len; 408 int data_len = skb->len;
403 int ret = NET_RX_DROP; 409 int ret = NET_RX_DROP;
404 unsigned int dev_mtu; 410 unsigned int dev_mtu, header_len;
405 411
406 /* get routing information */ 412 /* get routing information */
407 if (is_multicast_ether_addr(ethhdr->h_dest)) { 413 if (is_multicast_ether_addr(ethhdr->h_dest)) {
@@ -428,11 +434,17 @@ find_router:
428 434
429 switch (packet_type) { 435 switch (packet_type) {
430 case BATADV_UNICAST: 436 case BATADV_UNICAST:
431 batadv_unicast_prepare_skb(skb, orig_node); 437 if (!batadv_unicast_prepare_skb(skb, orig_node))
438 goto out;
439
440 header_len = sizeof(struct batadv_unicast_packet);
432 break; 441 break;
433 case BATADV_UNICAST_4ADDR: 442 case BATADV_UNICAST_4ADDR:
434 batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node, 443 if (!batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node,
435 packet_subtype); 444 packet_subtype))
445 goto out;
446
447 header_len = sizeof(struct batadv_unicast_4addr_packet);
436 break; 448 break;
437 default: 449 default:
438 /* this function supports UNICAST and UNICAST_4ADDR only. It 450 /* this function supports UNICAST and UNICAST_4ADDR only. It
@@ -441,6 +453,7 @@ find_router:
441 goto out; 453 goto out;
442 } 454 }
443 455
456 ethhdr = (struct ethhdr *)(skb->data + header_len);
444 unicast_packet = (struct batadv_unicast_packet *)skb->data; 457 unicast_packet = (struct batadv_unicast_packet *)skb->data;
445 458
446 /* inform the destination node that we are still missing a correct route 459 /* inform the destination node that we are still missing a correct route
@@ -464,7 +477,7 @@ find_router:
464 goto out; 477 goto out;
465 } 478 }
466 479
467 if (batadv_send_skb_to_orig(skb, orig_node, NULL)) 480 if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
468 ret = 0; 481 ret = 0;
469 482
470out: 483out:
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index 1625e5793a89..d8ea31a58457 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -392,12 +392,13 @@ batadv_add_packet(struct batadv_priv *bat_priv,
392 return NULL; 392 return NULL;
393 393
394 len = sizeof(*packet) + vis_info_len; 394 len = sizeof(*packet) + vis_info_len;
395 info->skb_packet = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); 395 info->skb_packet = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
396 if (!info->skb_packet) { 396 if (!info->skb_packet) {
397 kfree(info); 397 kfree(info);
398 return NULL; 398 return NULL;
399 } 399 }
400 skb_reserve(info->skb_packet, ETH_HLEN + NET_IP_ALIGN); 400 info->skb_packet->priority = TC_PRIO_CONTROL;
401 skb_reserve(info->skb_packet, ETH_HLEN);
401 packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len); 402 packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len);
402 403
403 kref_init(&info->refcount); 404 kref_init(&info->refcount);
@@ -697,7 +698,7 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv,
697 struct batadv_orig_node *orig_node; 698 struct batadv_orig_node *orig_node;
698 struct batadv_vis_packet *packet; 699 struct batadv_vis_packet *packet;
699 struct sk_buff *skb; 700 struct sk_buff *skb;
700 uint32_t i; 701 uint32_t i, res;
701 702
702 703
703 packet = (struct batadv_vis_packet *)info->skb_packet->data; 704 packet = (struct batadv_vis_packet *)info->skb_packet->data;
@@ -724,7 +725,8 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv,
724 if (!skb) 725 if (!skb)
725 continue; 726 continue;
726 727
727 if (!batadv_send_skb_to_orig(skb, orig_node, NULL)) 728 res = batadv_send_skb_to_orig(skb, orig_node, NULL);
729 if (res == NET_XMIT_DROP)
728 kfree_skb(skb); 730 kfree_skb(skb);
729 } 731 }
730 rcu_read_unlock(); 732 rcu_read_unlock();
@@ -748,7 +750,7 @@ static void batadv_unicast_vis_packet(struct batadv_priv *bat_priv,
748 if (!skb) 750 if (!skb)
749 goto out; 751 goto out;
750 752
751 if (!batadv_send_skb_to_orig(skb, orig_node, NULL)) 753 if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP)
752 kfree_skb(skb); 754 kfree_skb(skb);
753 755
754out: 756out:
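The flipped conditions in this file and the previous ones reflect batadv_send_skb_to_orig() now returning NET_XMIT_* codes instead of a boolean: NET_XMIT_CN (congestion notified) still means the skb was handed off, so only NET_XMIT_DROP counts as failure. As the vis.c callers show, the skb remains the caller's responsibility on drop and must be freed:

res = batadv_send_skb_to_orig(skb, orig_node, NULL);
if (res == NET_XMIT_DROP)
	kfree_skb(skb);   /* not consumed on drop; free it here */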
@@ -854,13 +856,14 @@ int batadv_vis_init(struct batadv_priv *bat_priv)
854 if (!bat_priv->vis.my_info) 856 if (!bat_priv->vis.my_info)
855 goto err; 857 goto err;
856 858
857 len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE; 859 len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE + ETH_HLEN;
858 len += ETH_HLEN + NET_IP_ALIGN; 860 bat_priv->vis.my_info->skb_packet = netdev_alloc_skb_ip_align(NULL,
859 bat_priv->vis.my_info->skb_packet = dev_alloc_skb(len); 861 len);
860 if (!bat_priv->vis.my_info->skb_packet) 862 if (!bat_priv->vis.my_info->skb_packet)
861 goto free_info; 863 goto free_info;
862 864
863 skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN + NET_IP_ALIGN); 865 bat_priv->vis.my_info->skb_packet->priority = TC_PRIO_CONTROL;
866 skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN);
864 tmp_skb = bat_priv->vis.my_info->skb_packet; 867 tmp_skb = bat_priv->vis.my_info->skb_packet;
865 packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet)); 868 packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet));
866 869
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 6c7f36379722..f0817121ec5e 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -31,6 +31,24 @@
31#include <net/bluetooth/a2mp.h> 31#include <net/bluetooth/a2mp.h>
32#include <net/bluetooth/smp.h> 32#include <net/bluetooth/smp.h>
33 33
34struct sco_param {
35 u16 pkt_type;
36 u16 max_latency;
37};
38
39static const struct sco_param sco_param_cvsd[] = {
40 { EDR_ESCO_MASK & ~ESCO_2EV3, 0x000a }, /* S3 */
41 { EDR_ESCO_MASK & ~ESCO_2EV3, 0x0007 }, /* S2 */
42 { EDR_ESCO_MASK | ESCO_EV3, 0x0007 }, /* S1 */
43 { EDR_ESCO_MASK | ESCO_HV3, 0xffff }, /* D1 */
44 { EDR_ESCO_MASK | ESCO_HV1, 0xffff }, /* D0 */
45};
46
47static const struct sco_param sco_param_wideband[] = {
48 { EDR_ESCO_MASK & ~ESCO_2EV3, 0x000d }, /* T2 */
49 { EDR_ESCO_MASK | ESCO_EV3, 0x0008 }, /* T1 */
50};
51
34static void hci_le_create_connection(struct hci_conn *conn) 52static void hci_le_create_connection(struct hci_conn *conn)
35{ 53{
36 struct hci_dev *hdev = conn->hdev; 54 struct hci_dev *hdev = conn->hdev;
@@ -172,10 +190,11 @@ static void hci_add_sco(struct hci_conn *conn, __u16 handle)
172 hci_send_cmd(hdev, HCI_OP_ADD_SCO, sizeof(cp), &cp); 190 hci_send_cmd(hdev, HCI_OP_ADD_SCO, sizeof(cp), &cp);
173} 191}
174 192
175void hci_setup_sync(struct hci_conn *conn, __u16 handle) 193bool hci_setup_sync(struct hci_conn *conn, __u16 handle)
176{ 194{
177 struct hci_dev *hdev = conn->hdev; 195 struct hci_dev *hdev = conn->hdev;
178 struct hci_cp_setup_sync_conn cp; 196 struct hci_cp_setup_sync_conn cp;
197 const struct sco_param *param;
179 198
180 BT_DBG("hcon %p", conn); 199 BT_DBG("hcon %p", conn);
181 200
@@ -185,15 +204,35 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle)
185 conn->attempt++; 204 conn->attempt++;
186 205
187 cp.handle = cpu_to_le16(handle); 206 cp.handle = cpu_to_le16(handle);
188 cp.pkt_type = cpu_to_le16(conn->pkt_type);
189 207
190 cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40); 208 cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40);
191 cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40); 209 cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40);
192 cp.max_latency = __constant_cpu_to_le16(0xffff); 210 cp.voice_setting = cpu_to_le16(conn->setting);
193 cp.voice_setting = cpu_to_le16(hdev->voice_setting); 211
194 cp.retrans_effort = 0xff; 212 switch (conn->setting & SCO_AIRMODE_MASK) {
213 case SCO_AIRMODE_TRANSP:
214 if (conn->attempt > ARRAY_SIZE(sco_param_wideband))
215 return false;
216 cp.retrans_effort = 0x02;
217 param = &sco_param_wideband[conn->attempt - 1];
218 break;
219 case SCO_AIRMODE_CVSD:
220 if (conn->attempt > ARRAY_SIZE(sco_param_cvsd))
221 return false;
222 cp.retrans_effort = 0x01;
223 param = &sco_param_cvsd[conn->attempt - 1];
224 break;
225 default:
226 return false;
227 }
195 228
196 hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp); 229 cp.pkt_type = __cpu_to_le16(param->pkt_type);
230 cp.max_latency = __cpu_to_le16(param->max_latency);
231
232 if (hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp) < 0)
233 return false;
234
235 return true;
197} 236}
198 237
199void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, 238void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max,
@@ -560,13 +599,13 @@ static struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
560 return acl; 599 return acl;
561} 600}
562 601
563static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, 602struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
564 bdaddr_t *dst, u8 sec_level, u8 auth_type) 603 __u16 setting)
565{ 604{
566 struct hci_conn *acl; 605 struct hci_conn *acl;
567 struct hci_conn *sco; 606 struct hci_conn *sco;
568 607
569 acl = hci_connect_acl(hdev, dst, sec_level, auth_type); 608 acl = hci_connect_acl(hdev, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING);
570 if (IS_ERR(acl)) 609 if (IS_ERR(acl))
571 return acl; 610 return acl;
572 611
@@ -584,6 +623,8 @@ static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type,
584 623
585 hci_conn_hold(sco); 624 hci_conn_hold(sco);
586 625
626 sco->setting = setting;
627
587 if (acl->state == BT_CONNECTED && 628 if (acl->state == BT_CONNECTED &&
588 (sco->state == BT_OPEN || sco->state == BT_CLOSED)) { 629 (sco->state == BT_OPEN || sco->state == BT_CLOSED)) {
589 set_bit(HCI_CONN_POWER_SAVE, &acl->flags); 630 set_bit(HCI_CONN_POWER_SAVE, &acl->flags);
@@ -612,9 +653,6 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,
612 return hci_connect_le(hdev, dst, dst_type, sec_level, auth_type); 653 return hci_connect_le(hdev, dst, dst_type, sec_level, auth_type);
613 case ACL_LINK: 654 case ACL_LINK:
614 return hci_connect_acl(hdev, dst, sec_level, auth_type); 655 return hci_connect_acl(hdev, dst, sec_level, auth_type);
615 case SCO_LINK:
616 case ESCO_LINK:
617 return hci_connect_sco(hdev, type, dst, sec_level, auth_type);
618 } 656 }
619 657
620 return ERR_PTR(-EINVAL); 658 return ERR_PTR(-EINVAL);
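Together with the retry in hci_sync_conn_complete_evt() further down, the sco_param tables give hci_setup_sync() a built-in fallback ladder: each failed attempt bumps conn->attempt, which indexes the next, more conservative row, and a false return means the ladder is exhausted. The selection step, restated using the names introduced above:

const struct sco_param *param;

switch (conn->setting & SCO_AIRMODE_MASK) {
case SCO_AIRMODE_TRANSP:      /* transparent data, e.g. wideband speech */
	if (conn->attempt > ARRAY_SIZE(sco_param_wideband))
		return false; /* T2 and T1 both rejected: give up */
	param = &sco_param_wideband[conn->attempt - 1];
	break;
case SCO_AIRMODE_CVSD:
	if (conn->attempt > ARRAY_SIZE(sco_param_cvsd))
		return false; /* walked S3 down to D0 with no luck */
	param = &sco_param_cvsd[conn->attempt - 1];
	break;
default:
	return false;         /* unknown air mode */
}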
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 33843c5c4939..fb7356fcfe51 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -341,7 +341,6 @@ static void hci_init1_req(struct hci_request *req, unsigned long opt)
341 341
342static void bredr_setup(struct hci_request *req) 342static void bredr_setup(struct hci_request *req)
343{ 343{
344 struct hci_cp_delete_stored_link_key cp;
345 __le16 param; 344 __le16 param;
346 __u8 flt_type; 345 __u8 flt_type;
347 346
@@ -365,10 +364,6 @@ static void bredr_setup(struct hci_request *req)
365 param = __constant_cpu_to_le16(0x7d00); 364 param = __constant_cpu_to_le16(0x7d00);
366 hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, &param); 365 hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
367 366
368 bacpy(&cp.bdaddr, BDADDR_ANY);
369 cp.delete_all = 0x01;
370 hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp);
371
372 /* Read page scan parameters */ 367 /* Read page scan parameters */
373 if (req->hdev->hci_ver > BLUETOOTH_VER_1_1) { 368 if (req->hdev->hci_ver > BLUETOOTH_VER_1_1) {
374 hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL); 369 hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL);
@@ -459,6 +454,18 @@ static void hci_setup_event_mask(struct hci_request *req)
459 events[4] |= 0x04; /* Read Remote Extended Features Complete */ 454 events[4] |= 0x04; /* Read Remote Extended Features Complete */
460 events[5] |= 0x08; /* Synchronous Connection Complete */ 455 events[5] |= 0x08; /* Synchronous Connection Complete */
461 events[5] |= 0x10; /* Synchronous Connection Changed */ 456 events[5] |= 0x10; /* Synchronous Connection Changed */
457 } else {
458 /* Use a different default for LE-only devices */
459 memset(events, 0, sizeof(events));
460 events[0] |= 0x10; /* Disconnection Complete */
461 events[0] |= 0x80; /* Encryption Change */
462 events[1] |= 0x08; /* Read Remote Version Information Complete */
463 events[1] |= 0x20; /* Command Complete */
464 events[1] |= 0x40; /* Command Status */
465 events[1] |= 0x80; /* Hardware Error */
466 events[2] |= 0x04; /* Number of Completed Packets */
467 events[3] |= 0x02; /* Data Buffer Overflow */
468 events[5] |= 0x80; /* Encryption Key Refresh Complete */
462 } 469 }
463 470
464 if (lmp_inq_rssi_capable(hdev)) 471 if (lmp_inq_rssi_capable(hdev))
@@ -518,7 +525,10 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt)
518 525
519 hci_setup_event_mask(req); 526 hci_setup_event_mask(req);
520 527
521 if (hdev->hci_ver > BLUETOOTH_VER_1_1) 528 /* AVM Berlin (31), aka "BlueFRITZ!", doesn't support the read
529 * local supported commands HCI command.
530 */
531 if (hdev->manufacturer != 31 && hdev->hci_ver > BLUETOOTH_VER_1_1)
522 hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); 532 hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL);
523 533
524 if (lmp_ssp_capable(hdev)) { 534 if (lmp_ssp_capable(hdev)) {
@@ -602,6 +612,24 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
602 struct hci_dev *hdev = req->hdev; 612 struct hci_dev *hdev = req->hdev;
603 u8 p; 613 u8 p;
604 614
615 /* Some Broadcom-based Bluetooth controllers do not support the
616 * Delete Stored Link Key command. They clearly indicate its
617 * absence in the bit mask of supported commands.
618 *
619 * Check the supported commands and, only if the command is marked
620 * as supported, send it. If not supported, assume that the controller
621 * does not have actual support for stored link keys, which makes this
622 * command redundant anyway.
623 */
624 if (hdev->commands[6] & 0x80) {
625 struct hci_cp_delete_stored_link_key cp;
626
627 bacpy(&cp.bdaddr, BDADDR_ANY);
628 cp.delete_all = 0x01;
629 hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY,
630 sizeof(cp), &cp);
631 }
632
605 if (hdev->commands[5] & 0x10) 633 if (hdev->commands[5] & 0x10)
606 hci_setup_link_policy(req); 634 hci_setup_link_policy(req);
607 635
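The commands array checked here is the bitmask returned by Read Local Supported Commands: each octet packs eight command-support bits, and commands[6] & 0x80 is exactly "octet 6, bit 7", the position of Delete Stored Link Key. A hypothetical helper (not a real kernel function) makes the indexing explicit:

static bool hci_cmd_supported(const __u8 *commands, unsigned int octet,
			      unsigned int bit)
{
	/* 64 octets of 8 bits each, as laid out in the core specification */
	return commands[octet] & (1U << bit);
}

/* equivalent to the open-coded test above: */
if (hci_cmd_supported(hdev->commands, 6, 7))
	/* issue HCI_OP_DELETE_STORED_LINK_KEY */;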
@@ -746,7 +774,7 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state)
746 hdev->discovery.state = state; 774 hdev->discovery.state = state;
747} 775}
748 776
749static void inquiry_cache_flush(struct hci_dev *hdev) 777void hci_inquiry_cache_flush(struct hci_dev *hdev)
750{ 778{
751 struct discovery_state *cache = &hdev->discovery; 779 struct discovery_state *cache = &hdev->discovery;
752 struct inquiry_entry *p, *n; 780 struct inquiry_entry *p, *n;
@@ -959,7 +987,7 @@ int hci_inquiry(void __user *arg)
959 hci_dev_lock(hdev); 987 hci_dev_lock(hdev);
960 if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX || 988 if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX ||
961 inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) { 989 inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) {
962 inquiry_cache_flush(hdev); 990 hci_inquiry_cache_flush(hdev);
963 do_inquiry = 1; 991 do_inquiry = 1;
964 } 992 }
965 hci_dev_unlock(hdev); 993 hci_dev_unlock(hdev);
@@ -1118,7 +1146,11 @@ int hci_dev_open(__u16 dev)
1118 goto done; 1146 goto done;
1119 } 1147 }
1120 1148
1121 if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) { 1149 /* Check for rfkill but allow the HCI setup stage to proceed
1150 * (which in itself doesn't cause any RF activity).
1151 */
1152 if (test_bit(HCI_RFKILLED, &hdev->dev_flags) &&
1153 !test_bit(HCI_SETUP, &hdev->dev_flags)) {
1122 ret = -ERFKILL; 1154 ret = -ERFKILL;
1123 goto done; 1155 goto done;
1124 } 1156 }
@@ -1196,8 +1228,6 @@ static int hci_dev_do_close(struct hci_dev *hdev)
1196{ 1228{
1197 BT_DBG("%s %p", hdev->name, hdev); 1229 BT_DBG("%s %p", hdev->name, hdev);
1198 1230
1199 cancel_work_sync(&hdev->le_scan);
1200
1201 cancel_delayed_work(&hdev->power_off); 1231 cancel_delayed_work(&hdev->power_off);
1202 1232
1203 hci_req_cancel(hdev, ENODEV); 1233 hci_req_cancel(hdev, ENODEV);
@@ -1225,7 +1255,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
1225 cancel_delayed_work_sync(&hdev->le_scan_disable); 1255 cancel_delayed_work_sync(&hdev->le_scan_disable);
1226 1256
1227 hci_dev_lock(hdev); 1257 hci_dev_lock(hdev);
1228 inquiry_cache_flush(hdev); 1258 hci_inquiry_cache_flush(hdev);
1229 hci_conn_hash_flush(hdev); 1259 hci_conn_hash_flush(hdev);
1230 hci_dev_unlock(hdev); 1260 hci_dev_unlock(hdev);
1231 1261
@@ -1326,7 +1356,7 @@ int hci_dev_reset(__u16 dev)
1326 skb_queue_purge(&hdev->cmd_q); 1356 skb_queue_purge(&hdev->cmd_q);
1327 1357
1328 hci_dev_lock(hdev); 1358 hci_dev_lock(hdev);
1329 inquiry_cache_flush(hdev); 1359 hci_inquiry_cache_flush(hdev);
1330 hci_conn_hash_flush(hdev); 1360 hci_conn_hash_flush(hdev);
1331 hci_dev_unlock(hdev); 1361 hci_dev_unlock(hdev);
1332 1362
@@ -1540,10 +1570,13 @@ static int hci_rfkill_set_block(void *data, bool blocked)
1540 1570
1541 BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked); 1571 BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked);
1542 1572
1543 if (!blocked) 1573 if (blocked) {
1544 return 0; 1574 set_bit(HCI_RFKILLED, &hdev->dev_flags);
1545 1575 if (!test_bit(HCI_SETUP, &hdev->dev_flags))
1546 hci_dev_do_close(hdev); 1576 hci_dev_do_close(hdev);
1577 } else {
1578 clear_bit(HCI_RFKILLED, &hdev->dev_flags);
1579 }
1547 1580
1548 return 0; 1581 return 0;
1549} 1582}
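HCI_RFKILLED separates "radio is soft-blocked" from "device must close now", so the setup stage, which emits no RF, can still complete while blocked. Pieced together from the three hunks in this file that touch the flag, its lifecycle is roughly:

/* 1) seeded at registration from the current rfkill state */
if (hdev->rfkill && rfkill_blocked(hdev->rfkill))
	set_bit(HCI_RFKILLED, &hdev->dev_flags);

/* 2) toggled by the rfkill notifier; only enforced outside setup */
if (blocked) {
	set_bit(HCI_RFKILLED, &hdev->dev_flags);
	if (!test_bit(HCI_SETUP, &hdev->dev_flags))
		hci_dev_do_close(hdev);
} else {
	clear_bit(HCI_RFKILLED, &hdev->dev_flags);
}

/* 3) hci_power_on() closes a device that came up blocked instead of
 * arming the auto-off timer
 */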
@@ -1555,15 +1588,23 @@ static const struct rfkill_ops hci_rfkill_ops = {
1555static void hci_power_on(struct work_struct *work) 1588static void hci_power_on(struct work_struct *work)
1556{ 1589{
1557 struct hci_dev *hdev = container_of(work, struct hci_dev, power_on); 1590 struct hci_dev *hdev = container_of(work, struct hci_dev, power_on);
1591 int err;
1558 1592
1559 BT_DBG("%s", hdev->name); 1593 BT_DBG("%s", hdev->name);
1560 1594
1561 if (hci_dev_open(hdev->id) < 0) 1595 err = hci_dev_open(hdev->id);
1596 if (err < 0) {
1597 mgmt_set_powered_failed(hdev, err);
1562 return; 1598 return;
1599 }
1563 1600
1564 if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) 1601 if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) {
1602 clear_bit(HCI_AUTO_OFF, &hdev->dev_flags);
1603 hci_dev_do_close(hdev);
1604 } else if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
1565 queue_delayed_work(hdev->req_workqueue, &hdev->power_off, 1605 queue_delayed_work(hdev->req_workqueue, &hdev->power_off,
1566 HCI_AUTO_OFF_TIMEOUT); 1606 HCI_AUTO_OFF_TIMEOUT);
1607 }
1567 1608
1568 if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) 1609 if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags))
1569 mgmt_index_added(hdev); 1610 mgmt_index_added(hdev);
@@ -1982,80 +2023,59 @@ int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
1982 return mgmt_device_unblocked(hdev, bdaddr, type); 2023 return mgmt_device_unblocked(hdev, bdaddr, type);
1983} 2024}
1984 2025
1985static void le_scan_param_req(struct hci_request *req, unsigned long opt) 2026static void inquiry_complete(struct hci_dev *hdev, u8 status)
1986{ 2027{
1987 struct le_scan_params *param = (struct le_scan_params *) opt; 2028 if (status) {
1988 struct hci_cp_le_set_scan_param cp; 2029 BT_ERR("Failed to start inquiry: status %d", status);
1989
1990 memset(&cp, 0, sizeof(cp));
1991 cp.type = param->type;
1992 cp.interval = cpu_to_le16(param->interval);
1993 cp.window = cpu_to_le16(param->window);
1994
1995 hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp);
1996}
1997
1998static void le_scan_enable_req(struct hci_request *req, unsigned long opt)
1999{
2000 struct hci_cp_le_set_scan_enable cp;
2001
2002 memset(&cp, 0, sizeof(cp));
2003 cp.enable = LE_SCAN_ENABLE;
2004 cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
2005 2030
2006 hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); 2031 hci_dev_lock(hdev);
2032 hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
2033 hci_dev_unlock(hdev);
2034 return;
2035 }
2007} 2036}
2008 2037
2009static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval, 2038static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status)
2010 u16 window, int timeout)
2011{ 2039{
2012 long timeo = msecs_to_jiffies(3000); 2040 /* General inquiry access code (GIAC) */
2013 struct le_scan_params param; 2041 u8 lap[3] = { 0x33, 0x8b, 0x9e };
2042 struct hci_request req;
2043 struct hci_cp_inquiry cp;
2014 int err; 2044 int err;
2015 2045
2016 BT_DBG("%s", hdev->name); 2046 if (status) {
2017 2047 BT_ERR("Failed to disable LE scanning: status %d", status);
2018 if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) 2048 return;
2019 return -EINPROGRESS; 2049 }
2020
2021 param.type = type;
2022 param.interval = interval;
2023 param.window = window;
2024
2025 hci_req_lock(hdev);
2026
2027 err = __hci_req_sync(hdev, le_scan_param_req, (unsigned long) &param,
2028 timeo);
2029 if (!err)
2030 err = __hci_req_sync(hdev, le_scan_enable_req, 0, timeo);
2031
2032 hci_req_unlock(hdev);
2033 2050
2034 if (err < 0) 2051 switch (hdev->discovery.type) {
2035 return err; 2052 case DISCOV_TYPE_LE:
2053 hci_dev_lock(hdev);
2054 hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
2055 hci_dev_unlock(hdev);
2056 break;
2036 2057
2037 queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, 2058 case DISCOV_TYPE_INTERLEAVED:
2038 timeout); 2059 hci_req_init(&req, hdev);
2039 2060
2040 return 0; 2061 memset(&cp, 0, sizeof(cp));
2041} 2062 memcpy(&cp.lap, lap, sizeof(cp.lap));
2063 cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN;
2064 hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp);
2042 2065
2043int hci_cancel_le_scan(struct hci_dev *hdev) 2066 hci_dev_lock(hdev);
2044{
2045 BT_DBG("%s", hdev->name);
2046 2067
2047 if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) 2068 hci_inquiry_cache_flush(hdev);
2048 return -EALREADY;
2049 2069
2050 if (cancel_delayed_work(&hdev->le_scan_disable)) { 2070 err = hci_req_run(&req, inquiry_complete);
2051 struct hci_cp_le_set_scan_enable cp; 2071 if (err) {
2072 BT_ERR("Inquiry request failed: err %d", err);
2073 hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
2074 }
2052 2075
2053 /* Send HCI command to disable LE Scan */ 2076 hci_dev_unlock(hdev);
2054 memset(&cp, 0, sizeof(cp)); 2077 break;
2055 hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
2056 } 2078 }
2057
2058 return 0;
2059} 2079}
2060 2080
2061static void le_scan_disable_work(struct work_struct *work) 2081static void le_scan_disable_work(struct work_struct *work)
@@ -2063,46 +2083,20 @@ static void le_scan_disable_work(struct work_struct *work)
2063 struct hci_dev *hdev = container_of(work, struct hci_dev, 2083 struct hci_dev *hdev = container_of(work, struct hci_dev,
2064 le_scan_disable.work); 2084 le_scan_disable.work);
2065 struct hci_cp_le_set_scan_enable cp; 2085 struct hci_cp_le_set_scan_enable cp;
2086 struct hci_request req;
2087 int err;
2066 2088
2067 BT_DBG("%s", hdev->name); 2089 BT_DBG("%s", hdev->name);
2068 2090
2069 memset(&cp, 0, sizeof(cp)); 2091 hci_req_init(&req, hdev);
2070
2071 hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
2072}
2073
2074static void le_scan_work(struct work_struct *work)
2075{
2076 struct hci_dev *hdev = container_of(work, struct hci_dev, le_scan);
2077 struct le_scan_params *param = &hdev->le_scan_params;
2078
2079 BT_DBG("%s", hdev->name);
2080
2081 hci_do_le_scan(hdev, param->type, param->interval, param->window,
2082 param->timeout);
2083}
2084
2085int hci_le_scan(struct hci_dev *hdev, u8 type, u16 interval, u16 window,
2086 int timeout)
2087{
2088 struct le_scan_params *param = &hdev->le_scan_params;
2089
2090 BT_DBG("%s", hdev->name);
2091
2092 if (test_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags))
2093 return -ENOTSUPP;
2094
2095 if (work_busy(&hdev->le_scan))
2096 return -EINPROGRESS;
2097
2098 param->type = type;
2099 param->interval = interval;
2100 param->window = window;
2101 param->timeout = timeout;
2102 2092
2103 queue_work(system_long_wq, &hdev->le_scan); 2093 memset(&cp, 0, sizeof(cp));
2094 cp.enable = LE_SCAN_DISABLE;
2095 hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
2104 2096
2105 return 0; 2097 err = hci_req_run(&req, le_scan_disable_work_complete);
2098 if (err)
2099 BT_ERR("Disable LE scanning request failed: err %d", err);
2106} 2100}
2107 2101
2108/* Alloc HCI device */ 2102/* Alloc HCI device */
@@ -2139,7 +2133,6 @@ struct hci_dev *hci_alloc_dev(void)
2139 INIT_WORK(&hdev->cmd_work, hci_cmd_work); 2133 INIT_WORK(&hdev->cmd_work, hci_cmd_work);
2140 INIT_WORK(&hdev->tx_work, hci_tx_work); 2134 INIT_WORK(&hdev->tx_work, hci_tx_work);
2141 INIT_WORK(&hdev->power_on, hci_power_on); 2135 INIT_WORK(&hdev->power_on, hci_power_on);
2142 INIT_WORK(&hdev->le_scan, le_scan_work);
2143 2136
2144 INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); 2137 INIT_DELAYED_WORK(&hdev->power_off, hci_power_off);
2145 INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); 2138 INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off);
@@ -2198,20 +2191,15 @@ int hci_register_dev(struct hci_dev *hdev)
2198 2191
2199 BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); 2192 BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
2200 2193
2201 write_lock(&hci_dev_list_lock); 2194 hdev->workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND |
2202 list_add(&hdev->list, &hci_dev_list); 2195 WQ_MEM_RECLAIM, 1, hdev->name);
2203 write_unlock(&hci_dev_list_lock);
2204
2205 hdev->workqueue = alloc_workqueue(hdev->name, WQ_HIGHPRI | WQ_UNBOUND |
2206 WQ_MEM_RECLAIM, 1);
2207 if (!hdev->workqueue) { 2196 if (!hdev->workqueue) {
2208 error = -ENOMEM; 2197 error = -ENOMEM;
2209 goto err; 2198 goto err;
2210 } 2199 }
2211 2200
2212 hdev->req_workqueue = alloc_workqueue(hdev->name, 2201 hdev->req_workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND |
2213 WQ_HIGHPRI | WQ_UNBOUND | 2202 WQ_MEM_RECLAIM, 1, hdev->name);
2214 WQ_MEM_RECLAIM, 1);
2215 if (!hdev->req_workqueue) { 2203 if (!hdev->req_workqueue) {
2216 destroy_workqueue(hdev->workqueue); 2204 destroy_workqueue(hdev->workqueue);
2217 error = -ENOMEM; 2205 error = -ENOMEM;
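alloc_workqueue() takes a printf-style format as its first argument, so passing the device name through a literal "%s" keeps a stray '%' in the name from being parsed as a conversion specifier:

/* safe: the name is data, not a format string */
wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM,
		     1, hdev->name);

/* risky: a '%' inside hdev->name would reach vsnprintf() */
wq = alloc_workqueue(hdev->name, WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, 1);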
@@ -2232,11 +2220,18 @@ int hci_register_dev(struct hci_dev *hdev)
2232 } 2220 }
2233 } 2221 }
2234 2222
2223 if (hdev->rfkill && rfkill_blocked(hdev->rfkill))
2224 set_bit(HCI_RFKILLED, &hdev->dev_flags);
2225
2235 set_bit(HCI_SETUP, &hdev->dev_flags); 2226 set_bit(HCI_SETUP, &hdev->dev_flags);
2236 2227
2237 if (hdev->dev_type != HCI_AMP) 2228 if (hdev->dev_type != HCI_AMP)
2238 set_bit(HCI_AUTO_OFF, &hdev->dev_flags); 2229 set_bit(HCI_AUTO_OFF, &hdev->dev_flags);
2239 2230
2231 write_lock(&hci_dev_list_lock);
2232 list_add(&hdev->list, &hci_dev_list);
2233 write_unlock(&hci_dev_list_lock);
2234
2240 hci_notify(hdev, HCI_DEV_REG); 2235 hci_notify(hdev, HCI_DEV_REG);
2241 hci_dev_hold(hdev); 2236 hci_dev_hold(hdev);
2242 2237
@@ -2249,9 +2244,6 @@ err_wqueue:
2249 destroy_workqueue(hdev->req_workqueue); 2244 destroy_workqueue(hdev->req_workqueue);
2250err: 2245err:
2251 ida_simple_remove(&hci_index_ida, hdev->id); 2246 ida_simple_remove(&hci_index_ida, hdev->id);
2252 write_lock(&hci_dev_list_lock);
2253 list_del(&hdev->list);
2254 write_unlock(&hci_dev_list_lock);
2255 2247
2256 return error; 2248 return error;
2257} 2249}
@@ -3433,8 +3425,16 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
3433 */ 3425 */
3434 if (hdev->sent_cmd) { 3426 if (hdev->sent_cmd) {
3435 req_complete = bt_cb(hdev->sent_cmd)->req.complete; 3427 req_complete = bt_cb(hdev->sent_cmd)->req.complete;
3436 if (req_complete) 3428
3429 if (req_complete) {
3430 /* We must set the complete callback to NULL to
3431 * avoid calling the callback more than once if
3432 * this function gets called again.
3433 */
3434 bt_cb(hdev->sent_cmd)->req.complete = NULL;
3435
3437 goto call_complete; 3436 goto call_complete;
3437 }
3438 } 3438 }
3439 3439
3440 /* Remove all pending commands belonging to this request */ 3440 /* Remove all pending commands belonging to this request */
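Clearing the pointer is the classic one-shot callback idiom: consume it before invoking, so a re-entrant call into hci_req_cmd_complete() cannot fire the completion twice. In isolation:

req_complete = bt_cb(hdev->sent_cmd)->req.complete;
if (req_complete) {
	bt_cb(hdev->sent_cmd)->req.complete = NULL; /* fire exactly once */
	req_complete(hdev, status);
}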
@@ -3542,36 +3542,6 @@ static void hci_cmd_work(struct work_struct *work)
3542 } 3542 }
3543} 3543}
3544 3544
3545int hci_do_inquiry(struct hci_dev *hdev, u8 length)
3546{
3547 /* General inquiry access code (GIAC) */
3548 u8 lap[3] = { 0x33, 0x8b, 0x9e };
3549 struct hci_cp_inquiry cp;
3550
3551 BT_DBG("%s", hdev->name);
3552
3553 if (test_bit(HCI_INQUIRY, &hdev->flags))
3554 return -EINPROGRESS;
3555
3556 inquiry_cache_flush(hdev);
3557
3558 memset(&cp, 0, sizeof(cp));
3559 memcpy(&cp.lap, lap, sizeof(cp.lap));
3560 cp.length = length;
3561
3562 return hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp);
3563}
3564
3565int hci_cancel_inquiry(struct hci_dev *hdev)
3566{
3567 BT_DBG("%s", hdev->name);
3568
3569 if (!test_bit(HCI_INQUIRY, &hdev->flags))
3570 return -EALREADY;
3571
3572 return hci_send_cmd(hdev, HCI_OP_INQUIRY_CANCEL, 0, NULL);
3573}
3574
3575u8 bdaddr_to_le(u8 bdaddr_type) 3545u8 bdaddr_to_le(u8 bdaddr_type)
3576{ 3546{
3577 switch (bdaddr_type) { 3547 switch (bdaddr_type) {
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index b93cd2eb5d58..8db3e89fae35 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -40,21 +40,13 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
40 40
41 BT_DBG("%s status 0x%2.2x", hdev->name, status); 41 BT_DBG("%s status 0x%2.2x", hdev->name, status);
42 42
43 if (status) { 43 if (status)
44 hci_dev_lock(hdev);
45 mgmt_stop_discovery_failed(hdev, status);
46 hci_dev_unlock(hdev);
47 return; 44 return;
48 }
49 45
50 clear_bit(HCI_INQUIRY, &hdev->flags); 46 clear_bit(HCI_INQUIRY, &hdev->flags);
51 smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */ 47 smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */
52 wake_up_bit(&hdev->flags, HCI_INQUIRY); 48 wake_up_bit(&hdev->flags, HCI_INQUIRY);
53 49
54 hci_dev_lock(hdev);
55 hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
56 hci_dev_unlock(hdev);
57
58 hci_conn_check_pending(hdev); 50 hci_conn_check_pending(hdev);
59} 51}
60 52
@@ -937,20 +929,6 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb)
937 hci_dev_unlock(hdev); 929 hci_dev_unlock(hdev);
938} 930}
939 931
940static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb)
941{
942 __u8 status = *((__u8 *) skb->data);
943
944 BT_DBG("%s status 0x%2.2x", hdev->name, status);
945
946 if (status) {
947 hci_dev_lock(hdev);
948 mgmt_start_discovery_failed(hdev, status);
949 hci_dev_unlock(hdev);
950 return;
951 }
952}
953
954static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, 932static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
955 struct sk_buff *skb) 933 struct sk_buff *skb)
956{ 934{
@@ -963,41 +941,16 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
963 if (!cp) 941 if (!cp)
964 return; 942 return;
965 943
944 if (status)
945 return;
946
966 switch (cp->enable) { 947 switch (cp->enable) {
967 case LE_SCAN_ENABLE: 948 case LE_SCAN_ENABLE:
968 if (status) {
969 hci_dev_lock(hdev);
970 mgmt_start_discovery_failed(hdev, status);
971 hci_dev_unlock(hdev);
972 return;
973 }
974
975 set_bit(HCI_LE_SCAN, &hdev->dev_flags); 949 set_bit(HCI_LE_SCAN, &hdev->dev_flags);
976
977 hci_dev_lock(hdev);
978 hci_discovery_set_state(hdev, DISCOVERY_FINDING);
979 hci_dev_unlock(hdev);
980 break; 950 break;
981 951
982 case LE_SCAN_DISABLE: 952 case LE_SCAN_DISABLE:
983 if (status) {
984 hci_dev_lock(hdev);
985 mgmt_stop_discovery_failed(hdev, status);
986 hci_dev_unlock(hdev);
987 return;
988 }
989
990 clear_bit(HCI_LE_SCAN, &hdev->dev_flags); 953 clear_bit(HCI_LE_SCAN, &hdev->dev_flags);
991
992 if (hdev->discovery.type == DISCOV_TYPE_INTERLEAVED &&
993 hdev->discovery.state == DISCOVERY_FINDING) {
994 mgmt_interleaved_discovery(hdev);
995 } else {
996 hci_dev_lock(hdev);
997 hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
998 hci_dev_unlock(hdev);
999 }
1000
1001 break; 954 break;
1002 955
1003 default: 956 default:
@@ -1077,18 +1030,10 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
1077 1030
1078 if (status) { 1031 if (status) {
1079 hci_conn_check_pending(hdev); 1032 hci_conn_check_pending(hdev);
1080 hci_dev_lock(hdev);
1081 if (test_bit(HCI_MGMT, &hdev->dev_flags))
1082 mgmt_start_discovery_failed(hdev, status);
1083 hci_dev_unlock(hdev);
1084 return; 1033 return;
1085 } 1034 }
1086 1035
1087 set_bit(HCI_INQUIRY, &hdev->flags); 1036 set_bit(HCI_INQUIRY, &hdev->flags);
1088
1089 hci_dev_lock(hdev);
1090 hci_discovery_set_state(hdev, DISCOVERY_FINDING);
1091 hci_dev_unlock(hdev);
1092} 1037}
1093 1038
1094static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) 1039static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
@@ -2298,10 +2243,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
2298 hci_cc_user_passkey_neg_reply(hdev, skb); 2243 hci_cc_user_passkey_neg_reply(hdev, skb);
2299 break; 2244 break;
2300 2245
2301 case HCI_OP_LE_SET_SCAN_PARAM:
2302 hci_cc_le_set_scan_param(hdev, skb);
2303 break;
2304
2305 case HCI_OP_LE_SET_ADV_ENABLE: 2246 case HCI_OP_LE_SET_ADV_ENABLE:
2306 hci_cc_le_set_adv_enable(hdev, skb); 2247 hci_cc_le_set_adv_enable(hdev, skb);
2307 break; 2248 break;
@@ -2670,7 +2611,7 @@ static void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
2670 2611
2671 BT_DBG("%s", hdev->name); 2612 BT_DBG("%s", hdev->name);
2672 2613
2673 if (!test_bit(HCI_LINK_KEYS, &hdev->dev_flags)) 2614 if (!test_bit(HCI_MGMT, &hdev->dev_flags))
2674 return; 2615 return;
2675 2616
2676 hci_dev_lock(hdev); 2617 hci_dev_lock(hdev);
@@ -2746,7 +2687,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
2746 hci_conn_drop(conn); 2687 hci_conn_drop(conn);
2747 } 2688 }
2748 2689
2749 if (test_bit(HCI_LINK_KEYS, &hdev->dev_flags)) 2690 if (test_bit(HCI_MGMT, &hdev->dev_flags))
2750 hci_add_link_key(hdev, conn, 1, &ev->bdaddr, ev->link_key, 2691 hci_add_link_key(hdev, conn, 1, &ev->bdaddr, ev->link_key,
2751 ev->key_type, pin_len); 2692 ev->key_type, pin_len);
2752 2693
@@ -2963,15 +2904,16 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
2963 hci_conn_add_sysfs(conn); 2904 hci_conn_add_sysfs(conn);
2964 break; 2905 break;
2965 2906
2907 case 0x0d: /* Connection Rejected due to Limited Resources */
2966 case 0x11: /* Unsupported Feature or Parameter Value */ 2908 case 0x11: /* Unsupported Feature or Parameter Value */
2967 case 0x1c: /* SCO interval rejected */ 2909 case 0x1c: /* SCO interval rejected */
2968 case 0x1a: /* Unsupported Remote Feature */ 2910 case 0x1a: /* Unsupported Remote Feature */
2969 case 0x1f: /* Unspecified error */ 2911 case 0x1f: /* Unspecified error */
2970 if (conn->out && conn->attempt < 2) { 2912 if (conn->out) {
2971 conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | 2913 conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
2972 (hdev->esco_type & EDR_ESCO_MASK); 2914 (hdev->esco_type & EDR_ESCO_MASK);
2973 hci_setup_sync(conn, conn->link->handle); 2915 if (hci_setup_sync(conn, conn->link->handle))
2974 goto unlock; 2916 goto unlock;
2975 } 2917 }
2976 /* fall through */ 2918 /* fall through */
2977 2919
@@ -3083,17 +3025,20 @@ unlock:
3083static u8 hci_get_auth_req(struct hci_conn *conn) 3025static u8 hci_get_auth_req(struct hci_conn *conn)
3084{ 3026{
3085 /* If remote requests dedicated bonding follow that lead */ 3027 /* If remote requests dedicated bonding follow that lead */
-	if (conn->remote_auth == 0x02 || conn->remote_auth == 0x03) {
+	if (conn->remote_auth == HCI_AT_DEDICATED_BONDING ||
+	    conn->remote_auth == HCI_AT_DEDICATED_BONDING_MITM) {
 		/* If both remote and local IO capabilities allow MITM
 		 * protection then require it, otherwise don't */
-		if (conn->remote_cap == 0x03 || conn->io_capability == 0x03)
-			return 0x02;
+		if (conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT ||
+		    conn->io_capability == HCI_IO_NO_INPUT_OUTPUT)
+			return HCI_AT_DEDICATED_BONDING;
 		else
-			return 0x03;
+			return HCI_AT_DEDICATED_BONDING_MITM;
 	}
 
 	/* If remote requests no-bonding follow that lead */
-	if (conn->remote_auth == 0x00 || conn->remote_auth == 0x01)
+	if (conn->remote_auth == HCI_AT_NO_BONDING ||
+	    conn->remote_auth == HCI_AT_NO_BONDING_MITM)
 		return conn->remote_auth | (conn->auth_type & 0x01);
 
 	return conn->auth_type;
@@ -3125,7 +3070,7 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		/* Change the IO capability from KeyboardDisplay
 		 * to DisplayYesNo as it is not supported by BT spec. */
 		cp.capability = (conn->io_capability == 0x04) ?
-				0x01 : conn->io_capability;
+				HCI_IO_DISPLAY_YESNO : conn->io_capability;
 		conn->auth_type = hci_get_auth_req(conn);
 		cp.authentication = conn->auth_type;
 
@@ -3199,7 +3144,8 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev,
 	 * request. The only exception is when we're dedicated bonding
 	 * initiators (connect_cfm_cb set) since then we always have the MITM
 	 * bit set. */
-	if (!conn->connect_cfm_cb && loc_mitm && conn->remote_cap == 0x03) {
+	if (!conn->connect_cfm_cb && loc_mitm &&
+	    conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) {
 		BT_DBG("Rejecting request: remote device can't provide MITM");
 		hci_send_cmd(hdev, HCI_OP_USER_CONFIRM_NEG_REPLY,
 			     sizeof(ev->bdaddr), &ev->bdaddr);
@@ -3207,8 +3153,8 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev,
 	}
 
 	/* If no side requires MITM protection; auto-accept */
-	if ((!loc_mitm || conn->remote_cap == 0x03) &&
-	    (!rem_mitm || conn->io_capability == 0x03)) {
+	if ((!loc_mitm || conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) &&
+	    (!rem_mitm || conn->io_capability == HCI_IO_NO_INPUT_OUTPUT)) {
 
 		/* If we're not the initiators request authorization to
 		 * proceed from user space (mgmt_user_confirm with
@@ -3611,7 +3557,11 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	cp.handle = cpu_to_le16(conn->handle);
 
 	if (ltk->authenticated)
-		conn->sec_level = BT_SECURITY_HIGH;
+		conn->pending_sec_level = BT_SECURITY_HIGH;
+	else
+		conn->pending_sec_level = BT_SECURITY_MEDIUM;
+
+	conn->enc_key_size = ltk->enc_size;
 
 	hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp);
 
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 7ad6ecf36f20..edf623a29043 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -590,7 +590,7 @@ int __init bt_sysfs_init(void)
 
 	bt_class = class_create(THIS_MODULE, "bluetooth");
 
-	return PTR_RET(bt_class);
+	return PTR_ERR_OR_ZERO(bt_class);
 }
 
 void bt_sysfs_cleanup(void)
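PTR_RET() was later renamed to PTR_ERR_OR_ZERO(); the behaviour is identical. A sketch of the semantics, assuming <linux/err.h> for IS_ERR()/PTR_ERR():

    /* class_create() returns either a valid pointer or an ERR_PTR-encoded
     * errno; this collapses that into 0 on success or -errno on failure. */
    static inline int ptr_err_or_zero(const void *ptr)
    {
            if (IS_ERR(ptr))
                    return PTR_ERR(ptr);
            return 0;
    }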
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 940f5acb6694..bdc35a7a7fee 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -76,25 +76,19 @@ static void hidp_copy_session(struct hidp_session *session, struct hidp_conninfo
 	ci->flags = session->flags;
 	ci->state = BT_CONNECTED;
 
-	ci->vendor = 0x0000;
-	ci->product = 0x0000;
-	ci->version = 0x0000;
-
 	if (session->input) {
 		ci->vendor = session->input->id.vendor;
 		ci->product = session->input->id.product;
 		ci->version = session->input->id.version;
 		if (session->input->name)
-			strncpy(ci->name, session->input->name, 128);
+			strlcpy(ci->name, session->input->name, 128);
 		else
-			strncpy(ci->name, "HID Boot Device", 128);
-	}
-
-	if (session->hid) {
+			strlcpy(ci->name, "HID Boot Device", 128);
+	} else if (session->hid) {
 		ci->vendor = session->hid->vendor;
 		ci->product = session->hid->product;
 		ci->version = session->hid->version;
-		strncpy(ci->name, session->hid->name, 128);
+		strlcpy(ci->name, session->hid->name, 128);
 	}
 }
 
@@ -231,17 +225,47 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb)
 
 static int hidp_send_report(struct hidp_session *session, struct hid_report *report)
 {
-	unsigned char buf[32], hdr;
-	int rsize;
+	unsigned char hdr;
+	u8 *buf;
+	int rsize, ret;
 
-	rsize = ((report->size - 1) >> 3) + 1 + (report->id > 0);
-	if (rsize > sizeof(buf))
+	buf = hid_alloc_report_buf(report, GFP_ATOMIC);
+	if (!buf)
 		return -EIO;
 
 	hid_output_report(report, buf);
 	hdr = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT;
 
-	return hidp_send_intr_message(session, hdr, buf, rsize);
+	rsize = ((report->size - 1) >> 3) + 1 + (report->id > 0);
+	ret = hidp_send_intr_message(session, hdr, buf, rsize);
+
+	kfree(buf);
+	return ret;
+}
+
+static int hidp_hidinput_event(struct input_dev *dev, unsigned int type,
+			       unsigned int code, int value)
+{
+	struct hid_device *hid = input_get_drvdata(dev);
+	struct hidp_session *session = hid->driver_data;
+	struct hid_field *field;
+	int offset;
+
+	BT_DBG("session %p type %d code %d value %d",
+	       session, type, code, value);
+
+	if (type != EV_LED)
+		return -1;
+
+	offset = hidinput_find_field(hid, type, code, &field);
+	if (offset == -1) {
+		hid_warn(dev, "event field not found\n");
+		return -1;
+	}
+
+	hid_set_field(field, offset, value);
+
+	return hidp_send_report(session, field->report);
 }
 
 static int hidp_get_raw_report(struct hid_device *hid,
@@ -684,20 +708,6 @@ static int hidp_parse(struct hid_device *hid)
 
 static int hidp_start(struct hid_device *hid)
 {
-	struct hidp_session *session = hid->driver_data;
-	struct hid_report *report;
-
-	if (hid->quirks & HID_QUIRK_NO_INIT_REPORTS)
-		return 0;
-
-	list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].
-			report_list, list)
-		hidp_send_report(session, report);
-
-	list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT].
-			report_list, list)
-		hidp_send_report(session, report);
-
 	return 0;
 }
 
@@ -717,6 +727,7 @@ static struct hid_ll_driver hidp_hid_driver = {
 	.stop = hidp_stop,
 	.open = hidp_open,
 	.close = hidp_close,
+	.hidinput_input_event = hidp_hidinput_event,
 };
 
 /* This function sets up the hid device. It does not add it
@@ -851,6 +862,29 @@ static void hidp_session_dev_del(struct hidp_session *session)
 }
 
 /*
+ * Asynchronous device registration
+ * HID device drivers might want to perform I/O during initialization to
+ * detect device types. Therefore, call device registration in a separate
+ * worker so the HIDP thread can schedule I/O operations.
+ * Note that this must be called after the worker thread was initialized
+ * successfully. This will then add the devices and increase session state
+ * on success, otherwise it will terminate the session thread.
+ */
+static void hidp_session_dev_work(struct work_struct *work)
+{
+	struct hidp_session *session = container_of(work,
+						    struct hidp_session,
+						    dev_init);
+	int ret;
+
+	ret = hidp_session_dev_add(session);
+	if (!ret)
+		atomic_inc(&session->state);
+	else
+		hidp_session_terminate(session);
+}
+
+/*
  * Create new session object
 * Allocate session object, initialize static fields, copy input data into the
 * object and take a reference to all sub-objects.
@@ -897,6 +931,7 @@ static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr,
 	session->idle_to = req->idle_to;
 
 	/* device management */
+	INIT_WORK(&session->dev_init, hidp_session_dev_work);
 	setup_timer(&session->timer, hidp_idle_timeout,
 		    (unsigned long)session);
 
@@ -1035,8 +1070,8 @@ static void hidp_session_terminate(struct hidp_session *session)
 * Probe HIDP session
 * This is called from the l2cap_conn core when our l2cap_user object is bound
 * to the hci-connection. We get the session via the \user object and can now
- * start the session thread, register the HID/input devices and link it into
- * the global session list.
+ * start the session thread, link it into the global session list and
+ * schedule HID/input device registration.
 * The global session-list owns its own reference to the session object so you
 * can drop your own reference after registering the l2cap_user object.
 */
@@ -1058,21 +1093,30 @@ static int hidp_session_probe(struct l2cap_conn *conn,
 		goto out_unlock;
 	}
 
+	if (session->input) {
+		ret = hidp_session_dev_add(session);
+		if (ret)
+			goto out_unlock;
+	}
+
 	ret = hidp_session_start_sync(session);
 	if (ret)
-		goto out_unlock;
+		goto out_del;
 
-	ret = hidp_session_dev_add(session);
-	if (ret)
-		goto out_stop;
+	/* HID device registration is async to allow I/O during probe */
+	if (session->input)
+		atomic_inc(&session->state);
+	else
+		schedule_work(&session->dev_init);
 
 	hidp_session_get(session);
 	list_add(&session->list, &hidp_session_list);
 	ret = 0;
 	goto out_unlock;
 
-out_stop:
-	hidp_session_terminate(session);
+out_del:
+	if (session->input)
+		hidp_session_dev_del(session);
out_unlock:
 	up_write(&hidp_session_sem);
 	return ret;
@@ -1102,7 +1146,12 @@ static void hidp_session_remove(struct l2cap_conn *conn,
 	down_write(&hidp_session_sem);
 
 	hidp_session_terminate(session);
-	hidp_session_dev_del(session);
+
+	cancel_work_sync(&session->dev_init);
+	if (session->input ||
+	    atomic_read(&session->state) > HIDP_SESSION_PREPARING)
+		hidp_session_dev_del(session);
+
 	list_del(&session->list);
 
 	up_write(&hidp_session_sem);
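The hidp_send_report() change above exists because a HID report can be larger than the old 32-byte stack buffer. A user-space sketch of the sizing rule the new code relies on (hid_alloc_report_buf() in the HID core allocates roughly this many bytes):

    #include <stdlib.h>

    /* A report of size_bits bits occupies ((size_bits - 1) / 8) + 1 bytes,
     * plus one leading byte when a non-zero report ID is in use; this is
     * the same rsize computation hidp_send_report() performs above. */
    static unsigned char *alloc_report_buf(unsigned int size_bits,
                                           unsigned int report_id)
    {
            size_t rsize = ((size_bits - 1) >> 3) + 1 + (report_id > 0);

            return malloc(rsize);
    }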
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index 6162ce8606ac..9e6cc3553105 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -128,6 +128,7 @@ int hidp_get_conninfo(struct hidp_conninfo *ci);
 
 enum hidp_session_state {
 	HIDP_SESSION_IDLING,
+	HIDP_SESSION_PREPARING,
 	HIDP_SESSION_RUNNING,
 };
 
@@ -156,6 +157,7 @@ struct hidp_session {
 	unsigned long idle_to;
 
 	/* device management */
+	struct work_struct dev_init;
 	struct input_dev *input;
 	struct hid_device *hid;
 	struct timer_list timer;
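Together, the new dev_init work item and the HIDP_SESSION_PREPARING state let device registration run outside the session thread. A kernel-style sketch of the deferral pattern with hypothetical names (only INIT_WORK/schedule_work/cancel_work_sync and container_of are real API):

    #include <linux/workqueue.h>

    struct my_session {
            struct work_struct dev_init;    /* hypothetical container */
    };

    static void my_dev_init_work(struct work_struct *work)
    {
            struct my_session *s = container_of(work, struct my_session,
                                                dev_init);
            /* registration may block and perform I/O here, while the
             * session thread is already running and able to service it */
            (void)s;
    }

    static void my_session_start(struct my_session *s)
    {
            INIT_WORK(&s->dev_init, my_dev_init_work);
            schedule_work(&s->dev_init);
            /* teardown must later call cancel_work_sync(&s->dev_init) */
    }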
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index a76d1ac0321b..63fa11109a1c 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -504,8 +504,10 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan)
 	if (conn->hcon->type == LE_LINK) {
 		/* LE connection */
 		chan->omtu = L2CAP_DEFAULT_MTU;
-		chan->scid = L2CAP_CID_LE_DATA;
-		chan->dcid = L2CAP_CID_LE_DATA;
+		if (chan->dcid == L2CAP_CID_ATT)
+			chan->scid = L2CAP_CID_ATT;
+		else
+			chan->scid = l2cap_alloc_cid(conn);
 	} else {
 		/* Alloc CID for connection-oriented socket */
 		chan->scid = l2cap_alloc_cid(conn);
@@ -543,6 +545,8 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan)
 
 	l2cap_chan_hold(chan);
 
+	hci_conn_hold(conn->hcon);
+
 	list_add(&chan->list, &conn->chan_l);
 }
 
@@ -1338,17 +1342,21 @@ static struct l2cap_chan *l2cap_global_chan_by_scid(int state, u16 cid,
 
 static void l2cap_le_conn_ready(struct l2cap_conn *conn)
 {
-	struct sock *parent, *sk;
+	struct sock *parent;
 	struct l2cap_chan *chan, *pchan;
 
 	BT_DBG("");
 
 	/* Check if we have socket listening on cid */
-	pchan = l2cap_global_chan_by_scid(BT_LISTEN, L2CAP_CID_LE_DATA,
+	pchan = l2cap_global_chan_by_scid(BT_LISTEN, L2CAP_CID_ATT,
 					  conn->src, conn->dst);
 	if (!pchan)
 		return;
 
+	/* Client ATT sockets should override the server one */
+	if (__l2cap_get_chan_by_dcid(conn, L2CAP_CID_ATT))
+		return;
+
 	parent = pchan->sk;
 
 	lock_sock(parent);
@@ -1357,17 +1365,12 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn)
 	if (!chan)
 		goto clean;
 
-	sk = chan->sk;
-
-	hci_conn_hold(conn->hcon);
-	conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
+	chan->dcid = L2CAP_CID_ATT;
 
-	bacpy(&bt_sk(sk)->src, conn->src);
-	bacpy(&bt_sk(sk)->dst, conn->dst);
+	bacpy(&bt_sk(chan->sk)->src, conn->src);
+	bacpy(&bt_sk(chan->sk)->dst, conn->dst);
 
-	l2cap_chan_add(conn, chan);
-
-	l2cap_chan_ready(chan);
+	__l2cap_chan_add(conn, chan);
 
 clean:
 	release_sock(parent);
@@ -1380,14 +1383,17 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
 
 	BT_DBG("conn %p", conn);
 
-	if (!hcon->out && hcon->type == LE_LINK)
-		l2cap_le_conn_ready(conn);
-
+	/* For outgoing pairing which doesn't necessarily have an
+	 * associated socket (e.g. mgmt_pair_device).
+	 */
 	if (hcon->out && hcon->type == LE_LINK)
 		smp_conn_security(hcon, hcon->pending_sec_level);
 
 	mutex_lock(&conn->chan_lock);
 
+	if (hcon->type == LE_LINK)
+		l2cap_le_conn_ready(conn);
+
 	list_for_each_entry(chan, &conn->chan_l, list) {
 
 		l2cap_chan_lock(chan);
@@ -1409,8 +1415,9 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
 			sk->sk_state_change(sk);
 			release_sock(sk);
 
-		} else if (chan->state == BT_CONNECT)
+		} else if (chan->state == BT_CONNECT) {
 			l2cap_do_start(chan);
+		}
 
 		l2cap_chan_unlock(chan);
 	}
@@ -1792,7 +1799,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
 
 	auth_type = l2cap_get_auth_type(chan);
 
-	if (chan->dcid == L2CAP_CID_LE_DATA)
+	if (bdaddr_type_is_le(dst_type))
 		hcon = hci_connect(hdev, LE_LINK, dst, dst_type,
 				   chan->sec_level, auth_type);
 	else
@@ -1811,16 +1818,10 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
 		goto done;
 	}
 
-	if (hcon->type == LE_LINK) {
-		err = 0;
-
-		if (!list_empty(&conn->chan_l)) {
-			err = -EBUSY;
-			hci_conn_drop(hcon);
-		}
-
-		if (err)
-			goto done;
+	if (cid && __l2cap_get_chan_by_dcid(conn, cid)) {
+		hci_conn_drop(hcon);
+		err = -EBUSY;
+		goto done;
 	}
 
 	/* Update source addr of the socket */
@@ -1830,6 +1831,9 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
 	l2cap_chan_add(conn, chan);
 	l2cap_chan_lock(chan);
 
+	/* l2cap_chan_add takes its own ref so we can drop this one */
+	hci_conn_drop(hcon);
+
 	l2cap_state_change(chan, BT_CONNECT);
 	__set_chan_timer(chan, sk->sk_sndtimeo);
 
@@ -2852,6 +2856,9 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code,
 	BT_DBG("conn %p, code 0x%2.2x, ident 0x%2.2x, len %u",
 	       conn, code, ident, dlen);
 
+	if (conn->mtu < L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE)
+		return NULL;
+
 	len = L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE + dlen;
 	count = min_t(unsigned int, conn->mtu, len);
 
@@ -3677,10 +3684,14 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len)
 }
 
 static inline int l2cap_command_rej(struct l2cap_conn *conn,
-				    struct l2cap_cmd_hdr *cmd, u8 *data)
+				    struct l2cap_cmd_hdr *cmd, u16 cmd_len,
+				    u8 *data)
 {
 	struct l2cap_cmd_rej_unk *rej = (struct l2cap_cmd_rej_unk *) data;
 
+	if (cmd_len < sizeof(*rej))
+		return -EPROTO;
+
 	if (rej->reason != L2CAP_REJ_NOT_UNDERSTOOD)
 		return 0;
 
@@ -3744,7 +3755,12 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
 
 	sk = chan->sk;
 
-	hci_conn_hold(conn->hcon);
+	/* For certain devices (ex: HID mouse), support for authentication,
+	 * pairing and bonding is optional. For such devices, in order to
+	 * avoid keeping the ACL alive for too long after L2CAP disconnection,
+	 * reset the ACL disc_timeout back to HCI_DISCONN_TIMEOUT during
+	 * L2CAP connect.
+	 */
+	conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
 
 	bacpy(&bt_sk(sk)->src, conn->src);
 	bacpy(&bt_sk(sk)->dst, conn->dst);
@@ -3829,11 +3845,14 @@ sendresp:
 }
 
 static int l2cap_connect_req(struct l2cap_conn *conn,
-			     struct l2cap_cmd_hdr *cmd, u8 *data)
+			     struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data)
 {
 	struct hci_dev *hdev = conn->hcon->hdev;
 	struct hci_conn *hcon = conn->hcon;
 
+	if (cmd_len < sizeof(struct l2cap_conn_req))
+		return -EPROTO;
+
 	hci_dev_lock(hdev);
 	if (test_bit(HCI_MGMT, &hdev->dev_flags) &&
 	    !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags))
@@ -3847,7 +3866,8 @@ static int l2cap_connect_req(struct l2cap_conn *conn,
 }
 
 static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
-				    struct l2cap_cmd_hdr *cmd, u8 *data)
+				    struct l2cap_cmd_hdr *cmd, u16 cmd_len,
+				    u8 *data)
 {
 	struct l2cap_conn_rsp *rsp = (struct l2cap_conn_rsp *) data;
 	u16 scid, dcid, result, status;
@@ -3855,6 +3875,9 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
 	u8 req[128];
 	int err;
 
+	if (cmd_len < sizeof(*rsp))
+		return -EPROTO;
+
 	scid = __le16_to_cpu(rsp->scid);
 	dcid = __le16_to_cpu(rsp->dcid);
 	result = __le16_to_cpu(rsp->result);
@@ -3952,6 +3975,9 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
 	struct l2cap_chan *chan;
 	int len, err = 0;
 
+	if (cmd_len < sizeof(*req))
+		return -EPROTO;
+
 	dcid = __le16_to_cpu(req->dcid);
 	flags = __le16_to_cpu(req->flags);
 
@@ -3975,7 +4001,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
 
 	/* Reject if config buffer is too small. */
 	len = cmd_len - sizeof(*req);
-	if (len < 0 || chan->conf_len + len > sizeof(chan->conf_req)) {
+	if (chan->conf_len + len > sizeof(chan->conf_req)) {
 		l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP,
 			       l2cap_build_conf_rsp(chan, rsp,
 			       L2CAP_CONF_REJECT, flags), rsp);
@@ -4053,14 +4079,18 @@ unlock:
 }
 
 static inline int l2cap_config_rsp(struct l2cap_conn *conn,
-				   struct l2cap_cmd_hdr *cmd, u8 *data)
+				   struct l2cap_cmd_hdr *cmd, u16 cmd_len,
+				   u8 *data)
 {
 	struct l2cap_conf_rsp *rsp = (struct l2cap_conf_rsp *)data;
 	u16 scid, flags, result;
 	struct l2cap_chan *chan;
-	int len = le16_to_cpu(cmd->len) - sizeof(*rsp);
+	int len = cmd_len - sizeof(*rsp);
 	int err = 0;
 
+	if (cmd_len < sizeof(*rsp))
+		return -EPROTO;
+
 	scid = __le16_to_cpu(rsp->scid);
 	flags = __le16_to_cpu(rsp->flags);
 	result = __le16_to_cpu(rsp->result);
@@ -4161,7 +4191,8 @@ done:
 }
 
 static inline int l2cap_disconnect_req(struct l2cap_conn *conn,
-				       struct l2cap_cmd_hdr *cmd, u8 *data)
+				       struct l2cap_cmd_hdr *cmd, u16 cmd_len,
+				       u8 *data)
 {
 	struct l2cap_disconn_req *req = (struct l2cap_disconn_req *) data;
 	struct l2cap_disconn_rsp rsp;
@@ -4169,6 +4200,9 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn,
 	struct l2cap_chan *chan;
 	struct sock *sk;
 
+	if (cmd_len != sizeof(*req))
+		return -EPROTO;
+
 	scid = __le16_to_cpu(req->scid);
 	dcid = __le16_to_cpu(req->dcid);
 
@@ -4208,12 +4242,16 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn,
 }
 
 static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn,
-				       struct l2cap_cmd_hdr *cmd, u8 *data)
+				       struct l2cap_cmd_hdr *cmd, u16 cmd_len,
+				       u8 *data)
 {
 	struct l2cap_disconn_rsp *rsp = (struct l2cap_disconn_rsp *) data;
 	u16 dcid, scid;
 	struct l2cap_chan *chan;
 
+	if (cmd_len != sizeof(*rsp))
+		return -EPROTO;
+
 	scid = __le16_to_cpu(rsp->scid);
 	dcid = __le16_to_cpu(rsp->dcid);
 
@@ -4243,11 +4281,15 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn,
 }
 
 static inline int l2cap_information_req(struct l2cap_conn *conn,
-					struct l2cap_cmd_hdr *cmd, u8 *data)
+					struct l2cap_cmd_hdr *cmd, u16 cmd_len,
+					u8 *data)
 {
 	struct l2cap_info_req *req = (struct l2cap_info_req *) data;
 	u16 type;
 
+	if (cmd_len != sizeof(*req))
+		return -EPROTO;
+
 	type = __le16_to_cpu(req->type);
 
 	BT_DBG("type 0x%4.4x", type);
 
@@ -4294,11 +4336,15 @@ static inline int l2cap_information_req(struct l2cap_conn *conn,
 }
 
 static inline int l2cap_information_rsp(struct l2cap_conn *conn,
-					struct l2cap_cmd_hdr *cmd, u8 *data)
+					struct l2cap_cmd_hdr *cmd, u16 cmd_len,
+					u8 *data)
 {
 	struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) data;
 	u16 type, result;
 
+	if (cmd_len < sizeof(*rsp))
+		return -EPROTO;
+
 	type = __le16_to_cpu(rsp->type);
 	result = __le16_to_cpu(rsp->result);
 
@@ -5164,16 +5210,16 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn,
 
 	switch (cmd->code) {
 	case L2CAP_COMMAND_REJ:
-		l2cap_command_rej(conn, cmd, data);
+		l2cap_command_rej(conn, cmd, cmd_len, data);
 		break;
 
 	case L2CAP_CONN_REQ:
-		err = l2cap_connect_req(conn, cmd, data);
+		err = l2cap_connect_req(conn, cmd, cmd_len, data);
 		break;
 
 	case L2CAP_CONN_RSP:
 	case L2CAP_CREATE_CHAN_RSP:
-		err = l2cap_connect_create_rsp(conn, cmd, data);
+		err = l2cap_connect_create_rsp(conn, cmd, cmd_len, data);
 		break;
 
 	case L2CAP_CONF_REQ:
@@ -5181,15 +5227,15 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn,
 		break;
 
 	case L2CAP_CONF_RSP:
-		err = l2cap_config_rsp(conn, cmd, data);
+		err = l2cap_config_rsp(conn, cmd, cmd_len, data);
 		break;
 
 	case L2CAP_DISCONN_REQ:
-		err = l2cap_disconnect_req(conn, cmd, data);
+		err = l2cap_disconnect_req(conn, cmd, cmd_len, data);
 		break;
 
 	case L2CAP_DISCONN_RSP:
-		err = l2cap_disconnect_rsp(conn, cmd, data);
+		err = l2cap_disconnect_rsp(conn, cmd, cmd_len, data);
 		break;
 
 	case L2CAP_ECHO_REQ:
@@ -5200,11 +5246,11 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn,
 		break;
 
 	case L2CAP_INFO_REQ:
-		err = l2cap_information_req(conn, cmd, data);
+		err = l2cap_information_req(conn, cmd, cmd_len, data);
 		break;
 
 	case L2CAP_INFO_RSP:
-		err = l2cap_information_rsp(conn, cmd, data);
+		err = l2cap_information_rsp(conn, cmd, cmd_len, data);
 		break;
 
 	case L2CAP_CREATE_CHAN_REQ:
@@ -5255,6 +5301,51 @@ static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn,
 	}
 }
 
+static inline void l2cap_le_sig_channel(struct l2cap_conn *conn,
+					struct sk_buff *skb)
+{
+	u8 *data = skb->data;
+	int len = skb->len;
+	struct l2cap_cmd_hdr cmd;
+	int err;
+
+	l2cap_raw_recv(conn, skb);
+
+	while (len >= L2CAP_CMD_HDR_SIZE) {
+		u16 cmd_len;
+		memcpy(&cmd, data, L2CAP_CMD_HDR_SIZE);
+		data += L2CAP_CMD_HDR_SIZE;
+		len -= L2CAP_CMD_HDR_SIZE;
+
+		cmd_len = le16_to_cpu(cmd.len);
+
+		BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd_len,
+		       cmd.ident);
+
+		if (cmd_len > len || !cmd.ident) {
+			BT_DBG("corrupted command");
+			break;
+		}
+
+		err = l2cap_le_sig_cmd(conn, &cmd, data);
+		if (err) {
+			struct l2cap_cmd_rej_unk rej;
+
+			BT_ERR("Wrong link type (%d)", err);
+
+			/* FIXME: Map err to a valid reason */
+			rej.reason = __constant_cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD);
+			l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ,
+				       sizeof(rej), &rej);
+		}
+
+		data += cmd_len;
+		len -= cmd_len;
+	}
+
+	kfree_skb(skb);
+}
+
 static inline void l2cap_sig_channel(struct l2cap_conn *conn,
 				     struct sk_buff *skb)
 {
@@ -5281,11 +5372,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn,
 			break;
 		}
 
-		if (conn->hcon->type == LE_LINK)
-			err = l2cap_le_sig_cmd(conn, &cmd, data);
-		else
-			err = l2cap_bredr_sig_cmd(conn, &cmd, cmd_len, data);
-
+		err = l2cap_bredr_sig_cmd(conn, &cmd, cmd_len, data);
 		if (err) {
 			struct l2cap_cmd_rej_unk rej;
 
@@ -6319,16 +6406,13 @@ static void l2cap_att_channel(struct l2cap_conn *conn,
 {
 	struct l2cap_chan *chan;
 
-	chan = l2cap_global_chan_by_scid(0, L2CAP_CID_LE_DATA,
+	chan = l2cap_global_chan_by_scid(BT_CONNECTED, L2CAP_CID_ATT,
 					 conn->src, conn->dst);
 	if (!chan)
 		goto drop;
 
 	BT_DBG("chan %p, len %d", chan, skb->len);
 
-	if (chan->state != BT_BOUND && chan->state != BT_CONNECTED)
-		goto drop;
-
 	if (chan->imtu < skb->len)
 		goto drop;
 
@@ -6358,6 +6442,8 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
 
 	switch (cid) {
 	case L2CAP_CID_LE_SIGNALING:
+		l2cap_le_sig_channel(conn, skb);
+		break;
 	case L2CAP_CID_SIGNALING:
 		l2cap_sig_channel(conn, skb);
 		break;
@@ -6368,7 +6454,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
 		l2cap_conless_channel(conn, psm, skb);
 		break;
 
-	case L2CAP_CID_LE_DATA:
+	case L2CAP_CID_ATT:
 		l2cap_att_channel(conn, skb);
 		break;
 
@@ -6494,7 +6580,7 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
 			continue;
 		}
 
-		if (chan->scid == L2CAP_CID_LE_DATA) {
+		if (chan->scid == L2CAP_CID_ATT) {
 			if (!status && encrypt) {
 				chan->sec_level = hcon->sec_level;
 				l2cap_chan_ready(chan);
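A recurring pattern in the l2cap_core.c hunks above is passing cmd_len down and rejecting any signalling PDU whose payload is shorter than the structure about to be parsed. A user-space sketch of that guard:

    #include <stdint.h>
    #include <string.h>

    struct conn_req {                    /* stand-in for l2cap_conn_req */
            uint16_t psm;
            uint16_t scid;
    };

    /* Reject the command when the advertised length cannot hold the
     * structure; otherwise a cast would read past the received data. */
    static int parse_conn_req(const uint8_t *data, uint16_t cmd_len,
                              struct conn_req *out)
    {
            if (cmd_len < sizeof(*out))
                    return -71;          /* -EPROTO */

            memcpy(out, data, sizeof(*out));
            return 0;
    }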
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 36fed40c162c..0098af80b213 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -466,7 +466,7 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname,
 static bool l2cap_valid_mtu(struct l2cap_chan *chan, u16 mtu)
 {
 	switch (chan->scid) {
-	case L2CAP_CID_LE_DATA:
+	case L2CAP_CID_ATT:
 		if (mtu < L2CAP_LE_MIN_MTU)
 			return false;
 		break;
@@ -630,7 +630,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
 	conn = chan->conn;
 
 	/*change security for LE channels */
-	if (chan->scid == L2CAP_CID_LE_DATA) {
+	if (chan->scid == L2CAP_CID_ATT) {
 		if (!conn->hcon->out) {
 			err = -EINVAL;
 			break;
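The MTU check above enforces the LE lower bound: an ATT channel may not advertise an MTU below L2CAP_LE_MIN_MTU (defined as 23 in l2cap.h, matching the spec minimum), while BR/EDR channels have their own minimum. In sketch form:

    /* Assumes L2CAP_LE_MIN_MTU == 23 as defined in l2cap.h. */
    static int le_mtu_is_valid(unsigned int mtu)
    {
            return mtu >= 23;
    }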
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 35fef22703e9..fedc5399d465 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -102,18 +102,6 @@ static const u16 mgmt_events[] = {
 	MGMT_EV_PASSKEY_NOTIFY,
 };
 
-/*
- * These LE scan and inquiry parameters were chosen according to LE General
- * Discovery Procedure specification.
- */
-#define LE_SCAN_WIN			0x12
-#define LE_SCAN_INT			0x12
-#define LE_SCAN_TIMEOUT_LE_ONLY		msecs_to_jiffies(10240)
-#define LE_SCAN_TIMEOUT_BREDR_LE	msecs_to_jiffies(5120)
-
-#define INQUIRY_LEN_BREDR		0x08 /* TGAP(100) */
-#define INQUIRY_LEN_BREDR_LE		0x04 /* TGAP(100)/2 */
-
 #define CACHE_TIMEOUT	msecs_to_jiffies(2 * 1000)
 
 #define hdev_is_powered(hdev) (test_bit(HCI_UP, &hdev->flags) && \
@@ -1748,8 +1736,6 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	hci_link_keys_clear(hdev);
 
-	set_bit(HCI_LINK_KEYS, &hdev->dev_flags);
-
 	if (cp->debug_keys)
 		set_bit(HCI_DEBUG_KEYS, &hdev->dev_flags);
 	else
@@ -2633,28 +2619,72 @@ static int remove_remote_oob_data(struct sock *sk, struct hci_dev *hdev,
 	return err;
 }
 
-int mgmt_interleaved_discovery(struct hci_dev *hdev)
+static int mgmt_start_discovery_failed(struct hci_dev *hdev, u8 status)
 {
+	struct pending_cmd *cmd;
+	u8 type;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
 
-	hci_dev_lock(hdev);
+	cmd = mgmt_pending_find(MGMT_OP_START_DISCOVERY, hdev);
+	if (!cmd)
+		return -ENOENT;
 
-	err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR_LE);
-	if (err < 0)
-		hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+	type = hdev->discovery.type;
 
-	hci_dev_unlock(hdev);
+	err = cmd_complete(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status),
+			   &type, sizeof(type));
+	mgmt_pending_remove(cmd);
 
 	return err;
 }
 
+static void start_discovery_complete(struct hci_dev *hdev, u8 status)
+{
+	BT_DBG("status %d", status);
+
+	if (status) {
+		hci_dev_lock(hdev);
+		mgmt_start_discovery_failed(hdev, status);
+		hci_dev_unlock(hdev);
+		return;
+	}
+
+	hci_dev_lock(hdev);
+	hci_discovery_set_state(hdev, DISCOVERY_FINDING);
+	hci_dev_unlock(hdev);
+
+	switch (hdev->discovery.type) {
+	case DISCOV_TYPE_LE:
+		queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable,
+				   DISCOV_LE_TIMEOUT);
+		break;
+
+	case DISCOV_TYPE_INTERLEAVED:
+		queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable,
+				   DISCOV_INTERLEAVED_TIMEOUT);
+		break;
+
+	case DISCOV_TYPE_BREDR:
+		break;
+
+	default:
+		BT_ERR("Invalid discovery type %d", hdev->discovery.type);
+	}
+}
+
 static int start_discovery(struct sock *sk, struct hci_dev *hdev,
 			   void *data, u16 len)
 {
 	struct mgmt_cp_start_discovery *cp = data;
 	struct pending_cmd *cmd;
+	struct hci_cp_le_set_scan_param param_cp;
+	struct hci_cp_le_set_scan_enable enable_cp;
+	struct hci_cp_inquiry inq_cp;
+	struct hci_request req;
+	/* General inquiry access code (GIAC) */
+	u8 lap[3] = { 0x33, 0x8b, 0x9e };
 	int err;
 
 	BT_DBG("%s", hdev->name);
@@ -2687,6 +2717,8 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev,
 
 	hdev->discovery.type = cp->type;
 
+	hci_req_init(&req, hdev);
+
 	switch (hdev->discovery.type) {
 	case DISCOV_TYPE_BREDR:
 		if (!lmp_bredr_capable(hdev)) {
@@ -2696,31 +2728,64 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev,
 			goto failed;
 		}
 
-		err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR);
+		if (test_bit(HCI_INQUIRY, &hdev->flags)) {
+			err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY,
+					 MGMT_STATUS_BUSY);
+			mgmt_pending_remove(cmd);
+			goto failed;
+		}
+
+		hci_inquiry_cache_flush(hdev);
+
+		memset(&inq_cp, 0, sizeof(inq_cp));
+		memcpy(&inq_cp.lap, lap, sizeof(inq_cp.lap));
+		inq_cp.length = DISCOV_BREDR_INQUIRY_LEN;
+		hci_req_add(&req, HCI_OP_INQUIRY, sizeof(inq_cp), &inq_cp);
 		break;
 
 	case DISCOV_TYPE_LE:
-		if (!lmp_host_le_capable(hdev)) {
+	case DISCOV_TYPE_INTERLEAVED:
+		if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
 			err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY,
 					 MGMT_STATUS_NOT_SUPPORTED);
 			mgmt_pending_remove(cmd);
 			goto failed;
 		}
 
-		err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT,
-				  LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY);
-		break;
-
-	case DISCOV_TYPE_INTERLEAVED:
-		if (!lmp_host_le_capable(hdev) || !lmp_bredr_capable(hdev)) {
+		if (hdev->discovery.type == DISCOV_TYPE_INTERLEAVED &&
+		    !lmp_bredr_capable(hdev)) {
 			err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY,
 					 MGMT_STATUS_NOT_SUPPORTED);
 			mgmt_pending_remove(cmd);
 			goto failed;
 		}
 
-		err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT,
-				  LE_SCAN_WIN, LE_SCAN_TIMEOUT_BREDR_LE);
+		if (test_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags)) {
+			err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY,
+					 MGMT_STATUS_REJECTED);
+			mgmt_pending_remove(cmd);
+			goto failed;
+		}
+
+		if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) {
+			err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY,
+					 MGMT_STATUS_BUSY);
+			mgmt_pending_remove(cmd);
+			goto failed;
+		}
+
+		memset(&param_cp, 0, sizeof(param_cp));
+		param_cp.type = LE_SCAN_ACTIVE;
+		param_cp.interval = cpu_to_le16(DISCOV_LE_SCAN_INT);
+		param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN);
+		hci_req_add(&req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp),
+			    &param_cp);
+
+		memset(&enable_cp, 0, sizeof(enable_cp));
+		enable_cp.enable = LE_SCAN_ENABLE;
+		enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
+		hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp),
+			    &enable_cp);
 		break;
 
 	default:
@@ -2730,6 +2795,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev,
 		goto failed;
 	}
 
+	err = hci_req_run(&req, start_discovery_complete);
 	if (err < 0)
 		mgmt_pending_remove(cmd);
 	else
@@ -2740,6 +2806,39 @@ failed:
 	return err;
 }
 
+static int mgmt_stop_discovery_failed(struct hci_dev *hdev, u8 status)
+{
+	struct pending_cmd *cmd;
+	int err;
+
+	cmd = mgmt_pending_find(MGMT_OP_STOP_DISCOVERY, hdev);
+	if (!cmd)
+		return -ENOENT;
+
+	err = cmd_complete(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status),
+			   &hdev->discovery.type, sizeof(hdev->discovery.type));
+	mgmt_pending_remove(cmd);
+
+	return err;
+}
+
+static void stop_discovery_complete(struct hci_dev *hdev, u8 status)
+{
+	BT_DBG("status %d", status);
+
+	hci_dev_lock(hdev);
+
+	if (status) {
+		mgmt_stop_discovery_failed(hdev, status);
+		goto unlock;
+	}
+
+	hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+
+unlock:
+	hci_dev_unlock(hdev);
+}
+
 static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
 			  u16 len)
 {
@@ -2747,6 +2846,8 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
 	struct pending_cmd *cmd;
 	struct hci_cp_remote_name_req_cancel cp;
 	struct inquiry_entry *e;
+	struct hci_request req;
+	struct hci_cp_le_set_scan_enable enable_cp;
 	int err;
 
 	BT_DBG("%s", hdev->name);
@@ -2773,12 +2874,20 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
 		goto unlock;
 	}
 
+	hci_req_init(&req, hdev);
+
 	switch (hdev->discovery.state) {
 	case DISCOVERY_FINDING:
-		if (test_bit(HCI_INQUIRY, &hdev->flags))
-			err = hci_cancel_inquiry(hdev);
-		else
-			err = hci_cancel_le_scan(hdev);
+		if (test_bit(HCI_INQUIRY, &hdev->flags)) {
+			hci_req_add(&req, HCI_OP_INQUIRY_CANCEL, 0, NULL);
+		} else {
+			cancel_delayed_work(&hdev->le_scan_disable);
+
+			memset(&enable_cp, 0, sizeof(enable_cp));
+			enable_cp.enable = LE_SCAN_DISABLE;
+			hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE,
+				    sizeof(enable_cp), &enable_cp);
+		}
 
 		break;
 
@@ -2796,16 +2905,22 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
 		}
 
 		bacpy(&cp.bdaddr, &e->data.bdaddr);
-		err = hci_send_cmd(hdev, HCI_OP_REMOTE_NAME_REQ_CANCEL,
-				   sizeof(cp), &cp);
+		hci_req_add(&req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp),
+			    &cp);
 
 		break;
 
 	default:
 		BT_DBG("unknown discovery state %u", hdev->discovery.state);
-		err = -EFAULT;
+
+		mgmt_pending_remove(cmd);
+		err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY,
+				   MGMT_STATUS_FAILED, &mgmt_cp->type,
+				   sizeof(mgmt_cp->type));
+		goto unlock;
 	}
 
+	err = hci_req_run(&req, stop_discovery_complete);
 	if (err < 0)
 		mgmt_pending_remove(cmd);
 	else
@@ -3418,6 +3533,27 @@ new_settings:
 	return err;
 }
 
+int mgmt_set_powered_failed(struct hci_dev *hdev, int err)
+{
+	struct pending_cmd *cmd;
+	u8 status;
+
+	cmd = mgmt_pending_find(MGMT_OP_SET_POWERED, hdev);
+	if (!cmd)
+		return -ENOENT;
+
+	if (err == -ERFKILL)
+		status = MGMT_STATUS_RFKILLED;
+	else
+		status = MGMT_STATUS_FAILED;
+
+	err = cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_POWERED, status);
+
+	mgmt_pending_remove(cmd);
+
+	return err;
+}
+
 int mgmt_discoverable(struct hci_dev *hdev, u8 discoverable)
 {
 	struct cmd_lookup match = { NULL, hdev };
@@ -4042,6 +4178,9 @@ int mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 	struct mgmt_ev_device_found *ev = (void *) buf;
 	size_t ev_size;
 
+	if (!hci_discovery_active(hdev))
+		return -EPERM;
+
 	/* Leave 5 bytes for a potential CoD field */
 	if (sizeof(*ev) + eir_len + 5 > sizeof(buf))
 		return -EINVAL;
@@ -4093,43 +4232,6 @@ int mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 			  sizeof(*ev) + eir_len, NULL);
 }
 
-int mgmt_start_discovery_failed(struct hci_dev *hdev, u8 status)
-{
-	struct pending_cmd *cmd;
-	u8 type;
-	int err;
-
-	hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
-
-	cmd = mgmt_pending_find(MGMT_OP_START_DISCOVERY, hdev);
-	if (!cmd)
-		return -ENOENT;
-
-	type = hdev->discovery.type;
-
-	err = cmd_complete(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status),
-			   &type, sizeof(type));
-	mgmt_pending_remove(cmd);
-
-	return err;
-}
-
-int mgmt_stop_discovery_failed(struct hci_dev *hdev, u8 status)
-{
-	struct pending_cmd *cmd;
-	int err;
-
-	cmd = mgmt_pending_find(MGMT_OP_STOP_DISCOVERY, hdev);
-	if (!cmd)
-		return -ENOENT;
-
-	err = cmd_complete(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status),
-			   &hdev->discovery.type, sizeof(hdev->discovery.type));
-	mgmt_pending_remove(cmd);
-
-	return err;
-}
-
 int mgmt_discovering(struct hci_dev *hdev, u8 discovering)
 {
 	struct mgmt_ev_discovering ev;
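The mgmt.c rework above replaces one-shot hci_send_cmd()/hci_do_inquiry()/hci_le_scan() calls with batched requests. A kernel-style sketch of the pattern, reusing only names visible in the diff:

    /* Build a request, append one or more controller commands, then run
     * the batch; the callback fires once after the whole sequence has
     * completed rather than once per command. */
    static int le_scan_off(struct hci_dev *hdev)
    {
            struct hci_request req;
            struct hci_cp_le_set_scan_enable enable_cp;

            hci_req_init(&req, hdev);

            memset(&enable_cp, 0, sizeof(enable_cp));
            enable_cp.enable = LE_SCAN_DISABLE;
            hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE,
                        sizeof(enable_cp), &enable_cp);

            return hci_req_run(&req, stop_discovery_complete);
    }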
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index b6e44ad6cca6..84fcf9fff3ea 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -58,7 +58,6 @@ struct rfcomm_dev {
58 uint modem_status; 58 uint modem_status;
59 59
60 struct rfcomm_dlc *dlc; 60 struct rfcomm_dlc *dlc;
61 wait_queue_head_t wait;
62 61
63 struct device *tty_dev; 62 struct device *tty_dev;
64 63
@@ -76,13 +75,6 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig);
76 75
77/* ---- Device functions ---- */ 76/* ---- Device functions ---- */
78 77
79/*
80 * The reason this isn't actually a race, as you no doubt have a little voice
81 * screaming at you in your head, is that the refcount should never actually
82 * reach zero unless the device has already been taken off the list, in
83 * rfcomm_dev_del(). And if that's not true, we'll hit the BUG() in
84 * rfcomm_dev_destruct() anyway.
85 */
86static void rfcomm_dev_destruct(struct tty_port *port) 78static void rfcomm_dev_destruct(struct tty_port *port)
87{ 79{
88 struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port); 80 struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port);
@@ -90,10 +82,9 @@ static void rfcomm_dev_destruct(struct tty_port *port)
90 82
91 BT_DBG("dev %p dlc %p", dev, dlc); 83 BT_DBG("dev %p dlc %p", dev, dlc);
92 84
93 /* Refcount should only hit zero when called from rfcomm_dev_del() 85 spin_lock(&rfcomm_dev_lock);
94 which will have taken us off the list. Everything else are 86 list_del(&dev->list);
95 refcounting bugs. */ 87 spin_unlock(&rfcomm_dev_lock);
96 BUG_ON(!list_empty(&dev->list));
97 88
98 rfcomm_dlc_lock(dlc); 89 rfcomm_dlc_lock(dlc);
99 /* Detach DLC if it's owned by this dev */ 90 /* Detach DLC if it's owned by this dev */
@@ -112,8 +103,39 @@ static void rfcomm_dev_destruct(struct tty_port *port)
112 module_put(THIS_MODULE); 103 module_put(THIS_MODULE);
113} 104}
114 105
106/* device-specific initialization: open the dlc */
107static int rfcomm_dev_activate(struct tty_port *port, struct tty_struct *tty)
108{
109 struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port);
110
111 return rfcomm_dlc_open(dev->dlc, &dev->src, &dev->dst, dev->channel);
112}
113
114/* we block the open until the dlc->state becomes BT_CONNECTED */
115static int rfcomm_dev_carrier_raised(struct tty_port *port)
116{
117 struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port);
118
119 return (dev->dlc->state == BT_CONNECTED);
120}
121
122/* device-specific cleanup: close the dlc */
123static void rfcomm_dev_shutdown(struct tty_port *port)
124{
125 struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port);
126
127 if (dev->tty_dev->parent)
128 device_move(dev->tty_dev, NULL, DPM_ORDER_DEV_LAST);
129
130 /* close the dlc */
131 rfcomm_dlc_close(dev->dlc, 0);
132}
133
115static const struct tty_port_operations rfcomm_port_ops = { 134static const struct tty_port_operations rfcomm_port_ops = {
116 .destruct = rfcomm_dev_destruct, 135 .destruct = rfcomm_dev_destruct,
136 .activate = rfcomm_dev_activate,
137 .shutdown = rfcomm_dev_shutdown,
138 .carrier_raised = rfcomm_dev_carrier_raised,
117}; 139};
118 140
119static struct rfcomm_dev *__rfcomm_dev_get(int id) 141static struct rfcomm_dev *__rfcomm_dev_get(int id)
@@ -236,7 +258,6 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc)
236 258
237 tty_port_init(&dev->port); 259 tty_port_init(&dev->port);
238 dev->port.ops = &rfcomm_port_ops; 260 dev->port.ops = &rfcomm_port_ops;
239 init_waitqueue_head(&dev->wait);
240 261
241 skb_queue_head_init(&dev->pending); 262 skb_queue_head_init(&dev->pending);
242 263
@@ -282,7 +303,9 @@ out:
282 dev->id, NULL); 303 dev->id, NULL);
283 if (IS_ERR(dev->tty_dev)) { 304 if (IS_ERR(dev->tty_dev)) {
284 err = PTR_ERR(dev->tty_dev); 305 err = PTR_ERR(dev->tty_dev);
306 spin_lock(&rfcomm_dev_lock);
285 list_del(&dev->list); 307 list_del(&dev->list);
308 spin_unlock(&rfcomm_dev_lock);
286 goto free; 309 goto free;
287 } 310 }
288 311
@@ -301,27 +324,6 @@ free:
301 return err; 324 return err;
302} 325}
303 326
304static void rfcomm_dev_del(struct rfcomm_dev *dev)
305{
306 unsigned long flags;
307 BT_DBG("dev %p", dev);
308
309 BUG_ON(test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags));
310
311 spin_lock_irqsave(&dev->port.lock, flags);
312 if (dev->port.count > 0) {
313 spin_unlock_irqrestore(&dev->port.lock, flags);
314 return;
315 }
316 spin_unlock_irqrestore(&dev->port.lock, flags);
317
318 spin_lock(&rfcomm_dev_lock);
319 list_del_init(&dev->list);
320 spin_unlock(&rfcomm_dev_lock);
321
322 tty_port_put(&dev->port);
323}
324
325/* ---- Send buffer ---- */ 327/* ---- Send buffer ---- */
326static inline unsigned int rfcomm_room(struct rfcomm_dlc *dlc) 328static inline unsigned int rfcomm_room(struct rfcomm_dlc *dlc)
327{ 329{
@@ -333,10 +335,9 @@ static inline unsigned int rfcomm_room(struct rfcomm_dlc *dlc)
333static void rfcomm_wfree(struct sk_buff *skb) 335static void rfcomm_wfree(struct sk_buff *skb)
334{ 336{
335 struct rfcomm_dev *dev = (void *) skb->sk; 337 struct rfcomm_dev *dev = (void *) skb->sk;
336 struct tty_struct *tty = dev->port.tty;
337 atomic_sub(skb->truesize, &dev->wmem_alloc); 338 atomic_sub(skb->truesize, &dev->wmem_alloc);
338 if (test_bit(RFCOMM_TTY_ATTACHED, &dev->flags) && tty) 339 if (test_bit(RFCOMM_TTY_ATTACHED, &dev->flags))
339 tty_wakeup(tty); 340 tty_port_tty_wakeup(&dev->port);
340 tty_port_put(&dev->port); 341 tty_port_put(&dev->port);
341} 342}
342 343
@@ -410,6 +411,7 @@ static int rfcomm_release_dev(void __user *arg)
410{ 411{
411 struct rfcomm_dev_req req; 412 struct rfcomm_dev_req req;
412 struct rfcomm_dev *dev; 413 struct rfcomm_dev *dev;
414 struct tty_struct *tty;
413 415
414 if (copy_from_user(&req, arg, sizeof(req))) 416 if (copy_from_user(&req, arg, sizeof(req)))
415 return -EFAULT; 417 return -EFAULT;
@@ -429,11 +431,15 @@ static int rfcomm_release_dev(void __user *arg)
429 rfcomm_dlc_close(dev->dlc, 0); 431 rfcomm_dlc_close(dev->dlc, 0);
430 432
431 /* Shut down TTY synchronously before freeing rfcomm_dev */ 433 /* Shut down TTY synchronously before freeing rfcomm_dev */
432 if (dev->port.tty) 434 tty = tty_port_tty_get(&dev->port);
433 tty_vhangup(dev->port.tty); 435 if (tty) {
436 tty_vhangup(tty);
437 tty_kref_put(tty);
438 }
439
440 if (!test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags))
441 tty_port_put(&dev->port);
434 442
435 if (!test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags))
436 rfcomm_dev_del(dev);
437 tty_port_put(&dev->port); 443 tty_port_put(&dev->port);
438 return 0; 444 return 0;
439} 445}
@@ -569,31 +575,13 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
569 BT_DBG("dlc %p dev %p err %d", dlc, dev, err); 575 BT_DBG("dlc %p dev %p err %d", dlc, dev, err);
570 576
571 dev->err = err; 577 dev->err = err;
572 wake_up_interruptible(&dev->wait); 578 if (dlc->state == BT_CONNECTED) {
573 579 device_move(dev->tty_dev, rfcomm_get_device(dev),
574 if (dlc->state == BT_CLOSED) { 580 DPM_ORDER_DEV_AFTER_PARENT);
575 if (!dev->port.tty) { 581
576 if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) { 582 wake_up_interruptible(&dev->port.open_wait);
577 /* Drop DLC lock here to avoid deadlock 583 } else if (dlc->state == BT_CLOSED)
578 * 1. rfcomm_dev_get will take rfcomm_dev_lock 584 tty_port_tty_hangup(&dev->port, false);
579 * but in rfcomm_dev_add there's lock order:
580 * rfcomm_dev_lock -> dlc lock
581 * 2. tty_port_put will deadlock if it's
582 * the last reference
583 */
584 rfcomm_dlc_unlock(dlc);
585 if (rfcomm_dev_get(dev->id) == NULL) {
586 rfcomm_dlc_lock(dlc);
587 return;
588 }
589
590 rfcomm_dev_del(dev);
591 tty_port_put(&dev->port);
592 rfcomm_dlc_lock(dlc);
593 }
594 } else
595 tty_hangup(dev->port.tty);
596 }
597} 585}
598 586
599static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig) 587static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig)
@@ -604,10 +592,8 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig)
604 592
605 BT_DBG("dlc %p dev %p v24_sig 0x%02x", dlc, dev, v24_sig); 593 BT_DBG("dlc %p dev %p v24_sig 0x%02x", dlc, dev, v24_sig);
606 594
607 if ((dev->modem_status & TIOCM_CD) && !(v24_sig & RFCOMM_V24_DV)) { 595 if ((dev->modem_status & TIOCM_CD) && !(v24_sig & RFCOMM_V24_DV))
608 if (dev->port.tty && !C_CLOCAL(dev->port.tty)) 596 tty_port_tty_hangup(&dev->port, true);
609 tty_hangup(dev->port.tty);
610 }
611 597
612 dev->modem_status = 598 dev->modem_status =
613 ((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) | 599 ((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) |
@@ -638,124 +624,92 @@ static void rfcomm_tty_copy_pending(struct rfcomm_dev *dev)
638 tty_flip_buffer_push(&dev->port); 624 tty_flip_buffer_push(&dev->port);
639} 625}
640 626
641static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
642{
643	DECLARE_WAITQUEUE(wait, current);
644	struct rfcomm_dev *dev;
645	struct rfcomm_dlc *dlc;
646	unsigned long flags;
647	int err, id;
648
649	id = tty->index;
650
651	BT_DBG("tty %p id %d", tty, id);
652
653	/* We don't leak this refcount. For reasons which are not entirely
654	   clear, the TTY layer will call our ->close() method even if the
655	   open fails. We decrease the refcount there, and decreasing it
656	   here too would cause breakage. */
657	dev = rfcomm_dev_get(id);
658	if (!dev)
659		return -ENODEV;
660
661	BT_DBG("dev %p dst %pMR channel %d opened %d", dev, &dev->dst,
662	       dev->channel, dev->port.count);
663
664	spin_lock_irqsave(&dev->port.lock, flags);
665	if (++dev->port.count > 1) {
666		spin_unlock_irqrestore(&dev->port.lock, flags);
667		return 0;
668	}
669	spin_unlock_irqrestore(&dev->port.lock, flags);
670
627/* do the reverse of install, clearing the tty fields and releasing the
628 * reference to tty_port
629 */
630static void rfcomm_tty_cleanup(struct tty_struct *tty)
631{
632	struct rfcomm_dev *dev = tty->driver_data;
633
634	clear_bit(RFCOMM_TTY_ATTACHED, &dev->flags);
635
636	rfcomm_dlc_lock(dev->dlc);
637	tty->driver_data = NULL;
638	rfcomm_dlc_unlock(dev->dlc);
639
640	/*
641	 * purge the dlc->tx_queue to avoid circular dependencies
642	 * between dev and dlc
643	 */
644	skb_queue_purge(&dev->dlc->tx_queue);
645
646	tty_port_put(&dev->port);
647}
648
649/* we acquire the tty_port reference since it's here the tty is first used
650 * by setting the termios. We also populate the driver_data field and install
651 * the tty port
652 */
653static int rfcomm_tty_install(struct tty_driver *driver, struct tty_struct *tty)
654{
655	struct rfcomm_dev *dev;
656	struct rfcomm_dlc *dlc;
657	int err;
658
659	dev = rfcomm_dev_get(tty->index);
660	if (!dev)
661		return -ENODEV;
662
671 dlc = dev->dlc; 663 dlc = dev->dlc;
672 664
673 /* Attach TTY and open DLC */ 665 /* Attach TTY and open DLC */
674
675 rfcomm_dlc_lock(dlc); 666 rfcomm_dlc_lock(dlc);
676 tty->driver_data = dev; 667 tty->driver_data = dev;
677 dev->port.tty = tty;
678 rfcomm_dlc_unlock(dlc); 668 rfcomm_dlc_unlock(dlc);
679 set_bit(RFCOMM_TTY_ATTACHED, &dev->flags); 669 set_bit(RFCOMM_TTY_ATTACHED, &dev->flags);
680 670
681	err = rfcomm_dlc_open(dlc, &dev->src, &dev->dst, dev->channel);
682	if (err < 0)
683		return err;
684
685	/* Wait for DLC to connect */
686	add_wait_queue(&dev->wait, &wait);
687	while (1) {
688		set_current_state(TASK_INTERRUPTIBLE);
689
690		if (dlc->state == BT_CLOSED) {
691			err = -dev->err;
692			break;
693		}
694
695		if (dlc->state == BT_CONNECTED)
696			break;
697
698		if (signal_pending(current)) {
699			err = -EINTR;
700			break;
701		}
702
703		tty_unlock(tty);
704		schedule();
705		tty_lock(tty);
706	}
707	set_current_state(TASK_RUNNING);
708	remove_wait_queue(&dev->wait, &wait);
709
710	if (err == 0)
711		device_move(dev->tty_dev, rfcomm_get_device(dev),
712			    DPM_ORDER_DEV_AFTER_PARENT);
713
714	rfcomm_tty_copy_pending(dev);
715
716	rfcomm_dlc_unthrottle(dev->dlc);
717
718	return err;
671	/* install the tty_port */
672	err = tty_port_install(&dev->port, driver, tty);
673	if (err)
674		rfcomm_tty_cleanup(tty);
675
676	return err;
677}
678
679static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
680{
681	struct rfcomm_dev *dev = tty->driver_data;
682	int err;
683
684	BT_DBG("tty %p id %d", tty, tty->index);
685
686	BT_DBG("dev %p dst %pMR channel %d opened %d", dev, &dev->dst,
687	       dev->channel, dev->port.count);
688
689	err = tty_port_open(&dev->port, tty, filp);
690	if (err)
691		return err;
692
693	/*
694	 * FIXME: rfcomm should use proper flow control for
695	 * received data. This hack will be unnecessary and can
696	 * be removed when that's implemented
697	 */
698	rfcomm_tty_copy_pending(dev);
699
700	rfcomm_dlc_unthrottle(dev->dlc);
701
702	return 0;
719} 703}
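
Note: rfcomm_tty_open() no longer open-codes the port.count bookkeeping or the wait for the DLC to connect; both move into tty_port_open(), with the DLC setup supplied through the driver's tty_port_operations. A simplified sketch of the generic helper's flow, with error paths and TTY_IO_ERROR handling omitted (see drivers/tty/tty_port.c for the real thing):

	int tty_port_open(struct tty_port *port, struct tty_struct *tty,
			  struct file *filp)
	{
		spin_lock_irq(&port->lock);
		++port->count;				/* was open-coded in rfcomm before */
		spin_unlock_irq(&port->lock);
		tty_port_tty_set(port, tty);

		if (!test_bit(ASYNCB_INITIALIZED, &port->flags)) {
			if (port->ops->activate) {	/* rfcomm opens the DLC here */
				int retval = port->ops->activate(port, tty);
				if (retval)
					return retval;
			}
			set_bit(ASYNCB_INITIALIZED, &port->flags);
		}
		/* blocks until carrier (for rfcomm: DLC connected) or a signal */
		return tty_port_block_til_ready(port, tty, filp);
	}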
720 704
721static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp) 705static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp)
722{ 706{
723 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; 707 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
724 unsigned long flags;
725
726 if (!dev)
727 return;
728 708
729 BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc, 709 BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc,
730 dev->port.count); 710 dev->port.count);
731 711
712	tty_port_close(&dev->port, tty, filp);
732	spin_lock_irqsave(&dev->port.lock, flags);
733 if (!--dev->port.count) {
734 spin_unlock_irqrestore(&dev->port.lock, flags);
735 if (dev->tty_dev->parent)
736 device_move(dev->tty_dev, NULL, DPM_ORDER_DEV_LAST);
737
738 /* Close DLC and dettach TTY */
739 rfcomm_dlc_close(dev->dlc, 0);
740
741 clear_bit(RFCOMM_TTY_ATTACHED, &dev->flags);
742
743 rfcomm_dlc_lock(dev->dlc);
744 tty->driver_data = NULL;
745 dev->port.tty = NULL;
746 rfcomm_dlc_unlock(dev->dlc);
747
748 if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags)) {
749 spin_lock(&rfcomm_dev_lock);
750 list_del_init(&dev->list);
751 spin_unlock(&rfcomm_dev_lock);
752
753 tty_port_put(&dev->port);
754 }
755 } else
756 spin_unlock_irqrestore(&dev->port.lock, flags);
757
758 tty_port_put(&dev->port);
759} 713}
760 714
761static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) 715static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, int count)
@@ -1055,17 +1009,11 @@ static void rfcomm_tty_hangup(struct tty_struct *tty)
1055 1009
1056 BT_DBG("tty %p dev %p", tty, dev); 1010 BT_DBG("tty %p dev %p", tty, dev);
1057 1011
1058 if (!dev) 1012 tty_port_hangup(&dev->port);
1059 return;
1060
1061 rfcomm_tty_flush_buffer(tty);
1062 1013
1063 if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) { 1014 if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags) &&
1064 if (rfcomm_dev_get(dev->id) == NULL) 1015 !test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags))
1065 return;
1066 rfcomm_dev_del(dev);
1067 tty_port_put(&dev->port); 1016 tty_port_put(&dev->port);
1068 }
1069} 1017}
1070 1018
1071static int rfcomm_tty_tiocmget(struct tty_struct *tty) 1019static int rfcomm_tty_tiocmget(struct tty_struct *tty)
@@ -1128,6 +1076,8 @@ static const struct tty_operations rfcomm_ops = {
1128 .wait_until_sent = rfcomm_tty_wait_until_sent, 1076 .wait_until_sent = rfcomm_tty_wait_until_sent,
1129 .tiocmget = rfcomm_tty_tiocmget, 1077 .tiocmget = rfcomm_tty_tiocmget,
1130 .tiocmset = rfcomm_tty_tiocmset, 1078 .tiocmset = rfcomm_tty_tiocmset,
1079 .install = rfcomm_tty_install,
1080 .cleanup = rfcomm_tty_cleanup,
1131}; 1081};
1132 1082
1133int __init rfcomm_init_ttys(void) 1083int __init rfcomm_init_ttys(void)
@@ -1146,7 +1096,7 @@ int __init rfcomm_init_ttys(void)
1146 rfcomm_tty_driver->subtype = SERIAL_TYPE_NORMAL; 1096 rfcomm_tty_driver->subtype = SERIAL_TYPE_NORMAL;
1147 rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; 1097 rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
1148 rfcomm_tty_driver->init_termios = tty_std_termios; 1098 rfcomm_tty_driver->init_termios = tty_std_termios;
1149 rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; 1099 rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL;
1150 rfcomm_tty_driver->init_termios.c_lflag &= ~ICANON; 1100 rfcomm_tty_driver->init_termios.c_lflag &= ~ICANON;
1151 tty_set_operations(rfcomm_tty_driver, &rfcomm_ops); 1101 tty_set_operations(rfcomm_tty_driver, &rfcomm_ops);
1152 1102
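
Note: with CLOCAL gone from the default c_cflag, an RFCOMM TTY now hangs up when the carrier (the RFCOMM data-valid signal) drops. A hypothetical userspace snippet restoring the old ignore-carrier behaviour for one open descriptor:

	struct termios t;

	tcgetattr(fd, &t);
	t.c_cflag |= CLOCAL;	/* ignore modem control lines again */
	tcsetattr(fd, TCSANOW, &t);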
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index e7bd4eea575c..96bd388d93a4 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -176,8 +176,13 @@ static int sco_connect(struct sock *sk)
176 else 176 else
177 type = SCO_LINK; 177 type = SCO_LINK;
178 178
179	hcon = hci_connect(hdev, type, dst, BDADDR_BREDR, BT_SECURITY_LOW,
180			   HCI_AT_NO_BONDING);
179	if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT &&
180	    (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) {
181 err = -EOPNOTSUPP;
182 goto done;
183 }
184
185 hcon = hci_connect_sco(hdev, type, dst, sco_pi(sk)->setting);
181 if (IS_ERR(hcon)) { 186 if (IS_ERR(hcon)) {
182 err = PTR_ERR(hcon); 187 err = PTR_ERR(hcon);
183 goto done; 188 goto done;
@@ -417,6 +422,8 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int pro
417 sk->sk_protocol = proto; 422 sk->sk_protocol = proto;
418 sk->sk_state = BT_OPEN; 423 sk->sk_state = BT_OPEN;
419 424
425 sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT;
426
420 setup_timer(&sk->sk_timer, sco_sock_timeout, (unsigned long)sk); 427 setup_timer(&sk->sk_timer, sco_sock_timeout, (unsigned long)sk);
421 428
422 bt_sock_link(&sco_sk_list, sk); 429 bt_sock_link(&sco_sk_list, sk);
@@ -652,7 +659,7 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
652 return err; 659 return err;
653} 660}
654 661
655static void sco_conn_defer_accept(struct hci_conn *conn, int mask) 662static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting)
656{ 663{
657 struct hci_dev *hdev = conn->hdev; 664 struct hci_dev *hdev = conn->hdev;
658 665
@@ -664,11 +671,7 @@ static void sco_conn_defer_accept(struct hci_conn *conn, int mask)
664 struct hci_cp_accept_conn_req cp; 671 struct hci_cp_accept_conn_req cp;
665 672
666 bacpy(&cp.bdaddr, &conn->dst); 673 bacpy(&cp.bdaddr, &conn->dst);
667 674 cp.role = 0x00; /* Ignored */
668 if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER))
669 cp.role = 0x00; /* Become master */
670 else
671 cp.role = 0x01; /* Remain slave */
672 675
673 hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp); 676 hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp);
674 } else { 677 } else {
@@ -679,9 +682,21 @@ static void sco_conn_defer_accept(struct hci_conn *conn, int mask)
679 682
680 cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40); 683 cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40);
681 cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40); 684 cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40);
682	cp.max_latency = __constant_cpu_to_le16(0xffff);
683	cp.content_format = cpu_to_le16(hdev->voice_setting);
684	cp.retrans_effort = 0xff;
685	cp.content_format = cpu_to_le16(setting);
686
687	switch (setting & SCO_AIRMODE_MASK) {
688 case SCO_AIRMODE_TRANSP:
689 if (conn->pkt_type & ESCO_2EV3)
690 cp.max_latency = __constant_cpu_to_le16(0x0008);
691 else
692 cp.max_latency = __constant_cpu_to_le16(0x000D);
693 cp.retrans_effort = 0x02;
694 break;
695 case SCO_AIRMODE_CVSD:
696 cp.max_latency = __constant_cpu_to_le16(0xffff);
697 cp.retrans_effort = 0xff;
698 break;
699 }
685 700
686 hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, 701 hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ,
687 sizeof(cp), &cp); 702 sizeof(cp), &cp);
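
Note: SCO_AIRMODE_MASK extracts the air coding format bits of the voice setting, so transparent (e.g. wideband) links get tighter latency and retransmission parameters than CVSD. The constants, assumed here as this series defines them in include/net/bluetooth/sco.h:

	#define SCO_AIRMODE_MASK	0x0003
	#define SCO_AIRMODE_CVSD	0x0000
	#define SCO_AIRMODE_TRANSP	0x0003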
@@ -698,7 +713,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
698 713
699 if (sk->sk_state == BT_CONNECT2 && 714 if (sk->sk_state == BT_CONNECT2 &&
700 test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { 715 test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
701 sco_conn_defer_accept(pi->conn->hcon, 0); 716 sco_conn_defer_accept(pi->conn->hcon, pi->setting);
702 sk->sk_state = BT_CONFIG; 717 sk->sk_state = BT_CONFIG;
703 msg->msg_namelen = 0; 718 msg->msg_namelen = 0;
704 719
@@ -714,7 +729,8 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
714static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) 729static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
715{ 730{
716 struct sock *sk = sock->sk; 731 struct sock *sk = sock->sk;
717 int err = 0; 732 int len, err = 0;
733 struct bt_voice voice;
718 u32 opt; 734 u32 opt;
719 735
720 BT_DBG("sk %p", sk); 736 BT_DBG("sk %p", sk);
@@ -740,6 +756,31 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char
740 clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); 756 clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
741 break; 757 break;
742 758
759 case BT_VOICE:
760 if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND &&
761 sk->sk_state != BT_CONNECT2) {
762 err = -EINVAL;
763 break;
764 }
765
766 voice.setting = sco_pi(sk)->setting;
767
768 len = min_t(unsigned int, sizeof(voice), optlen);
769 if (copy_from_user((char *) &voice, optval, len)) {
770 err = -EFAULT;
771 break;
772 }
773
774 /* Explicitly check for these values */
775 if (voice.setting != BT_VOICE_TRANSPARENT &&
776 voice.setting != BT_VOICE_CVSD_16BIT) {
777 err = -EINVAL;
778 break;
779 }
780
781 sco_pi(sk)->setting = voice.setting;
782 break;
783
743 default: 784 default:
744 err = -ENOPROTOOPT; 785 err = -ENOPROTOOPT;
745 break; 786 break;
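
Note: a hypothetical userspace use of the new option; BT_VOICE must be set while the socket is still BT_OPEN or BT_BOUND (or BT_CONNECT2 for a deferred accept), i.e. before the SCO link is set up:

	struct bt_voice voice = { .setting = BT_VOICE_TRANSPARENT };
	socklen_t len = sizeof(voice);

	if (setsockopt(sk, SOL_BLUETOOTH, BT_VOICE, &voice, sizeof(voice)) < 0)
		perror("BT_VOICE");	/* -EINVAL for unsupported settings */

	getsockopt(sk, SOL_BLUETOOTH, BT_VOICE, &voice, &len);	/* read back */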
@@ -765,7 +806,9 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user
765 806
766 switch (optname) { 807 switch (optname) {
767 case SCO_OPTIONS: 808 case SCO_OPTIONS:
768 if (sk->sk_state != BT_CONNECTED) { 809 if (sk->sk_state != BT_CONNECTED &&
810 !(sk->sk_state == BT_CONNECT2 &&
811 test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags))) {
769 err = -ENOTCONN; 812 err = -ENOTCONN;
770 break; 813 break;
771 } 814 }
@@ -781,7 +824,9 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user
781 break; 824 break;
782 825
783 case SCO_CONNINFO: 826 case SCO_CONNINFO:
784 if (sk->sk_state != BT_CONNECTED) { 827 if (sk->sk_state != BT_CONNECTED &&
828 !(sk->sk_state == BT_CONNECT2 &&
829 test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags))) {
785 err = -ENOTCONN; 830 err = -ENOTCONN;
786 break; 831 break;
787 } 832 }
@@ -809,6 +854,7 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char
809{ 854{
810 struct sock *sk = sock->sk; 855 struct sock *sk = sock->sk;
811 int len, err = 0; 856 int len, err = 0;
857 struct bt_voice voice;
812 858
813 BT_DBG("sk %p", sk); 859 BT_DBG("sk %p", sk);
814 860
@@ -834,6 +880,15 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char
834 880
835 break; 881 break;
836 882
883 case BT_VOICE:
884 voice.setting = sco_pi(sk)->setting;
885
886 len = min_t(unsigned int, len, sizeof(voice));
887 if (copy_to_user(optval, (char *)&voice, len))
888 err = -EFAULT;
889
890 break;
891
837 default: 892 default:
838 err = -ENOPROTOOPT; 893 err = -ENOPROTOOPT;
839 break; 894 break;
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index b2296d3857a0..b5562abdd6e0 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -770,7 +770,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
770 770
771 BT_DBG("conn %p hcon %p level 0x%2.2x", conn, hcon, sec_level); 771 BT_DBG("conn %p hcon %p level 0x%2.2x", conn, hcon, sec_level);
772 772
773 if (!lmp_host_le_capable(hcon->hdev)) 773 if (!test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags))
774 return 1; 774 return 1;
775 775
776 if (sec_level == BT_SECURITY_LOW) 776 if (sec_level == BT_SECURITY_LOW)
@@ -851,7 +851,7 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb)
851 __u8 reason; 851 __u8 reason;
852 int err = 0; 852 int err = 0;
853 853
854 if (!lmp_host_le_capable(conn->hcon->hdev)) { 854 if (!test_bit(HCI_LE_ENABLED, &conn->hcon->hdev->dev_flags)) {
855 err = -ENOTSUPP; 855 err = -ENOTSUPP;
856 reason = SMP_PAIRING_NOTSUPP; 856 reason = SMP_PAIRING_NOTSUPP;
857 goto done; 857 goto done;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 967312803e41..ca04163635da 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -22,6 +22,9 @@
22#include <asm/uaccess.h> 22#include <asm/uaccess.h>
23#include "br_private.h" 23#include "br_private.h"
24 24
25#define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \
26 NETIF_F_GSO_MASK | NETIF_F_HW_CSUM)
27
25/* net device transmit always called with BH disabled */ 28/* net device transmit always called with BH disabled */
26netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) 29netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
27{ 30{
@@ -55,10 +58,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
55 skb_pull(skb, ETH_HLEN); 58 skb_pull(skb, ETH_HLEN);
56 59
57 if (is_broadcast_ether_addr(dest)) 60 if (is_broadcast_ether_addr(dest))
58 br_flood_deliver(br, skb); 61 br_flood_deliver(br, skb, false);
59 else if (is_multicast_ether_addr(dest)) { 62 else if (is_multicast_ether_addr(dest)) {
60 if (unlikely(netpoll_tx_running(dev))) { 63 if (unlikely(netpoll_tx_running(dev))) {
61 br_flood_deliver(br, skb); 64 br_flood_deliver(br, skb, false);
62 goto out; 65 goto out;
63 } 66 }
64 if (br_multicast_rcv(br, NULL, skb)) { 67 if (br_multicast_rcv(br, NULL, skb)) {
@@ -67,14 +70,15 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
67 } 70 }
68 71
69 mdst = br_mdb_get(br, skb, vid); 72 mdst = br_mdb_get(br, skb, vid);
70 if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) 73 if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
74 br_multicast_querier_exists(br, eth_hdr(skb)))
71 br_multicast_deliver(mdst, skb); 75 br_multicast_deliver(mdst, skb);
72 else 76 else
73 br_flood_deliver(br, skb); 77 br_flood_deliver(br, skb, false);
74 } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) 78 } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL)
75 br_deliver(dst->dst, skb); 79 br_deliver(dst->dst, skb);
76 else 80 else
77 br_flood_deliver(br, skb); 81 br_flood_deliver(br, skb, true);
78 82
79out: 83out:
80 rcu_read_unlock(); 84 rcu_read_unlock();
@@ -241,22 +245,22 @@ fail:
241int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) 245int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
242{ 246{
243 struct netpoll *np; 247 struct netpoll *np;
244 int err = 0; 248 int err;
249
250 if (!p->br->dev->npinfo)
251 return 0;
245 252
246 np = kzalloc(sizeof(*p->np), gfp); 253 np = kzalloc(sizeof(*p->np), gfp);
247 err = -ENOMEM;
248 if (!np) 254 if (!np)
249 goto out; 255 return -ENOMEM;
250 256
251 err = __netpoll_setup(np, p->dev, gfp); 257 err = __netpoll_setup(np, p->dev, gfp);
252 if (err) { 258 if (err) {
253 kfree(np); 259 kfree(np);
254 goto out; 260 return err;
255 } 261 }
256 262
257 p->np = np; 263 p->np = np;
258
259out:
260 return err; 264 return err;
261} 265}
262 266
@@ -346,12 +350,10 @@ void br_dev_setup(struct net_device *dev)
346 dev->tx_queue_len = 0; 350 dev->tx_queue_len = 0;
347 dev->priv_flags = IFF_EBRIDGE; 351 dev->priv_flags = IFF_EBRIDGE;
348 352
349	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
350			NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX |
351			NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_CTAG_TX;
352	dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
353			   NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
354			   NETIF_F_HW_VLAN_CTAG_TX;
353	dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL |
354			NETIF_F_HW_VLAN_CTAG_TX;
355	dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX;
356	dev->vlan_features = COMMON_FEATURES;
355 357
356 br->dev = dev; 358 br->dev = dev;
357 spin_lock_init(&br->lock); 359 spin_lock_init(&br->lock);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index ebfa4443c69b..ffd5874f2592 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -161,7 +161,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
161 if (!pv) 161 if (!pv)
162 return; 162 return;
163 163
164 for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { 164 for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) {
165 f = __br_fdb_get(br, br->dev->dev_addr, vid); 165 f = __br_fdb_get(br, br->dev->dev_addr, vid);
166 if (f && f->is_local && !f->dst) 166 if (f && f->is_local && !f->dst)
167 fdb_delete(br, f); 167 fdb_delete(br, f);
@@ -707,6 +707,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
707 } 707 }
708 } 708 }
709 709
710 if (is_zero_ether_addr(addr)) {
711 pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n");
712 return -EINVAL;
713 }
714
710 p = br_port_get_rtnl(dev); 715 p = br_port_get_rtnl(dev);
711 if (p == NULL) { 716 if (p == NULL) {
712 pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", 717 pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n",
@@ -725,7 +730,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
725 /* VID was specified, so use it. */ 730 /* VID was specified, so use it. */
726 err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); 731 err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
727 } else { 732 } else {
728 if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { 733 if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) {
729 err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); 734 err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
730 goto out; 735 goto out;
731 } 736 }
@@ -734,7 +739,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
734 * specify a VLAN. To be nice, add/update entry for every 739 * specify a VLAN. To be nice, add/update entry for every
735 * vlan on this port. 740 * vlan on this port.
736 */ 741 */
737 for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { 742 for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
738 err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); 743 err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
739 if (err) 744 if (err)
740 goto out; 745 goto out;
@@ -812,7 +817,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
812 817
813 err = __br_fdb_delete(p, addr, vid); 818 err = __br_fdb_delete(p, addr, vid);
814 } else { 819 } else {
815 if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { 820 if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) {
816 err = __br_fdb_delete(p, addr, 0); 821 err = __br_fdb_delete(p, addr, 0);
817 goto out; 822 goto out;
818 } 823 }
@@ -822,7 +827,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
822 * vlan on this port. 827 * vlan on this port.
823 */ 828 */
824 err = -ENOENT; 829 err = -ENOENT;
825 for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { 830 for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
826 err &= __br_fdb_delete(p, addr, vid); 831 err &= __br_fdb_delete(p, addr, vid);
827 } 832 }
828 } 833 }
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 092b20e4ee4c..4b81b1471789 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -174,7 +174,8 @@ out:
174static void br_flood(struct net_bridge *br, struct sk_buff *skb, 174static void br_flood(struct net_bridge *br, struct sk_buff *skb,
175 struct sk_buff *skb0, 175 struct sk_buff *skb0,
176 void (*__packet_hook)(const struct net_bridge_port *p, 176 void (*__packet_hook)(const struct net_bridge_port *p,
177 struct sk_buff *skb)) 177 struct sk_buff *skb),
178 bool unicast)
178{ 179{
179 struct net_bridge_port *p; 180 struct net_bridge_port *p;
180 struct net_bridge_port *prev; 181 struct net_bridge_port *prev;
@@ -182,6 +183,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
182 prev = NULL; 183 prev = NULL;
183 184
184 list_for_each_entry_rcu(p, &br->port_list, list) { 185 list_for_each_entry_rcu(p, &br->port_list, list) {
186 /* Do not flood unicast traffic to ports that turn it off */
187 if (unicast && !(p->flags & BR_FLOOD))
188 continue;
185 prev = maybe_deliver(prev, p, skb, __packet_hook); 189 prev = maybe_deliver(prev, p, skb, __packet_hook);
186 if (IS_ERR(prev)) 190 if (IS_ERR(prev))
187 goto out; 191 goto out;
@@ -203,16 +207,16 @@ out:
203 207
204 208
205/* called with rcu_read_lock */ 209/* called with rcu_read_lock */
206void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb) 210void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast)
207{ 211{
208 br_flood(br, skb, NULL, __br_deliver); 212 br_flood(br, skb, NULL, __br_deliver, unicast);
209} 213}
210 214
211/* called under bridge lock */ 215/* called under bridge lock */
212void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, 216void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
213 struct sk_buff *skb2) 217 struct sk_buff *skb2, bool unicast)
214{ 218{
215 br_flood(br, skb, skb2, __br_forward); 219 br_flood(br, skb, skb2, __br_forward, unicast);
216} 220}
217 221
218#ifdef CONFIG_BRIDGE_IGMP_SNOOPING 222#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 4cdba60926ff..c41d5fbb91d0 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -221,7 +221,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
221 p->path_cost = port_cost(dev); 221 p->path_cost = port_cost(dev);
222 p->priority = 0x8000 >> BR_PORT_BITS; 222 p->priority = 0x8000 >> BR_PORT_BITS;
223 p->port_no = index; 223 p->port_no = index;
224 p->flags = 0; 224 p->flags = BR_LEARNING | BR_FLOOD;
225 br_init_port(p); 225 br_init_port(p);
226 p->state = BR_STATE_DISABLED; 226 p->state = BR_STATE_DISABLED;
227 br_stp_port_timer_init(p); 227 br_stp_port_timer_init(p);
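
Note: BR_LEARNING and BR_FLOOD are new per-port flag bits, switched on by default so existing setups keep learning MAC addresses and flooding unknown unicast. The bit values below are a sketch of the br_private.h definitions at this point in the series:

	#define BR_HAIRPIN_MODE		0x00000001
	#define BR_BPDU_GUARD		0x00000002
	#define BR_ROOT_BLOCK		0x00000004
	#define BR_MULTICAST_FAST_LEAVE	0x00000008
	#define BR_ADMIN_COST		0x00000010
	#define BR_LEARNING		0x00000020	/* added by this series */
	#define BR_FLOOD		0x00000040	/* added by this series */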
@@ -363,7 +363,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
363 if (err) 363 if (err)
364 goto err2; 364 goto err2;
365 365
366 if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL)))) 366 err = br_netpoll_enable(p, GFP_KERNEL);
367 if (err)
367 goto err3; 368 goto err3;
368 369
369 err = netdev_master_upper_dev_link(dev, br->dev); 370 err = netdev_master_upper_dev_link(dev, br->dev);
@@ -382,6 +383,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
382 383
383 netdev_update_features(br->dev); 384 netdev_update_features(br->dev);
384 385
386 if (br->dev->needed_headroom < dev->needed_headroom)
387 br->dev->needed_headroom = dev->needed_headroom;
388
385 spin_lock_bh(&br->lock); 389 spin_lock_bh(&br->lock);
386 changed_addr = br_stp_recalculate_bridge_id(br); 390 changed_addr = br_stp_recalculate_bridge_id(br);
387 391
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 828e2bcc1f52..a2fd37ec35f7 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -65,6 +65,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
65 struct net_bridge_fdb_entry *dst; 65 struct net_bridge_fdb_entry *dst;
66 struct net_bridge_mdb_entry *mdst; 66 struct net_bridge_mdb_entry *mdst;
67 struct sk_buff *skb2; 67 struct sk_buff *skb2;
68 bool unicast = true;
68 u16 vid = 0; 69 u16 vid = 0;
69 70
70 if (!p || p->state == BR_STATE_DISABLED) 71 if (!p || p->state == BR_STATE_DISABLED)
@@ -75,7 +76,8 @@ int br_handle_frame_finish(struct sk_buff *skb)
75 76
76 /* insert into forwarding database after filtering to avoid spoofing */ 77 /* insert into forwarding database after filtering to avoid spoofing */
77 br = p->br; 78 br = p->br;
78 br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); 79 if (p->flags & BR_LEARNING)
80 br_fdb_update(br, p, eth_hdr(skb)->h_source, vid);
79 81
80 if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && 82 if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) &&
81 br_multicast_rcv(br, p, skb)) 83 br_multicast_rcv(br, p, skb))
@@ -94,11 +96,13 @@ int br_handle_frame_finish(struct sk_buff *skb)
94 96
95 dst = NULL; 97 dst = NULL;
96 98
97 if (is_broadcast_ether_addr(dest)) 99 if (is_broadcast_ether_addr(dest)) {
98 skb2 = skb; 100 skb2 = skb;
99 else if (is_multicast_ether_addr(dest)) { 101 unicast = false;
102 } else if (is_multicast_ether_addr(dest)) {
100 mdst = br_mdb_get(br, skb, vid); 103 mdst = br_mdb_get(br, skb, vid);
101 if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { 104 if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
105 br_multicast_querier_exists(br, eth_hdr(skb))) {
102 if ((mdst && mdst->mglist) || 106 if ((mdst && mdst->mglist) ||
103 br_multicast_is_router(br)) 107 br_multicast_is_router(br))
104 skb2 = skb; 108 skb2 = skb;
@@ -109,6 +113,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
109 } else 113 } else
110 skb2 = skb; 114 skb2 = skb;
111 115
116 unicast = false;
112 br->dev->stats.multicast++; 117 br->dev->stats.multicast++;
113 } else if ((dst = __br_fdb_get(br, dest, vid)) && 118 } else if ((dst = __br_fdb_get(br, dest, vid)) &&
114 dst->is_local) { 119 dst->is_local) {
@@ -122,7 +127,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
122 dst->used = jiffies; 127 dst->used = jiffies;
123 br_forward(dst->dst, skb, skb2); 128 br_forward(dst->dst, skb, skb2);
124 } else 129 } else
125 br_flood_forward(br, skb, skb2); 130 br_flood_forward(br, skb, skb2, unicast);
126 } 131 }
127 132
128 if (skb2) 133 if (skb2)
@@ -142,7 +147,8 @@ static int br_handle_local_finish(struct sk_buff *skb)
142 u16 vid = 0; 147 u16 vid = 0;
143 148
144 br_vlan_get_tag(skb, &vid); 149 br_vlan_get_tag(skb, &vid);
145 br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); 150 if (p->flags & BR_LEARNING)
151 br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid);
146 return 0; /* process further */ 152 return 0; /* process further */
147} 153}
148 154
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 19942e38fd2d..85a09bb5ca51 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -9,6 +9,7 @@
9#include <net/netlink.h> 9#include <net/netlink.h>
10#if IS_ENABLED(CONFIG_IPV6) 10#if IS_ENABLED(CONFIG_IPV6)
11#include <net/ipv6.h> 11#include <net/ipv6.h>
12#include <net/addrconf.h>
12#endif 13#endif
13 14
14#include "br_private.h" 15#include "br_private.h"
@@ -61,7 +62,8 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
61 62
62 for (i = 0; i < mdb->max; i++) { 63 for (i = 0; i < mdb->max; i++) {
63 struct net_bridge_mdb_entry *mp; 64 struct net_bridge_mdb_entry *mp;
64 struct net_bridge_port_group *p, **pp; 65 struct net_bridge_port_group *p;
66 struct net_bridge_port_group __rcu **pp;
65 struct net_bridge_port *port; 67 struct net_bridge_port *port;
66 68
67 hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) { 69 hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) {
@@ -253,7 +255,7 @@ static bool is_valid_mdb_entry(struct br_mdb_entry *entry)
253 return false; 255 return false;
254#if IS_ENABLED(CONFIG_IPV6) 256#if IS_ENABLED(CONFIG_IPV6)
255 } else if (entry->addr.proto == htons(ETH_P_IPV6)) { 257 } else if (entry->addr.proto == htons(ETH_P_IPV6)) {
256 if (!ipv6_is_transient_multicast(&entry->addr.u.ip6)) 258 if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6))
257 return false; 259 return false;
258#endif 260#endif
259 } else 261 } else
@@ -414,16 +416,20 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
414 if (!netif_running(br->dev) || br->multicast_disabled) 416 if (!netif_running(br->dev) || br->multicast_disabled)
415 return -EINVAL; 417 return -EINVAL;
416 418
417 if (timer_pending(&br->multicast_querier_timer))
418 return -EBUSY;
419
420 ip.proto = entry->addr.proto; 419 ip.proto = entry->addr.proto;
421 if (ip.proto == htons(ETH_P_IP)) 420 if (ip.proto == htons(ETH_P_IP)) {
421 if (timer_pending(&br->ip4_querier.timer))
422 return -EBUSY;
423
422 ip.u.ip4 = entry->addr.u.ip4; 424 ip.u.ip4 = entry->addr.u.ip4;
423#if IS_ENABLED(CONFIG_IPV6) 425#if IS_ENABLED(CONFIG_IPV6)
424 else 426 } else {
427 if (timer_pending(&br->ip6_querier.timer))
428 return -EBUSY;
429
425 ip.u.ip6 = entry->addr.u.ip6; 430 ip.u.ip6 = entry->addr.u.ip6;
426#endif 431#endif
432 }
427 433
428 spin_lock_bh(&br->multicast_lock); 434 spin_lock_bh(&br->multicast_lock);
429 mdb = mlock_dereference(br->mdb, br); 435 mdb = mlock_dereference(br->mdb, br);
@@ -447,7 +453,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
447 call_rcu_bh(&p->rcu, br_multicast_free_pg); 453 call_rcu_bh(&p->rcu, br_multicast_free_pg);
448 err = 0; 454 err = 0;
449 455
450 if (!mp->ports && !mp->mglist && 456 if (!mp->ports && !mp->mglist && mp->timer_armed &&
451 netif_running(br->dev)) 457 netif_running(br->dev))
452 mod_timer(&mp->timer, jiffies); 458 mod_timer(&mp->timer, jiffies);
453 break; 459 break;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 81f2389f78eb..d1c578630678 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -23,16 +23,19 @@
23#include <linux/skbuff.h> 23#include <linux/skbuff.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/timer.h> 25#include <linux/timer.h>
26#include <linux/inetdevice.h>
26#include <net/ip.h> 27#include <net/ip.h>
27#if IS_ENABLED(CONFIG_IPV6) 28#if IS_ENABLED(CONFIG_IPV6)
28#include <net/ipv6.h> 29#include <net/ipv6.h>
29#include <net/mld.h> 30#include <net/mld.h>
30#include <net/ip6_checksum.h> 31#include <net/ip6_checksum.h>
32#include <net/addrconf.h>
31#endif 33#endif
32 34
33#include "br_private.h" 35#include "br_private.h"
34 36
35static void br_multicast_start_querier(struct net_bridge *br); 37static void br_multicast_start_querier(struct net_bridge *br,
38 struct bridge_mcast_query *query);
36unsigned int br_mdb_rehash_seq; 39unsigned int br_mdb_rehash_seq;
37 40
38static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) 41static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
@@ -269,7 +272,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
269 del_timer(&p->timer); 272 del_timer(&p->timer);
270 call_rcu_bh(&p->rcu, br_multicast_free_pg); 273 call_rcu_bh(&p->rcu, br_multicast_free_pg);
271 274
272 if (!mp->ports && !mp->mglist && 275 if (!mp->ports && !mp->mglist && mp->timer_armed &&
273 netif_running(br->dev)) 276 netif_running(br->dev))
274 mod_timer(&mp->timer, jiffies); 277 mod_timer(&mp->timer, jiffies);
275 278
@@ -381,7 +384,8 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
381 iph->frag_off = htons(IP_DF); 384 iph->frag_off = htons(IP_DF);
382 iph->ttl = 1; 385 iph->ttl = 1;
383 iph->protocol = IPPROTO_IGMP; 386 iph->protocol = IPPROTO_IGMP;
384 iph->saddr = 0; 387 iph->saddr = br->multicast_query_use_ifaddr ?
388 inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0;
385 iph->daddr = htonl(INADDR_ALLHOSTS_GROUP); 389 iph->daddr = htonl(INADDR_ALLHOSTS_GROUP);
386 ((u8 *)&iph[1])[0] = IPOPT_RA; 390 ((u8 *)&iph[1])[0] = IPOPT_RA;
387 ((u8 *)&iph[1])[1] = 4; 391 ((u8 *)&iph[1])[1] = 4;
@@ -465,8 +469,9 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
465 skb_set_transport_header(skb, skb->len); 469 skb_set_transport_header(skb, skb->len);
466 mldq = (struct mld_msg *) icmp6_hdr(skb); 470 mldq = (struct mld_msg *) icmp6_hdr(skb);
467 471
468 interval = ipv6_addr_any(group) ? br->multicast_last_member_interval : 472 interval = ipv6_addr_any(group) ?
469 br->multicast_query_response_interval; 473 br->multicast_query_response_interval :
474 br->multicast_last_member_interval;
470 475
471 mldq->mld_type = ICMPV6_MGM_QUERY; 476 mldq->mld_type = ICMPV6_MGM_QUERY;
472 mldq->mld_code = 0; 477 mldq->mld_code = 0;
@@ -615,6 +620,7 @@ rehash:
615 620
616 mp->br = br; 621 mp->br = br;
617 mp->addr = *group; 622 mp->addr = *group;
623
618 setup_timer(&mp->timer, br_multicast_group_expired, 624 setup_timer(&mp->timer, br_multicast_group_expired,
619 (unsigned long)mp); 625 (unsigned long)mp);
620 626
@@ -654,7 +660,6 @@ static int br_multicast_add_group(struct net_bridge *br,
654 struct net_bridge_mdb_entry *mp; 660 struct net_bridge_mdb_entry *mp;
655 struct net_bridge_port_group *p; 661 struct net_bridge_port_group *p;
656 struct net_bridge_port_group __rcu **pp; 662 struct net_bridge_port_group __rcu **pp;
657 unsigned long now = jiffies;
658 int err; 663 int err;
659 664
660 spin_lock(&br->multicast_lock); 665 spin_lock(&br->multicast_lock);
@@ -669,7 +674,6 @@ static int br_multicast_add_group(struct net_bridge *br,
669 674
670 if (!port) { 675 if (!port) {
671 mp->mglist = true; 676 mp->mglist = true;
672 mod_timer(&mp->timer, now + br->multicast_membership_interval);
673 goto out; 677 goto out;
674 } 678 }
675 679
@@ -677,7 +681,7 @@ static int br_multicast_add_group(struct net_bridge *br,
677 (p = mlock_dereference(*pp, br)) != NULL; 681 (p = mlock_dereference(*pp, br)) != NULL;
678 pp = &p->next) { 682 pp = &p->next) {
679 if (p->port == port) 683 if (p->port == port)
680 goto found; 684 goto out;
681 if ((unsigned long)p->port < (unsigned long)port) 685 if ((unsigned long)p->port < (unsigned long)port)
682 break; 686 break;
683 } 687 }
@@ -688,8 +692,6 @@ static int br_multicast_add_group(struct net_bridge *br,
688 rcu_assign_pointer(*pp, p); 692 rcu_assign_pointer(*pp, p);
689 br_mdb_notify(br->dev, port, group, RTM_NEWMDB); 693 br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
690 694
691found:
692 mod_timer(&p->timer, now + br->multicast_membership_interval);
693out: 695out:
694 err = 0; 696 err = 0;
695 697
@@ -723,7 +725,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
723{ 725{
724 struct br_ip br_group; 726 struct br_ip br_group;
725 727
726 if (!ipv6_is_transient_multicast(group)) 728 if (ipv6_addr_is_ll_all_nodes(group))
727 return 0; 729 return 0;
728 730
729 br_group.u.ip6 = *group; 731 br_group.u.ip6 = *group;
@@ -755,20 +757,35 @@ static void br_multicast_local_router_expired(unsigned long data)
755{ 757{
756} 758}
757 759
758static void br_multicast_querier_expired(unsigned long data) 760static void br_multicast_querier_expired(struct net_bridge *br,
761 struct bridge_mcast_query *query)
759{ 762{
760 struct net_bridge *br = (void *)data;
761
762 spin_lock(&br->multicast_lock); 763 spin_lock(&br->multicast_lock);
763 if (!netif_running(br->dev) || br->multicast_disabled) 764 if (!netif_running(br->dev) || br->multicast_disabled)
764 goto out; 765 goto out;
765 766
766 br_multicast_start_querier(br); 767 br_multicast_start_querier(br, query);
767 768
768out: 769out:
769 spin_unlock(&br->multicast_lock); 770 spin_unlock(&br->multicast_lock);
770} 771}
771 772
773static void br_ip4_multicast_querier_expired(unsigned long data)
774{
775 struct net_bridge *br = (void *)data;
776
777 br_multicast_querier_expired(br, &br->ip4_query);
778}
779
780#if IS_ENABLED(CONFIG_IPV6)
781static void br_ip6_multicast_querier_expired(unsigned long data)
782{
783 struct net_bridge *br = (void *)data;
784
785 br_multicast_querier_expired(br, &br->ip6_query);
786}
787#endif
788
772static void __br_multicast_send_query(struct net_bridge *br, 789static void __br_multicast_send_query(struct net_bridge *br,
773 struct net_bridge_port *port, 790 struct net_bridge_port *port,
774 struct br_ip *ip) 791 struct br_ip *ip)
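
Note: the refactor hinges on two small structures added to br_private.h (that file's hunks are not shown in this section), giving IGMP and MLD separate state for our own queries and for the elected querier. A sketch of the pair, as assumed throughout the hunks below:

	struct bridge_mcast_query {		/* our own query timer, per protocol */
		struct timer_list	timer;
		u32			startup_sent;
	};

	struct bridge_mcast_querier {		/* state of the selected querier */
		struct timer_list	timer;
		unsigned long		delay_time;
	};

br_multicast_querier_exists(), used by the br_device.c and br_input.c hunks above, consults the matching querier's timer and delay_time to decide whether snooped delivery is safe for a given Ethernet protocol.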
@@ -789,37 +806,45 @@ static void __br_multicast_send_query(struct net_bridge *br,
789} 806}
790 807
791static void br_multicast_send_query(struct net_bridge *br, 808static void br_multicast_send_query(struct net_bridge *br,
792 struct net_bridge_port *port, u32 sent) 809 struct net_bridge_port *port,
810 struct bridge_mcast_query *query)
793{ 811{
794 unsigned long time; 812 unsigned long time;
795 struct br_ip br_group; 813 struct br_ip br_group;
814 struct bridge_mcast_querier *querier = NULL;
796 815
797 if (!netif_running(br->dev) || br->multicast_disabled || 816 if (!netif_running(br->dev) || br->multicast_disabled ||
798 !br->multicast_querier || 817 !br->multicast_querier)
799 timer_pending(&br->multicast_querier_timer))
800 return; 818 return;
801 819
802 memset(&br_group.u, 0, sizeof(br_group.u)); 820 memset(&br_group.u, 0, sizeof(br_group.u));
803 821
804	br_group.proto = htons(ETH_P_IP);
805	__br_multicast_send_query(br, port, &br_group);
806
822	if (port ? (query == &port->ip4_query) :
823		   (query == &br->ip4_query)) {
824		querier = &br->ip4_querier;
825		br_group.proto = htons(ETH_P_IP);
807#if IS_ENABLED(CONFIG_IPV6)	826#if IS_ENABLED(CONFIG_IPV6)
808	br_group.proto = htons(ETH_P_IPV6);
809	__br_multicast_send_query(br, port, &br_group);
827	} else {
828		querier = &br->ip6_querier;
829		br_group.proto = htons(ETH_P_IPV6);
810#endif 830#endif
831 }
832
833 if (!querier || timer_pending(&querier->timer))
834 return;
835
836 __br_multicast_send_query(br, port, &br_group);
811 837
812 time = jiffies; 838 time = jiffies;
813 time += sent < br->multicast_startup_query_count ? 839 time += query->startup_sent < br->multicast_startup_query_count ?
814 br->multicast_startup_query_interval : 840 br->multicast_startup_query_interval :
815 br->multicast_query_interval; 841 br->multicast_query_interval;
816	mod_timer(port ? &port->multicast_query_timer :
817		  &br->multicast_query_timer, time);
842	mod_timer(&query->timer, time);
818} 843}
819 844
820static void br_multicast_port_query_expired(unsigned long data) 845static void br_multicast_port_query_expired(struct net_bridge_port *port,
846 struct bridge_mcast_query *query)
821{ 847{
822 struct net_bridge_port *port = (void *)data;
823 struct net_bridge *br = port->br; 848 struct net_bridge *br = port->br;
824 849
825 spin_lock(&br->multicast_lock); 850 spin_lock(&br->multicast_lock);
@@ -827,25 +852,43 @@ static void br_multicast_port_query_expired(unsigned long data)
827 port->state == BR_STATE_BLOCKING) 852 port->state == BR_STATE_BLOCKING)
828 goto out; 853 goto out;
829 854
830	if (port->multicast_startup_queries_sent <
831	    br->multicast_startup_query_count)
832		port->multicast_startup_queries_sent++;
855	if (query->startup_sent < br->multicast_startup_query_count)
856		query->startup_sent++;
833 857
834	br_multicast_send_query(port->br, port,
835			port->multicast_startup_queries_sent);
858	br_multicast_send_query(port->br, port, query);
836 859
837out: 860out:
838 spin_unlock(&br->multicast_lock); 861 spin_unlock(&br->multicast_lock);
839} 862}
840 863
864static void br_ip4_multicast_port_query_expired(unsigned long data)
865{
866 struct net_bridge_port *port = (void *)data;
867
868 br_multicast_port_query_expired(port, &port->ip4_query);
869}
870
871#if IS_ENABLED(CONFIG_IPV6)
872static void br_ip6_multicast_port_query_expired(unsigned long data)
873{
874 struct net_bridge_port *port = (void *)data;
875
876 br_multicast_port_query_expired(port, &port->ip6_query);
877}
878#endif
879
841void br_multicast_add_port(struct net_bridge_port *port) 880void br_multicast_add_port(struct net_bridge_port *port)
842{ 881{
843 port->multicast_router = 1; 882 port->multicast_router = 1;
844 883
845 setup_timer(&port->multicast_router_timer, br_multicast_router_expired, 884 setup_timer(&port->multicast_router_timer, br_multicast_router_expired,
846 (unsigned long)port); 885 (unsigned long)port);
847	setup_timer(&port->multicast_query_timer,
848		    br_multicast_port_query_expired, (unsigned long)port);
886	setup_timer(&port->ip4_query.timer, br_ip4_multicast_port_query_expired,
887		    (unsigned long)port);
888#if IS_ENABLED(CONFIG_IPV6)
889 setup_timer(&port->ip6_query.timer, br_ip6_multicast_port_query_expired,
890 (unsigned long)port);
891#endif
849} 892}
850 893
851void br_multicast_del_port(struct net_bridge_port *port) 894void br_multicast_del_port(struct net_bridge_port *port)
@@ -853,13 +896,13 @@ void br_multicast_del_port(struct net_bridge_port *port)
853 del_timer_sync(&port->multicast_router_timer); 896 del_timer_sync(&port->multicast_router_timer);
854} 897}
855 898
856static void __br_multicast_enable_port(struct net_bridge_port *port) 899static void br_multicast_enable(struct bridge_mcast_query *query)
857{ 900{
858 port->multicast_startup_queries_sent = 0; 901 query->startup_sent = 0;
859 902
860 if (try_to_del_timer_sync(&port->multicast_query_timer) >= 0 || 903 if (try_to_del_timer_sync(&query->timer) >= 0 ||
861 del_timer(&port->multicast_query_timer)) 904 del_timer(&query->timer))
862 mod_timer(&port->multicast_query_timer, jiffies); 905 mod_timer(&query->timer, jiffies);
863} 906}
864 907
865void br_multicast_enable_port(struct net_bridge_port *port) 908void br_multicast_enable_port(struct net_bridge_port *port)
@@ -870,7 +913,10 @@ void br_multicast_enable_port(struct net_bridge_port *port)
870 if (br->multicast_disabled || !netif_running(br->dev)) 913 if (br->multicast_disabled || !netif_running(br->dev))
871 goto out; 914 goto out;
872 915
873 __br_multicast_enable_port(port); 916 br_multicast_enable(&port->ip4_query);
917#if IS_ENABLED(CONFIG_IPV6)
918 br_multicast_enable(&port->ip6_query);
919#endif
874 920
875out: 921out:
876 spin_unlock(&br->multicast_lock); 922 spin_unlock(&br->multicast_lock);
@@ -889,7 +935,10 @@ void br_multicast_disable_port(struct net_bridge_port *port)
889 if (!hlist_unhashed(&port->rlist)) 935 if (!hlist_unhashed(&port->rlist))
890 hlist_del_init_rcu(&port->rlist); 936 hlist_del_init_rcu(&port->rlist);
891 del_timer(&port->multicast_router_timer); 937 del_timer(&port->multicast_router_timer);
892 del_timer(&port->multicast_query_timer); 938 del_timer(&port->ip4_query.timer);
939#if IS_ENABLED(CONFIG_IPV6)
940 del_timer(&port->ip6_query.timer);
941#endif
893 spin_unlock(&br->multicast_lock); 942 spin_unlock(&br->multicast_lock);
894} 943}
895 944
@@ -1014,8 +1063,19 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
1014} 1063}
1015#endif 1064#endif
1016 1065
1066static void
1067br_multicast_update_querier_timer(struct net_bridge *br,
1068 struct bridge_mcast_querier *querier,
1069 unsigned long max_delay)
1070{
1071 if (!timer_pending(&querier->timer))
1072 querier->delay_time = jiffies + max_delay;
1073
1074 mod_timer(&querier->timer, jiffies + br->multicast_querier_interval);
1075}
1076
1017/* 1077/*
1018 * Add port to rotuer_list 1078 * Add port to router_list
1019 * list is maintained ordered by pointer value 1079 * list is maintained ordered by pointer value
1020 * and locked by br->multicast_lock and RCU 1080 * and locked by br->multicast_lock and RCU
1021 */ 1081 */
@@ -1064,12 +1124,13 @@ timer:
1064 1124
1065static void br_multicast_query_received(struct net_bridge *br, 1125static void br_multicast_query_received(struct net_bridge *br,
1066 struct net_bridge_port *port, 1126 struct net_bridge_port *port,
1067 int saddr) 1127 struct bridge_mcast_querier *querier,
1128 int saddr,
1129 unsigned long max_delay)
1068{ 1130{
1069 if (saddr) 1131 if (saddr)
1070		mod_timer(&br->multicast_querier_timer,
1071			  jiffies + br->multicast_querier_interval);
1072	else if (timer_pending(&br->multicast_querier_timer))
1132		br_multicast_update_querier_timer(br, querier, max_delay);
1133	else if (timer_pending(&querier->timer))
1073 return; 1134 return;
1074 1135
1075 br_multicast_mark_router(br, port); 1136 br_multicast_mark_router(br, port);
@@ -1096,8 +1157,6 @@ static int br_ip4_multicast_query(struct net_bridge *br,
1096 (port && port->state == BR_STATE_DISABLED)) 1157 (port && port->state == BR_STATE_DISABLED))
1097 goto out; 1158 goto out;
1098 1159
1099 br_multicast_query_received(br, port, !!iph->saddr);
1100
1101 group = ih->group; 1160 group = ih->group;
1102 1161
1103 if (skb->len == sizeof(*ih)) { 1162 if (skb->len == sizeof(*ih)) {
@@ -1121,6 +1180,9 @@ static int br_ip4_multicast_query(struct net_bridge *br,
1121 IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; 1180 IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1;
1122 } 1181 }
1123 1182
1183 br_multicast_query_received(br, port, &br->ip4_querier, !!iph->saddr,
1184 max_delay);
1185
1124 if (!group) 1186 if (!group)
1125 goto out; 1187 goto out;
1126 1188
@@ -1129,6 +1191,9 @@ static int br_ip4_multicast_query(struct net_bridge *br,
1129 if (!mp) 1191 if (!mp)
1130 goto out; 1192 goto out;
1131 1193
1194 mod_timer(&mp->timer, now + br->multicast_membership_interval);
1195 mp->timer_armed = true;
1196
1132 max_delay *= br->multicast_last_member_count; 1197 max_delay *= br->multicast_last_member_count;
1133 1198
1134 if (mp->mglist && 1199 if (mp->mglist &&
@@ -1173,8 +1238,6 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1173 (port && port->state == BR_STATE_DISABLED)) 1238 (port && port->state == BR_STATE_DISABLED))
1174 goto out; 1239 goto out;
1175 1240
1176 br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr));
1177
1178 if (skb->len == sizeof(*mld)) { 1241 if (skb->len == sizeof(*mld)) {
1179 if (!pskb_may_pull(skb, sizeof(*mld))) { 1242 if (!pskb_may_pull(skb, sizeof(*mld))) {
1180 err = -EINVAL; 1243 err = -EINVAL;
@@ -1184,7 +1247,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1184 max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay)); 1247 max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay));
1185 if (max_delay) 1248 if (max_delay)
1186 group = &mld->mld_mca; 1249 group = &mld->mld_mca;
1187 } else if (skb->len >= sizeof(*mld2q)) { 1250 } else {
1188 if (!pskb_may_pull(skb, sizeof(*mld2q))) { 1251 if (!pskb_may_pull(skb, sizeof(*mld2q))) {
1189 err = -EINVAL; 1252 err = -EINVAL;
1190 goto out; 1253 goto out;
@@ -1192,9 +1255,13 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1192 mld2q = (struct mld2_query *)icmp6_hdr(skb); 1255 mld2q = (struct mld2_query *)icmp6_hdr(skb);
1193 if (!mld2q->mld2q_nsrcs) 1256 if (!mld2q->mld2q_nsrcs)
1194 group = &mld2q->mld2q_mca; 1257 group = &mld2q->mld2q_mca;
1195 max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1; 1258
1259 max_delay = max(msecs_to_jiffies(mldv2_mrc(mld2q)), 1UL);
1196 } 1260 }
1197 1261
1262 br_multicast_query_received(br, port, &br->ip6_querier,
1263 !ipv6_addr_any(&ip6h->saddr), max_delay);
1264
1198 if (!group) 1265 if (!group)
1199 goto out; 1266 goto out;
1200 1267
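
Note: mldv2_mrc() decodes the MLDv2 Maximum Response Code, which is a plain millisecond count below 32768 and mantissa/exponent encoded above that (RFC 3810, section 5.1.3). Roughly, as the helper this series adds to include/net/mld.h:

	#define MLDV2_MRC_EXP(value)	(((value) >> 12) & 0x0007)
	#define MLDV2_MRC_MAN(value)	((value) & 0x0fff)

	static inline unsigned long mldv2_mrc(const struct mld2_query *mlh2)
	{
		unsigned long ret, mc_mrc = ntohs(mlh2->mld2q_mrc);

		if (mc_mrc < 32768) {
			ret = mc_mrc;			/* direct value */
		} else {
			unsigned long mc_man = MLDV2_MRC_MAN(mc_mrc);
			unsigned long mc_exp = MLDV2_MRC_EXP(mc_mrc);

			ret = (mc_man | 0x1000) << (mc_exp + 3);
		}
		return ret;				/* milliseconds */
	}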
@@ -1203,6 +1270,9 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1203 if (!mp) 1270 if (!mp)
1204 goto out; 1271 goto out;
1205 1272
1273 mod_timer(&mp->timer, now + br->multicast_membership_interval);
1274 mp->timer_armed = true;
1275
1206 max_delay *= br->multicast_last_member_count; 1276 max_delay *= br->multicast_last_member_count;
1207 if (mp->mglist && 1277 if (mp->mglist &&
1208 (timer_pending(&mp->timer) ? 1278 (timer_pending(&mp->timer) ?
@@ -1227,7 +1297,9 @@ out:
1227 1297
1228static void br_multicast_leave_group(struct net_bridge *br, 1298static void br_multicast_leave_group(struct net_bridge *br,
1229 struct net_bridge_port *port, 1299 struct net_bridge_port *port,
1230 struct br_ip *group) 1300 struct br_ip *group,
1301 struct bridge_mcast_querier *querier,
1302 struct bridge_mcast_query *query)
1231{ 1303{
1232 struct net_bridge_mdb_htable *mdb; 1304 struct net_bridge_mdb_htable *mdb;
1233 struct net_bridge_mdb_entry *mp; 1305 struct net_bridge_mdb_entry *mp;
@@ -1238,7 +1310,7 @@ static void br_multicast_leave_group(struct net_bridge *br,
1238 spin_lock(&br->multicast_lock); 1310 spin_lock(&br->multicast_lock);
1239 if (!netif_running(br->dev) || 1311 if (!netif_running(br->dev) ||
1240 (port && port->state == BR_STATE_DISABLED) || 1312 (port && port->state == BR_STATE_DISABLED) ||
1241 timer_pending(&br->multicast_querier_timer)) 1313 timer_pending(&querier->timer))
1242 goto out; 1314 goto out;
1243 1315
1244 mdb = mlock_dereference(br->mdb, br); 1316 mdb = mlock_dereference(br->mdb, br);
@@ -1246,6 +1318,31 @@ static void br_multicast_leave_group(struct net_bridge *br,
1246 if (!mp) 1318 if (!mp)
1247 goto out; 1319 goto out;
1248 1320
1321 if (br->multicast_querier) {
1322 __br_multicast_send_query(br, port, &mp->addr);
1323
1324 time = jiffies + br->multicast_last_member_count *
1325 br->multicast_last_member_interval;
1326
1327 mod_timer(&query->timer, time);
1328
1329 for (p = mlock_dereference(mp->ports, br);
1330 p != NULL;
1331 p = mlock_dereference(p->next, br)) {
1332 if (p->port != port)
1333 continue;
1334
1335 if (!hlist_unhashed(&p->mglist) &&
1336 (timer_pending(&p->timer) ?
1337 time_after(p->timer.expires, time) :
1338 try_to_del_timer_sync(&p->timer) >= 0)) {
1339 mod_timer(&p->timer, time);
1340 }
1341
1342 break;
1343 }
1344 }
1345
1249 if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { 1346 if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) {
1250 struct net_bridge_port_group __rcu **pp; 1347 struct net_bridge_port_group __rcu **pp;
1251 1348
@@ -1261,7 +1358,7 @@ static void br_multicast_leave_group(struct net_bridge *br,
1261 call_rcu_bh(&p->rcu, br_multicast_free_pg); 1358 call_rcu_bh(&p->rcu, br_multicast_free_pg);
1262 br_mdb_notify(br->dev, port, group, RTM_DELMDB); 1359 br_mdb_notify(br->dev, port, group, RTM_DELMDB);
1263 1360
1264 if (!mp->ports && !mp->mglist && 1361 if (!mp->ports && !mp->mglist && mp->timer_armed &&
1265 netif_running(br->dev)) 1362 netif_running(br->dev))
1266 mod_timer(&mp->timer, jiffies); 1363 mod_timer(&mp->timer, jiffies);
1267 } 1364 }
@@ -1273,32 +1370,13 @@ static void br_multicast_leave_group(struct net_bridge *br,
1273 br->multicast_last_member_interval; 1370 br->multicast_last_member_interval;
1274 1371
1275 if (!port) { 1372 if (!port) {
1276 if (mp->mglist && 1373 if (mp->mglist && mp->timer_armed &&
1277 (timer_pending(&mp->timer) ? 1374 (timer_pending(&mp->timer) ?
1278 time_after(mp->timer.expires, time) : 1375 time_after(mp->timer.expires, time) :
1279 try_to_del_timer_sync(&mp->timer) >= 0)) { 1376 try_to_del_timer_sync(&mp->timer) >= 0)) {
1280 mod_timer(&mp->timer, time); 1377 mod_timer(&mp->timer, time);
1281 } 1378 }
1282
1283 goto out;
1284 } 1379 }
1285
1286 for (p = mlock_dereference(mp->ports, br);
1287 p != NULL;
1288 p = mlock_dereference(p->next, br)) {
1289 if (p->port != port)
1290 continue;
1291
1292 if (!hlist_unhashed(&p->mglist) &&
1293 (timer_pending(&p->timer) ?
1294 time_after(p->timer.expires, time) :
1295 try_to_del_timer_sync(&p->timer) >= 0)) {
1296 mod_timer(&p->timer, time);
1297 }
1298
1299 break;
1300 }
1301
1302out: 1380out:
1303 spin_unlock(&br->multicast_lock); 1381 spin_unlock(&br->multicast_lock);
1304} 1382}
@@ -1309,6 +1387,8 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
1309 __u16 vid) 1387 __u16 vid)
1310{ 1388{
1311 struct br_ip br_group; 1389 struct br_ip br_group;
1390 struct bridge_mcast_query *query = port ? &port->ip4_query :
1391 &br->ip4_query;
1312 1392
1313 if (ipv4_is_local_multicast(group)) 1393 if (ipv4_is_local_multicast(group))
1314 return; 1394 return;
@@ -1317,7 +1397,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
1317 br_group.proto = htons(ETH_P_IP); 1397 br_group.proto = htons(ETH_P_IP);
1318 br_group.vid = vid; 1398 br_group.vid = vid;
1319 1399
1320 br_multicast_leave_group(br, port, &br_group); 1400 br_multicast_leave_group(br, port, &br_group, &br->ip4_querier, query);
1321} 1401}
1322 1402
1323#if IS_ENABLED(CONFIG_IPV6) 1403#if IS_ENABLED(CONFIG_IPV6)
@@ -1327,15 +1407,18 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
1327 __u16 vid) 1407 __u16 vid)
1328{ 1408{
1329 struct br_ip br_group; 1409 struct br_ip br_group;
1410 struct bridge_mcast_query *query = port ? &port->ip6_query :
1411 &br->ip6_query;
1412
1330 1413
1331 if (!ipv6_is_transient_multicast(group)) 1414 if (ipv6_addr_is_ll_all_nodes(group))
1332 return; 1415 return;
1333 1416
1334 br_group.u.ip6 = *group; 1417 br_group.u.ip6 = *group;
1335 br_group.proto = htons(ETH_P_IPV6); 1418 br_group.proto = htons(ETH_P_IPV6);
1336 br_group.vid = vid; 1419 br_group.vid = vid;
1337 1420
1338 br_multicast_leave_group(br, port, &br_group); 1421 br_multicast_leave_group(br, port, &br_group, &br->ip6_querier, query);
1339} 1422}
1340#endif 1423#endif
1341 1424
@@ -1465,8 +1548,14 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
1465 * - MLD has always Router Alert hop-by-hop option 1548 * - MLD has always Router Alert hop-by-hop option
1466 * - But we do not support jumbrograms. 1549 * - But we do not support jumbrograms.
1467 */ 1550 */
1468 if (ip6h->version != 6 || 1551 if (ip6h->version != 6)
1469 ip6h->nexthdr != IPPROTO_HOPOPTS || 1552 return 0;
1553
1554 /* Prevent flooding this packet if there is no listener present */
1555 if (!ipv6_addr_is_ll_all_nodes(&ip6h->daddr))
1556 BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
1557
1558 if (ip6h->nexthdr != IPPROTO_HOPOPTS ||
1470 ip6h->payload_len == 0) 1559 ip6h->payload_len == 0)
1471 return 0; 1560 return 0;
1472 1561
@@ -1597,19 +1686,32 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
1597 return 0; 1686 return 0;
1598} 1687}
1599 1688
1600static void br_multicast_query_expired(unsigned long data) 1689static void br_multicast_query_expired(struct net_bridge *br,
1690 struct bridge_mcast_query *query)
1691{
1692 spin_lock(&br->multicast_lock);
1693 if (query->startup_sent < br->multicast_startup_query_count)
1694 query->startup_sent++;
1695
1696 br_multicast_send_query(br, NULL, query);
1697 spin_unlock(&br->multicast_lock);
1698}
1699
1700static void br_ip4_multicast_query_expired(unsigned long data)
1601{ 1701{
1602 struct net_bridge *br = (void *)data; 1702 struct net_bridge *br = (void *)data;
1603 1703
1604	spin_lock(&br->multicast_lock);
1605	if (br->multicast_startup_queries_sent <
1606	    br->multicast_startup_query_count)
1607		br->multicast_startup_queries_sent++;
1608
1609	br_multicast_send_query(br, NULL, br->multicast_startup_queries_sent);
1610
1611	spin_unlock(&br->multicast_lock);
1704	br_multicast_query_expired(br, &br->ip4_query);
1705}
1706
1707#if IS_ENABLED(CONFIG_IPV6)
1708static void br_ip6_multicast_query_expired(unsigned long data)
1709{
1710	struct net_bridge *br = (void *)data;
1711
1712	br_multicast_query_expired(br, &br->ip6_query);
1612} 1713}
1714#endif
1613 1715
1614void br_multicast_init(struct net_bridge *br) 1716void br_multicast_init(struct net_bridge *br)
1615{ 1717{
@@ -1618,6 +1720,7 @@ void br_multicast_init(struct net_bridge *br)
1618 1720
1619 br->multicast_router = 1; 1721 br->multicast_router = 1;
1620 br->multicast_querier = 0; 1722 br->multicast_querier = 0;
1723 br->multicast_query_use_ifaddr = 0;
1621 br->multicast_last_member_count = 2; 1724 br->multicast_last_member_count = 2;
1622 br->multicast_startup_query_count = 2; 1725 br->multicast_startup_query_count = 2;
1623 1726
@@ -1628,23 +1731,43 @@ void br_multicast_init(struct net_bridge *br)
1628 br->multicast_querier_interval = 255 * HZ; 1731 br->multicast_querier_interval = 255 * HZ;
1629 br->multicast_membership_interval = 260 * HZ; 1732 br->multicast_membership_interval = 260 * HZ;
1630 1733
1734 br->ip4_querier.delay_time = 0;
1735#if IS_ENABLED(CONFIG_IPV6)
1736 br->ip6_querier.delay_time = 0;
1737#endif
1738
1631 spin_lock_init(&br->multicast_lock); 1739 spin_lock_init(&br->multicast_lock);
1632 setup_timer(&br->multicast_router_timer, 1740 setup_timer(&br->multicast_router_timer,
1633 br_multicast_local_router_expired, 0); 1741 br_multicast_local_router_expired, 0);
1634 setup_timer(&br->multicast_querier_timer, 1742 setup_timer(&br->ip4_querier.timer, br_ip4_multicast_querier_expired,
1635 br_multicast_querier_expired, (unsigned long)br); 1743 (unsigned long)br);
1636 setup_timer(&br->multicast_query_timer, br_multicast_query_expired, 1744 setup_timer(&br->ip4_query.timer, br_ip4_multicast_query_expired,
1745 (unsigned long)br);
1746#if IS_ENABLED(CONFIG_IPV6)
1747 setup_timer(&br->ip6_querier.timer, br_ip6_multicast_querier_expired,
1748 (unsigned long)br);
1749 setup_timer(&br->ip6_query.timer, br_ip6_multicast_query_expired,
1637 (unsigned long)br); 1750 (unsigned long)br);
1751#endif
1638} 1752}
1639 1753
1640void br_multicast_open(struct net_bridge *br) 1754static void __br_multicast_open(struct net_bridge *br,
1755 struct bridge_mcast_query *query)
1641{ 1756{
1642 br->multicast_startup_queries_sent = 0; 1757 query->startup_sent = 0;
1643 1758
1644 if (br->multicast_disabled) 1759 if (br->multicast_disabled)
1645 return; 1760 return;
1646 1761
1647 mod_timer(&br->multicast_query_timer, jiffies); 1762 mod_timer(&query->timer, jiffies);
1763}
1764
1765void br_multicast_open(struct net_bridge *br)
1766{
1767 __br_multicast_open(br, &br->ip4_query);
1768#if IS_ENABLED(CONFIG_IPV6)
1769 __br_multicast_open(br, &br->ip6_query);
1770#endif
1648} 1771}
1649 1772
1650void br_multicast_stop(struct net_bridge *br) 1773void br_multicast_stop(struct net_bridge *br)
@@ -1656,8 +1779,12 @@ void br_multicast_stop(struct net_bridge *br)
1656 int i; 1779 int i;
1657 1780
1658 del_timer_sync(&br->multicast_router_timer); 1781 del_timer_sync(&br->multicast_router_timer);
1659 del_timer_sync(&br->multicast_querier_timer); 1782 del_timer_sync(&br->ip4_querier.timer);
1660 del_timer_sync(&br->multicast_query_timer); 1783 del_timer_sync(&br->ip4_query.timer);
1784#if IS_ENABLED(CONFIG_IPV6)
1785 del_timer_sync(&br->ip6_querier.timer);
1786 del_timer_sync(&br->ip6_query.timer);
1787#endif
1661 1788
1662 spin_lock_bh(&br->multicast_lock); 1789 spin_lock_bh(&br->multicast_lock);
1663 mdb = mlock_dereference(br->mdb, br); 1790 mdb = mlock_dereference(br->mdb, br);
@@ -1671,6 +1798,7 @@ void br_multicast_stop(struct net_bridge *br)
1671 hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], 1798 hlist_for_each_entry_safe(mp, n, &mdb->mhash[i],
1672 hlist[ver]) { 1799 hlist[ver]) {
1673 del_timer(&mp->timer); 1800 del_timer(&mp->timer);
1801 mp->timer_armed = false;
1674 call_rcu_bh(&mp->rcu, br_multicast_free_group); 1802 call_rcu_bh(&mp->rcu, br_multicast_free_group);
1675 } 1803 }
1676 } 1804 }
@@ -1759,18 +1887,24 @@ unlock:
1759 return err; 1887 return err;
1760} 1888}
1761 1889
1762static void br_multicast_start_querier(struct net_bridge *br) 1890static void br_multicast_start_querier(struct net_bridge *br,
1891 struct bridge_mcast_query *query)
1763{ 1892{
1764 struct net_bridge_port *port; 1893 struct net_bridge_port *port;
1765 1894
1766 br_multicast_open(br); 1895 __br_multicast_open(br, query);
1767 1896
1768 list_for_each_entry(port, &br->port_list, list) { 1897 list_for_each_entry(port, &br->port_list, list) {
1769 if (port->state == BR_STATE_DISABLED || 1898 if (port->state == BR_STATE_DISABLED ||
1770 port->state == BR_STATE_BLOCKING) 1899 port->state == BR_STATE_BLOCKING)
1771 continue; 1900 continue;
1772 1901
1773 __br_multicast_enable_port(port); 1902 if (query == &br->ip4_query)
1903 br_multicast_enable(&port->ip4_query);
1904#if IS_ENABLED(CONFIG_IPV6)
1905 else
1906 br_multicast_enable(&port->ip6_query);
1907#endif
1774 } 1908 }
1775} 1909}
1776 1910
@@ -1805,7 +1939,10 @@ rollback:
1805 goto rollback; 1939 goto rollback;
1806 } 1940 }
1807 1941
1808 br_multicast_start_querier(br); 1942 br_multicast_start_querier(br, &br->ip4_query);
1943#if IS_ENABLED(CONFIG_IPV6)
1944 br_multicast_start_querier(br, &br->ip6_query);
1945#endif
1809 1946
1810unlock: 1947unlock:
1811 spin_unlock_bh(&br->multicast_lock); 1948 spin_unlock_bh(&br->multicast_lock);
@@ -1815,6 +1952,8 @@ unlock:
1815 1952
1816int br_multicast_set_querier(struct net_bridge *br, unsigned long val) 1953int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
1817{ 1954{
1955 unsigned long max_delay;
1956
1818 val = !!val; 1957 val = !!val;
1819 1958
1820 spin_lock_bh(&br->multicast_lock); 1959 spin_lock_bh(&br->multicast_lock);
@@ -1822,8 +1961,22 @@ int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
1822 goto unlock; 1961 goto unlock;
1823 1962
1824 br->multicast_querier = val; 1963 br->multicast_querier = val;
1825 if (val) 1964 if (!val)
1826 br_multicast_start_querier(br); 1965 goto unlock;
1966
1967 max_delay = br->multicast_query_response_interval;
1968
1969 if (!timer_pending(&br->ip4_querier.timer))
1970 br->ip4_querier.delay_time = jiffies + max_delay;
1971
1972 br_multicast_start_querier(br, &br->ip4_query);
1973
1974#if IS_ENABLED(CONFIG_IPV6)
1975 if (!timer_pending(&br->ip6_querier.timer))
1976 br->ip6_querier.delay_time = jiffies + max_delay;
1977
1978 br_multicast_start_querier(br, &br->ip6_query);
1979#endif
1827 1980
1828unlock: 1981unlock:
1829 spin_unlock_bh(&br->multicast_lock); 1982 spin_unlock_bh(&br->multicast_lock);
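
The br_multicast.c hunks above split the bridge's single own-querier state into per-address-family state (ip4_query/ip6_query) and route both timer callbacks through one shared expiry helper. Below is a minimal standalone sketch of that pattern in userspace C: the struct and function names mirror the patch, but the timer plumbing and the printf output are stand-ins, not kernel code.

#include <stdio.h>

struct bridge_mcast_query {
	unsigned int startup_sent;
};

struct net_bridge {
	unsigned int multicast_startup_query_count;
	struct bridge_mcast_query ip4_query;
	struct bridge_mcast_query ip6_query;
};

/* shared, family-agnostic logic: operates on whichever query it is handed */
static void br_multicast_query_expired(struct net_bridge *br,
				       struct bridge_mcast_query *query)
{
	if (query->startup_sent < br->multicast_startup_query_count)
		query->startup_sent++;
	printf("send query, startup_sent=%u\n", query->startup_sent);
}

/* thin per-family wrappers keep the opaque timer-callback signature */
static void br_ip4_multicast_query_expired(unsigned long data)
{
	struct net_bridge *br = (struct net_bridge *)data;

	br_multicast_query_expired(br, &br->ip4_query);
}

static void br_ip6_multicast_query_expired(unsigned long data)
{
	struct net_bridge *br = (struct net_bridge *)data;

	br_multicast_query_expired(br, &br->ip6_query);
}

int main(void)
{
	struct net_bridge br = { .multicast_startup_query_count = 2 };

	/* pretend both per-family timers fired once */
	br_ip4_multicast_query_expired((unsigned long)&br);
	br_ip6_multicast_query_expired((unsigned long)&br);
	return 0;
}

The same wrapper-plus-helper shape repeats for the "other querier" timers (br_ip4/ip6_multicast_querier_expired) wired up in br_multicast_init() above.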
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 1ed75bfd8d1d..f87736270eaa 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -992,7 +992,7 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
992 992
993#ifdef CONFIG_SYSCTL 993#ifdef CONFIG_SYSCTL
994static 994static
995int brnf_sysctl_call_tables(ctl_table * ctl, int write, 995int brnf_sysctl_call_tables(struct ctl_table *ctl, int write,
996 void __user * buffer, size_t * lenp, loff_t * ppos) 996 void __user * buffer, size_t * lenp, loff_t * ppos)
997{ 997{
998 int ret; 998 int ret;
@@ -1004,7 +1004,7 @@ int brnf_sysctl_call_tables(ctl_table * ctl, int write,
1004 return ret; 1004 return ret;
1005} 1005}
1006 1006
1007static ctl_table brnf_table[] = { 1007static struct ctl_table brnf_table[] = {
1008 { 1008 {
1009 .procname = "bridge-nf-call-arptables", 1009 .procname = "bridge-nf-call-arptables",
1010 .data = &brnf_call_arptables, 1010 .data = &brnf_call_arptables,
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 8e3abf564798..e74ddc1c29a8 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -30,6 +30,8 @@ static inline size_t br_port_info_size(void)
30 + nla_total_size(1) /* IFLA_BRPORT_GUARD */ 30 + nla_total_size(1) /* IFLA_BRPORT_GUARD */
31 + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ 31 + nla_total_size(1) /* IFLA_BRPORT_PROTECT */
32 + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ 32 + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */
33 + nla_total_size(1) /* IFLA_BRPORT_LEARNING */
34 + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */
33 + 0; 35 + 0;
34} 36}
35 37
@@ -56,7 +58,9 @@ static int br_port_fill_attrs(struct sk_buff *skb,
56 nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || 58 nla_put_u8(skb, IFLA_BRPORT_MODE, mode) ||
57 nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || 59 nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) ||
58 nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) || 60 nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) ||
59 nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE))) 61 nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) ||
62 nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) ||
63 nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)))
60 return -EMSGSIZE; 64 return -EMSGSIZE;
61 65
62 return 0; 66 return 0;
@@ -128,7 +132,7 @@ static int br_fill_ifinfo(struct sk_buff *skb,
128 else 132 else
129 pv = br_get_vlan_info(br); 133 pv = br_get_vlan_info(br);
130 134
131 if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) 135 if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID))
132 goto done; 136 goto done;
133 137
134 af = nla_nest_start(skb, IFLA_AF_SPEC); 138 af = nla_nest_start(skb, IFLA_AF_SPEC);
@@ -136,7 +140,7 @@ static int br_fill_ifinfo(struct sk_buff *skb,
136 goto nla_put_failure; 140 goto nla_put_failure;
137 141
138 pvid = br_get_pvid(pv); 142 pvid = br_get_pvid(pv);
139 for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { 143 for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
140 vinfo.vid = vid; 144 vinfo.vid = vid;
141 vinfo.flags = 0; 145 vinfo.flags = 0;
142 if (vid == pvid) 146 if (vid == pvid)
@@ -203,7 +207,7 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
203 struct net_device *dev, u32 filter_mask) 207 struct net_device *dev, u32 filter_mask)
204{ 208{
205 int err = 0; 209 int err = 0;
206 struct net_bridge_port *port = br_port_get_rcu(dev); 210 struct net_bridge_port *port = br_port_get_rtnl(dev);
207 211
208 /* not a bridge port and */ 212 /* not a bridge port and */
209 if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) 213 if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN))
@@ -281,6 +285,8 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = {
281 [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, 285 [IFLA_BRPORT_MODE] = { .type = NLA_U8 },
282 [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, 286 [IFLA_BRPORT_GUARD] = { .type = NLA_U8 },
283 [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, 287 [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 },
288 [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 },
289 [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 },
284}; 290};
285 291
286/* Change the state of the port and notify spanning tree */ 292/* Change the state of the port and notify spanning tree */
@@ -328,6 +334,8 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
328 br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); 334 br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD);
329 br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); 335 br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE);
330 br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); 336 br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK);
337 br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING);
338 br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD);
331 339
332 if (tb[IFLA_BRPORT_COST]) { 340 if (tb[IFLA_BRPORT_COST]) {
333 err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); 341 err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST]));
@@ -443,7 +451,7 @@ static size_t br_get_link_af_size(const struct net_device *dev)
443 struct net_port_vlans *pv; 451 struct net_port_vlans *pv;
444 452
445 if (br_port_exists(dev)) 453 if (br_port_exists(dev))
446 pv = nbp_get_vlan_info(br_port_get_rcu(dev)); 454 pv = nbp_get_vlan_info(br_port_get_rtnl(dev));
447 else if (dev->priv_flags & IFF_EBRIDGE) 455 else if (dev->priv_flags & IFF_EBRIDGE)
448 pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); 456 pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev));
449 else 457 else
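
The br_netlink.c hunks above expose two new per-port flags (BR_LEARNING and BR_FLOOD) via netlink: size accounting, attribute fill, policy, and the setport path all gain matching entries. The sketch below models the flag-toggling semantics implied by br_set_port_flag(): an absent attribute leaves the bit alone, otherwise the u8 value sets or clears it. The nlattr parsing glue is replaced by a plain optional-u8 pointer; only the mask values come from the patch.

#include <stdio.h>

#define BR_LEARNING	0x00000020
#define BR_FLOOD	0x00000040

struct port {
	unsigned long flags;
};

/* only touch the bit if the attribute was actually supplied */
static void set_port_flag(struct port *p, const unsigned char *attr,
			  unsigned long mask)
{
	if (!attr)
		return;
	if (*attr)
		p->flags |= mask;
	else
		p->flags &= ~mask;
}

int main(void)
{
	struct port p = { .flags = BR_FLOOD };
	unsigned char on = 1, off = 0;

	set_port_flag(&p, &on, BR_LEARNING);	/* enable learning */
	set_port_flag(&p, &off, BR_FLOOD);	/* disable unicast flood */
	set_port_flag(&p, NULL, BR_FLOOD);	/* attribute absent: no-op */
	printf("flags=%#lx\n", p.flags);	/* prints 0x20 */
	return 0;
}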
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 1644b3e1f947..2998dd1769a0 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -31,7 +31,7 @@ struct notifier_block br_device_notifier = {
31 */ 31 */
32static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) 32static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
33{ 33{
34 struct net_device *dev = ptr; 34 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
35 struct net_bridge_port *p; 35 struct net_bridge_port *p;
36 struct net_bridge *br; 36 struct net_bridge *br;
37 bool changed_addr; 37 bool changed_addr;
@@ -102,6 +102,11 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
102 case NETDEV_PRE_TYPE_CHANGE: 102 case NETDEV_PRE_TYPE_CHANGE:
103 /* Forbid the underlying device to change its type. */ 103 /* Forbid the underlying device to change its type. */
104 return NOTIFY_BAD; 104 return NOTIFY_BAD;
105
106 case NETDEV_RESEND_IGMP:
107 /* Propagate to master device */
108 call_netdevice_notifiers(event, br->dev);
109 break;
105 } 110 }
106 111
107 /* Events that may cause spanning tree to refresh */ 112 /* Events that may cause spanning tree to refresh */
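
The notifier hunk above is one instance of a tree-wide signature change that also shows up in the caif and can hunks below: notifier callbacks stop casting the opaque pointer directly to struct net_device and instead unwrap it with netdev_notifier_info_to_dev(). A userspace model of the pattern follows; the info-struct layout is an assumed minimal one, chosen so extra fields can later ride along without touching every callback.

#include <stdio.h>

struct net_device {
	const char *name;
};

/* wrapper that can grow additional fields behind the accessor */
struct netdev_notifier_info {
	struct net_device *dev;
};

static struct net_device *
netdev_notifier_info_to_dev(const struct netdev_notifier_info *info)
{
	return info->dev;
}

static int br_device_event(unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	printf("event %lu on %s\n", event, dev->name);
	return 0;
}

int main(void)
{
	struct net_device eth0 = { .name = "eth0" };
	struct netdev_notifier_info info = { .dev = &eth0 };

	return br_device_event(1 /* NETDEV_UP-like event code */, &info);
}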
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index d2c043a857b6..efb57d911569 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -66,6 +66,20 @@ struct br_ip
66 __u16 vid; 66 __u16 vid;
67}; 67};
68 68
69#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
70/* our own querier */
71struct bridge_mcast_query {
72 struct timer_list timer;
73 u32 startup_sent;
74};
75
76/* other querier */
77struct bridge_mcast_querier {
78 struct timer_list timer;
79 unsigned long delay_time;
80};
81#endif
82
69struct net_port_vlans { 83struct net_port_vlans {
70 u16 port_idx; 84 u16 port_idx;
71 u16 pvid; 85 u16 pvid;
@@ -112,6 +126,7 @@ struct net_bridge_mdb_entry
112 struct timer_list timer; 126 struct timer_list timer;
113 struct br_ip addr; 127 struct br_ip addr;
114 bool mglist; 128 bool mglist;
129 bool timer_armed;
115}; 130};
116 131
117struct net_bridge_mdb_htable 132struct net_bridge_mdb_htable
@@ -157,12 +172,16 @@ struct net_bridge_port
157#define BR_ROOT_BLOCK 0x00000004 172#define BR_ROOT_BLOCK 0x00000004
158#define BR_MULTICAST_FAST_LEAVE 0x00000008 173#define BR_MULTICAST_FAST_LEAVE 0x00000008
159#define BR_ADMIN_COST 0x00000010 174#define BR_ADMIN_COST 0x00000010
175#define BR_LEARNING 0x00000020
176#define BR_FLOOD 0x00000040
160 177
161#ifdef CONFIG_BRIDGE_IGMP_SNOOPING 178#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
162 u32 multicast_startup_queries_sent; 179 struct bridge_mcast_query ip4_query;
180#if IS_ENABLED(CONFIG_IPV6)
181 struct bridge_mcast_query ip6_query;
182#endif /* IS_ENABLED(CONFIG_IPV6) */
163 unsigned char multicast_router; 183 unsigned char multicast_router;
164 struct timer_list multicast_router_timer; 184 struct timer_list multicast_router_timer;
165 struct timer_list multicast_query_timer;
166 struct hlist_head mglist; 185 struct hlist_head mglist;
167 struct hlist_node rlist; 186 struct hlist_node rlist;
168#endif 187#endif
@@ -183,13 +202,10 @@ struct net_bridge_port
183 202
184static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev) 203static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
185{ 204{
186 struct net_bridge_port *port = 205 return rcu_dereference(dev->rx_handler_data);
187 rcu_dereference_rtnl(dev->rx_handler_data);
188
189 return br_port_exists(dev) ? port : NULL;
190} 206}
191 207
192static inline struct net_bridge_port *br_port_get_rtnl(struct net_device *dev) 208static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device *dev)
193{ 209{
194 return br_port_exists(dev) ? 210 return br_port_exists(dev) ?
195 rtnl_dereference(dev->rx_handler_data) : NULL; 211 rtnl_dereference(dev->rx_handler_data) : NULL;
@@ -249,12 +265,12 @@ struct net_bridge
249 265
250 u8 multicast_disabled:1; 266 u8 multicast_disabled:1;
251 u8 multicast_querier:1; 267 u8 multicast_querier:1;
268 u8 multicast_query_use_ifaddr:1;
252 269
253 u32 hash_elasticity; 270 u32 hash_elasticity;
254 u32 hash_max; 271 u32 hash_max;
255 272
256 u32 multicast_last_member_count; 273 u32 multicast_last_member_count;
257 u32 multicast_startup_queries_sent;
258 u32 multicast_startup_query_count; 274 u32 multicast_startup_query_count;
259 275
260 unsigned long multicast_last_member_interval; 276 unsigned long multicast_last_member_interval;
@@ -269,8 +285,12 @@ struct net_bridge
269 struct hlist_head router_list; 285 struct hlist_head router_list;
270 286
271 struct timer_list multicast_router_timer; 287 struct timer_list multicast_router_timer;
272 struct timer_list multicast_querier_timer; 288 struct bridge_mcast_querier ip4_querier;
273 struct timer_list multicast_query_timer; 289 struct bridge_mcast_query ip4_query;
290#if IS_ENABLED(CONFIG_IPV6)
291 struct bridge_mcast_querier ip6_querier;
292 struct bridge_mcast_query ip6_query;
293#endif /* IS_ENABLED(CONFIG_IPV6) */
274#endif 294#endif
275 295
276 struct timer_list hello_timer; 296 struct timer_list hello_timer;
@@ -329,11 +349,6 @@ extern void br_dev_delete(struct net_device *dev, struct list_head *list);
329extern netdev_tx_t br_dev_xmit(struct sk_buff *skb, 349extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
330 struct net_device *dev); 350 struct net_device *dev);
331#ifdef CONFIG_NET_POLL_CONTROLLER 351#ifdef CONFIG_NET_POLL_CONTROLLER
332static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
333{
334 return br->dev->npinfo;
335}
336
337static inline void br_netpoll_send_skb(const struct net_bridge_port *p, 352static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
338 struct sk_buff *skb) 353 struct sk_buff *skb)
339{ 354{
@@ -346,11 +361,6 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
346extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp); 361extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp);
347extern void br_netpoll_disable(struct net_bridge_port *p); 362extern void br_netpoll_disable(struct net_bridge_port *p);
348#else 363#else
349static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
350{
351 return NULL;
352}
353
354static inline void br_netpoll_send_skb(const struct net_bridge_port *p, 364static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
355 struct sk_buff *skb) 365 struct sk_buff *skb)
356{ 366{
@@ -411,9 +421,10 @@ extern int br_dev_queue_push_xmit(struct sk_buff *skb);
411extern void br_forward(const struct net_bridge_port *to, 421extern void br_forward(const struct net_bridge_port *to,
412 struct sk_buff *skb, struct sk_buff *skb0); 422 struct sk_buff *skb, struct sk_buff *skb0);
413extern int br_forward_finish(struct sk_buff *skb); 423extern int br_forward_finish(struct sk_buff *skb);
414extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb); 424extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb,
425 bool unicast);
415extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, 426extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
416 struct sk_buff *skb2); 427 struct sk_buff *skb2, bool unicast);
417 428
418/* br_if.c */ 429/* br_if.c */
419extern void br_port_carrier_check(struct net_bridge_port *p); 430extern void br_port_carrier_check(struct net_bridge_port *p);
@@ -470,7 +481,7 @@ extern void br_multicast_free_pg(struct rcu_head *head);
470extern struct net_bridge_port_group *br_multicast_new_port_group( 481extern struct net_bridge_port_group *br_multicast_new_port_group(
471 struct net_bridge_port *port, 482 struct net_bridge_port *port,
472 struct br_ip *group, 483 struct br_ip *group,
473 struct net_bridge_port_group *next, 484 struct net_bridge_port_group __rcu *next,
474 unsigned char state); 485 unsigned char state);
475extern void br_mdb_init(void); 486extern void br_mdb_init(void);
476extern void br_mdb_uninit(void); 487extern void br_mdb_uninit(void);
@@ -480,22 +491,35 @@ extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
480#define mlock_dereference(X, br) \ 491#define mlock_dereference(X, br) \
481 rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) 492 rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
482 493
483#if IS_ENABLED(CONFIG_IPV6)
484#include <net/addrconf.h>
485static inline int ipv6_is_transient_multicast(const struct in6_addr *addr)
486{
487 if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr))
488 return 1;
489 return 0;
490}
491#endif
492
493static inline bool br_multicast_is_router(struct net_bridge *br) 494static inline bool br_multicast_is_router(struct net_bridge *br)
494{ 495{
495 return br->multicast_router == 2 || 496 return br->multicast_router == 2 ||
496 (br->multicast_router == 1 && 497 (br->multicast_router == 1 &&
497 timer_pending(&br->multicast_router_timer)); 498 timer_pending(&br->multicast_router_timer));
498} 499}
500
501static inline bool
502__br_multicast_querier_exists(struct net_bridge *br,
503 struct bridge_mcast_querier *querier)
504{
505 return time_is_before_jiffies(querier->delay_time) &&
506 (br->multicast_querier || timer_pending(&querier->timer));
507}
508
509static inline bool br_multicast_querier_exists(struct net_bridge *br,
510 struct ethhdr *eth)
511{
512 switch (eth->h_proto) {
513 case (htons(ETH_P_IP)):
514 return __br_multicast_querier_exists(br, &br->ip4_querier);
515#if IS_ENABLED(CONFIG_IPV6)
516 case (htons(ETH_P_IPV6)):
517 return __br_multicast_querier_exists(br, &br->ip6_querier);
518#endif
519 default:
520 return false;
521 }
522}
499#else 523#else
500static inline int br_multicast_rcv(struct net_bridge *br, 524static inline int br_multicast_rcv(struct net_bridge *br,
501 struct net_bridge_port *port, 525 struct net_bridge_port *port,
@@ -552,6 +576,11 @@ static inline bool br_multicast_is_router(struct net_bridge *br)
552{ 576{
553 return 0; 577 return 0;
554} 578}
579static inline bool br_multicast_querier_exists(struct net_bridge *br,
580 struct ethhdr *eth)
581{
582 return false;
583}
555static inline void br_mdb_init(void) 584static inline void br_mdb_init(void)
556{ 585{
557} 586}
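
The br_private.h hunk above adds a querier-exists check that dispatches on the frame's ethertype to the per-family "other querier" state, returning false for non-IP traffic (and unconditionally when snooping is compiled out). A simplified standalone sketch of that dispatch; the delay_time/time_is_before_jiffies() freshness test is reduced to a plain timer_pending flag here.

#include <arpa/inet.h>
#include <stdbool.h>
#include <stdio.h>

#define ETH_P_IP	0x0800
#define ETH_P_IPV6	0x86DD

struct querier {
	bool timer_pending;
};

struct bridge {
	bool multicast_querier;
	struct querier ip4_querier, ip6_querier;
};

static bool querier_exists(struct bridge *br, struct querier *q)
{
	/* either we are the querier or we recently heard another one */
	return br->multicast_querier || q->timer_pending;
}

static bool br_multicast_querier_exists(struct bridge *br,
					unsigned short h_proto)
{
	if (h_proto == htons(ETH_P_IP))
		return querier_exists(br, &br->ip4_querier);
	if (h_proto == htons(ETH_P_IPV6))
		return querier_exists(br, &br->ip6_querier);
	return false;
}

int main(void)
{
	struct bridge br = { .ip6_querier.timer_pending = true };

	printf("%d\n", br_multicast_querier_exists(&br, htons(ETH_P_IP)));
	printf("%d\n", br_multicast_querier_exists(&br, htons(ETH_P_IPV6)));
	return 0;
}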
@@ -714,6 +743,7 @@ extern struct net_bridge_port *br_get_port(struct net_bridge *br,
714extern void br_init_port(struct net_bridge_port *p); 743extern void br_init_port(struct net_bridge_port *p);
715extern void br_become_designated_port(struct net_bridge_port *p); 744extern void br_become_designated_port(struct net_bridge_port *p);
716 745
746extern void __br_set_forward_delay(struct net_bridge *br, unsigned long t);
717extern int br_set_forward_delay(struct net_bridge *br, unsigned long x); 747extern int br_set_forward_delay(struct net_bridge *br, unsigned long x);
718extern int br_set_hello_time(struct net_bridge *br, unsigned long x); 748extern int br_set_hello_time(struct net_bridge *br, unsigned long x);
719extern int br_set_max_age(struct net_bridge *br, unsigned long x); 749extern int br_set_max_age(struct net_bridge *br, unsigned long x);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 1c0a50f13229..3c86f0538cbb 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -209,7 +209,7 @@ static void br_record_config_information(struct net_bridge_port *p,
209 p->designated_age = jiffies - bpdu->message_age; 209 p->designated_age = jiffies - bpdu->message_age;
210 210
211 mod_timer(&p->message_age_timer, jiffies 211 mod_timer(&p->message_age_timer, jiffies
212 + (p->br->max_age - bpdu->message_age)); 212 + (bpdu->max_age - bpdu->message_age));
213} 213}
214 214
215/* called under bridge lock */ 215/* called under bridge lock */
@@ -544,18 +544,27 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
544 544
545} 545}
546 546
547void __br_set_forward_delay(struct net_bridge *br, unsigned long t)
548{
549 br->bridge_forward_delay = t;
550 if (br_is_root_bridge(br))
551 br->forward_delay = br->bridge_forward_delay;
552}
553
547int br_set_forward_delay(struct net_bridge *br, unsigned long val) 554int br_set_forward_delay(struct net_bridge *br, unsigned long val)
548{ 555{
549 unsigned long t = clock_t_to_jiffies(val); 556 unsigned long t = clock_t_to_jiffies(val);
557 int err = -ERANGE;
550 558
559 spin_lock_bh(&br->lock);
551 if (br->stp_enabled != BR_NO_STP && 560 if (br->stp_enabled != BR_NO_STP &&
552 (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY)) 561 (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY))
553 return -ERANGE; 562 goto unlock;
554 563
555 spin_lock_bh(&br->lock); 564 __br_set_forward_delay(br, t);
556 br->bridge_forward_delay = t; 565 err = 0;
557 if (br_is_root_bridge(br)) 566
558 br->forward_delay = br->bridge_forward_delay; 567unlock:
559 spin_unlock_bh(&br->lock); 568 spin_unlock_bh(&br->lock);
560 return 0; 569 return err;
561} 570}
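
The br_set_forward_delay() rework above moves the range check under br->lock and turns the early return into a single unlock path, with err defaulting to -ERANGE until validation passes. The userspace sketch below shows that control-flow shape, with a pthread mutex standing in for br->lock and invented delay bounds.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define MIN_DELAY 2
#define MAX_DELAY 30

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long forward_delay;

static int set_forward_delay(unsigned long t, int stp_enabled)
{
	int err = -ERANGE;	/* assume failure until validated */

	pthread_mutex_lock(&lock);
	if (stp_enabled && (t < MIN_DELAY || t > MAX_DELAY))
		goto unlock;	/* an early return here would leak the lock */

	forward_delay = t;
	err = 0;
unlock:
	pthread_mutex_unlock(&lock);
	return err;
}

int main(void)
{
	printf("%d\n", set_forward_delay(40, 1));	/* -ERANGE */
	printf("%d\n", set_forward_delay(15, 1));	/* 0 */
	return 0;
}

Validating under the lock matters here because __br_set_forward_delay() can now also be called from the STP-start path in br_stp_if.c below, which clamps the value while holding the same lock.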
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index d45e760141bb..108084a04671 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -129,6 +129,14 @@ static void br_stp_start(struct net_bridge *br)
129 char *envp[] = { NULL }; 129 char *envp[] = { NULL };
130 130
131 r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); 131 r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
132
133 spin_lock_bh(&br->lock);
134
135 if (br->bridge_forward_delay < BR_MIN_FORWARD_DELAY)
136 __br_set_forward_delay(br, BR_MIN_FORWARD_DELAY);
 137 else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY)
138 __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY);
139
132 if (r == 0) { 140 if (r == 0) {
133 br->stp_enabled = BR_USER_STP; 141 br->stp_enabled = BR_USER_STP;
134 br_debug(br, "userspace STP started\n"); 142 br_debug(br, "userspace STP started\n");
@@ -137,10 +145,10 @@ static void br_stp_start(struct net_bridge *br)
137 br_debug(br, "using kernel STP\n"); 145 br_debug(br, "using kernel STP\n");
138 146
139 /* To start timers on any ports left in blocking */ 147 /* To start timers on any ports left in blocking */
140 spin_lock_bh(&br->lock);
141 br_port_state_selection(br); 148 br_port_state_selection(br);
142 spin_unlock_bh(&br->lock);
143 } 149 }
150
151 spin_unlock_bh(&br->lock);
144} 152}
145 153
146static void br_stp_stop(struct net_bridge *br) 154static void br_stp_stop(struct net_bridge *br)
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 8baa9c08e1a4..3b9637fb7939 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Sysfs attributes of bridge ports 2 * Sysfs attributes of bridge
3 * Linux ethernet bridge 3 * Linux ethernet bridge
4 * 4 *
5 * Authors: 5 * Authors:
@@ -375,6 +375,31 @@ static ssize_t store_multicast_snooping(struct device *d,
375static DEVICE_ATTR(multicast_snooping, S_IRUGO | S_IWUSR, 375static DEVICE_ATTR(multicast_snooping, S_IRUGO | S_IWUSR,
376 show_multicast_snooping, store_multicast_snooping); 376 show_multicast_snooping, store_multicast_snooping);
377 377
378static ssize_t show_multicast_query_use_ifaddr(struct device *d,
379 struct device_attribute *attr,
380 char *buf)
381{
382 struct net_bridge *br = to_bridge(d);
383 return sprintf(buf, "%d\n", br->multicast_query_use_ifaddr);
384}
385
386static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val)
387{
388 br->multicast_query_use_ifaddr = !!val;
389 return 0;
390}
391
392static ssize_t
393store_multicast_query_use_ifaddr(struct device *d,
394 struct device_attribute *attr,
395 const char *buf, size_t len)
396{
397 return store_bridge_parm(d, buf, len, set_query_use_ifaddr);
398}
399static DEVICE_ATTR(multicast_query_use_ifaddr, S_IRUGO | S_IWUSR,
400 show_multicast_query_use_ifaddr,
401 store_multicast_query_use_ifaddr);
402
378static ssize_t show_multicast_querier(struct device *d, 403static ssize_t show_multicast_querier(struct device *d,
379 struct device_attribute *attr, 404 struct device_attribute *attr,
380 char *buf) 405 char *buf)
@@ -734,6 +759,7 @@ static struct attribute *bridge_attrs[] = {
734 &dev_attr_multicast_router.attr, 759 &dev_attr_multicast_router.attr,
735 &dev_attr_multicast_snooping.attr, 760 &dev_attr_multicast_snooping.attr,
736 &dev_attr_multicast_querier.attr, 761 &dev_attr_multicast_querier.attr,
762 &dev_attr_multicast_query_use_ifaddr.attr,
737 &dev_attr_hash_elasticity.attr, 763 &dev_attr_hash_elasticity.attr,
738 &dev_attr_hash_max.attr, 764 &dev_attr_hash_max.attr,
739 &dev_attr_multicast_last_member_count.attr, 765 &dev_attr_multicast_last_member_count.attr,
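
The multicast_query_use_ifaddr hunk above follows the file's usual show/store/setter triple: the show handler prints the current value, the store handler parses user input and hands it to a setter through store_bridge_parm(). A stripped-down model of that shape, with the device and attribute glue replaced by direct calls:

#include <stdio.h>
#include <stdlib.h>

struct bridge {
	unsigned int query_use_ifaddr:1;
};

static int set_query_use_ifaddr(struct bridge *br, unsigned long val)
{
	br->query_use_ifaddr = !!val;	/* normalize to 0/1 */
	return 0;
}

/* stand-in for store_bridge_parm(): parse, then call the setter */
static int store_parm(struct bridge *br, const char *buf,
		      int (*set)(struct bridge *, unsigned long))
{
	return set(br, strtoul(buf, NULL, 0));
}

int main(void)
{
	struct bridge br = { 0 };

	store_parm(&br, "1", set_query_use_ifaddr);
	printf("%d\n", br.query_use_ifaddr);	/* what show would print */
	return 0;
}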
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index a1ef1b6e14dc..2a2cdb756d51 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -158,6 +158,8 @@ static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
158BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); 158BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE);
159BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); 159BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD);
160BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); 160BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK);
161BRPORT_ATTR_FLAG(learning, BR_LEARNING);
162BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD);
161 163
162#ifdef CONFIG_BRIDGE_IGMP_SNOOPING 164#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
163static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) 165static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
@@ -195,6 +197,8 @@ static const struct brport_attribute *brport_attrs[] = {
195 &brport_attr_hairpin_mode, 197 &brport_attr_hairpin_mode,
196 &brport_attr_bpdu_guard, 198 &brport_attr_bpdu_guard,
197 &brport_attr_root_block, 199 &brport_attr_root_block,
200 &brport_attr_learning,
201 &brport_attr_unicast_flood,
198#ifdef CONFIG_BRIDGE_IGMP_SNOOPING 202#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
199 &brport_attr_multicast_router, 203 &brport_attr_multicast_router,
200 &brport_attr_multicast_fast_leave, 204 &brport_attr_multicast_fast_leave,
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index bd58b45f5f90..9a9ffe7e4019 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -108,7 +108,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)
108 108
109 clear_bit(vid, v->vlan_bitmap); 109 clear_bit(vid, v->vlan_bitmap);
110 v->num_vlans--; 110 v->num_vlans--;
111 if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { 111 if (bitmap_empty(v->vlan_bitmap, VLAN_N_VID)) {
112 if (v->port_idx) 112 if (v->port_idx)
113 rcu_assign_pointer(v->parent.port->vlan_info, NULL); 113 rcu_assign_pointer(v->parent.port->vlan_info, NULL);
114 else 114 else
@@ -122,7 +122,7 @@ static void __vlan_flush(struct net_port_vlans *v)
122{ 122{
123 smp_wmb(); 123 smp_wmb();
124 v->pvid = 0; 124 v->pvid = 0;
125 bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN); 125 bitmap_zero(v->vlan_bitmap, VLAN_N_VID);
126 if (v->port_idx) 126 if (v->port_idx)
127 rcu_assign_pointer(v->parent.port->vlan_info, NULL); 127 rcu_assign_pointer(v->parent.port->vlan_info, NULL);
128 else 128 else
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index df0364aa12d5..518093802d1d 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -271,6 +271,12 @@ static int ebt_ulog_tg_check(const struct xt_tgchk_param *par)
271{ 271{
272 struct ebt_ulog_info *uloginfo = par->targinfo; 272 struct ebt_ulog_info *uloginfo = par->targinfo;
273 273
274 if (!par->net->xt.ebt_ulog_warn_deprecated) {
275 pr_info("ebt_ulog is deprecated and it will be removed soon, "
276 "use ebt_nflog instead\n");
277 par->net->xt.ebt_ulog_warn_deprecated = true;
278 }
279
274 if (uloginfo->nlgroup > 31) 280 if (uloginfo->nlgroup > 31)
275 return -EINVAL; 281 return -EINVAL;
276 282
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 70f656ce0f4a..dbd1c783431b 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -64,7 +64,7 @@ static int ebt_broute(struct sk_buff *skb)
64static int __net_init broute_net_init(struct net *net) 64static int __net_init broute_net_init(struct net *net)
65{ 65{
66 net->xt.broute_table = ebt_register_table(net, &broute_table); 66 net->xt.broute_table = ebt_register_table(net, &broute_table);
67 return PTR_RET(net->xt.broute_table); 67 return PTR_ERR_OR_ZERO(net->xt.broute_table);
68} 68}
69 69
70static void __net_exit broute_net_exit(struct net *net) 70static void __net_exit broute_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 3c2e9dced9e0..94b2b700cff8 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -100,7 +100,7 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
100static int __net_init frame_filter_net_init(struct net *net) 100static int __net_init frame_filter_net_init(struct net *net)
101{ 101{
102 net->xt.frame_filter = ebt_register_table(net, &frame_filter); 102 net->xt.frame_filter = ebt_register_table(net, &frame_filter);
103 return PTR_RET(net->xt.frame_filter); 103 return PTR_ERR_OR_ZERO(net->xt.frame_filter);
104} 104}
105 105
106static void __net_exit frame_filter_net_exit(struct net *net) 106static void __net_exit frame_filter_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 10871bc77908..322555acdd40 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -100,7 +100,7 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
100static int __net_init frame_nat_net_init(struct net *net) 100static int __net_init frame_nat_net_init(struct net *net)
101{ 101{
102 net->xt.frame_nat = ebt_register_table(net, &frame_nat); 102 net->xt.frame_nat = ebt_register_table(net, &frame_nat);
103 return PTR_RET(net->xt.frame_nat); 103 return PTR_ERR_OR_ZERO(net->xt.frame_nat);
104} 104}
105 105
106static void __net_exit frame_nat_net_exit(struct net *net) 106static void __net_exit frame_nat_net_exit(struct net *net)
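
The three ebtable init hunks above are a pure rename from PTR_RET() to the clearer PTR_ERR_OR_ZERO(): given a pointer that carries either a valid table or an encoded errno, return 0 on success or the negative error. A userspace re-implementation of the error-pointer helpers, assumed to match the kernel's semantics, for illustration only:

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	/* the top 4095 addresses are reserved for encoded errnos */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* the helper the patch switches to: 0 on success, -errno on failure */
static inline int PTR_ERR_OR_ZERO(const void *ptr)
{
	return IS_ERR(ptr) ? (int)PTR_ERR(ptr) : 0;
}

static void *register_table(int fail)
{
	static int table;

	return fail ? ERR_PTR(-ENOMEM) : (void *)&table;
}

int main(void)
{
	printf("%d\n", PTR_ERR_OR_ZERO(register_table(0)));	/* 0 */
	printf("%d\n", PTR_ERR_OR_ZERO(register_table(1)));	/* -12 */
	return 0;
}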
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 3d110c4fc787..ac7802428384 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1339,7 +1339,7 @@ static inline int ebt_make_matchname(const struct ebt_entry_match *m,
1339 1339
1340 /* ebtables expects 32-byte-long names but xt_match names are 29 bytes 1340 /* ebtables expects 32-byte-long names but xt_match names are 29 bytes
1341 long. Copy 29 bytes and fill remaining bytes with zeroes. */ 1341 long. Copy 29 bytes and fill remaining bytes with zeroes. */
1342 strncpy(name, m->u.match->name, sizeof(name)); 1342 strlcpy(name, m->u.match->name, sizeof(name));
1343 if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) 1343 if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN))
1344 return -EFAULT; 1344 return -EFAULT;
1345 return 0; 1345 return 0;
@@ -1351,7 +1351,7 @@ static inline int ebt_make_watchername(const struct ebt_entry_watcher *w,
1351 char __user *hlp = ubase + ((char *)w - base); 1351 char __user *hlp = ubase + ((char *)w - base);
1352 char name[EBT_FUNCTION_MAXNAMELEN] = {}; 1352 char name[EBT_FUNCTION_MAXNAMELEN] = {};
1353 1353
1354 strncpy(name, w->u.watcher->name, sizeof(name)); 1354 strlcpy(name, w->u.watcher->name, sizeof(name));
1355 if (copy_to_user(hlp , name, EBT_FUNCTION_MAXNAMELEN)) 1355 if (copy_to_user(hlp , name, EBT_FUNCTION_MAXNAMELEN))
1356 return -EFAULT; 1356 return -EFAULT;
1357 return 0; 1357 return 0;
@@ -1377,7 +1377,7 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase)
1377 ret = EBT_WATCHER_ITERATE(e, ebt_make_watchername, base, ubase); 1377 ret = EBT_WATCHER_ITERATE(e, ebt_make_watchername, base, ubase);
1378 if (ret != 0) 1378 if (ret != 0)
1379 return ret; 1379 return ret;
1380 strncpy(name, t->u.target->name, sizeof(name)); 1380 strlcpy(name, t->u.target->name, sizeof(name));
1381 if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) 1381 if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN))
1382 return -EFAULT; 1382 return -EFAULT;
1383 return 0; 1383 return 0;
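
The strlcpy() conversions above close a termination gap: strncpy() writes no terminating NUL when the source fills the destination, while strlcpy() always terminates. The demo below uses a local xstrlcpy() as a stand-in for the kernel helper, since libc availability of strlcpy() varies.

#include <stdio.h>
#include <string.h>

static size_t xstrlcpy(char *dst, const char *src, size_t size)
{
	size_t len = strlen(src);

	if (size) {
		size_t n = len >= size ? size - 1 : len;

		memcpy(dst, src, n);
		dst[n] = '\0';	/* always terminated, unlike strncpy */
	}
	return len;
}

int main(void)
{
	char a[8], b[8];

	strncpy(a, "12345678", sizeof(a));	/* no NUL written into a */
	xstrlcpy(b, "12345678", sizeof(b));	/* "1234567" plus NUL */
	printf("strlcpy-style copy: %s\n", b);
	/* printing a with %s here would read past the buffer */
	return 0;
}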
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 1f9ece1a9c34..4dca159435cf 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -352,9 +352,9 @@ EXPORT_SYMBOL(caif_enroll_dev);
352 352
353/* notify Caif of device events */ 353/* notify Caif of device events */
354static int caif_device_notify(struct notifier_block *me, unsigned long what, 354static int caif_device_notify(struct notifier_block *me, unsigned long what,
355 void *arg) 355 void *ptr)
356{ 356{
357 struct net_device *dev = arg; 357 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
358 struct caif_device_entry *caifd = NULL; 358 struct caif_device_entry *caifd = NULL;
359 struct caif_dev_common *caifdev; 359 struct caif_dev_common *caifdev;
360 struct cfcnfg *cfg; 360 struct cfcnfg *cfg;
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index 942e00a425fd..75ed04b78fa4 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -121,9 +121,9 @@ static struct packet_type caif_usb_type __read_mostly = {
121}; 121};
122 122
123static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, 123static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
124 void *arg) 124 void *ptr)
125{ 125{
126 struct net_device *dev = arg; 126 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
127 struct caif_dev_common common; 127 struct caif_dev_common common;
128 struct cflayer *layer, *link_support; 128 struct cflayer *layer, *link_support;
129 struct usbnet *usbnet; 129 struct usbnet *usbnet;
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 2bd4b58f4372..0f455227da83 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -293,9 +293,10 @@ int cfctrl_linkup_request(struct cflayer *layer,
293 293
294 count = cfctrl_cancel_req(&cfctrl->serv.layer, 294 count = cfctrl_cancel_req(&cfctrl->serv.layer,
295 user_layer); 295 user_layer);
296 if (count != 1) 296 if (count != 1) {
297 pr_err("Could not remove request (%d)", count); 297 pr_err("Could not remove request (%d)", count);
298 return -ENODEV; 298 return -ENODEV;
299 }
299 } 300 }
300 return 0; 301 return 0;
301} 302}
diff --git a/net/can/af_can.c b/net/can/af_can.c
index c4e50852c9f4..3ab8dd2e1282 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -794,9 +794,9 @@ EXPORT_SYMBOL(can_proto_unregister);
794 * af_can notifier to create/remove CAN netdevice specific structs 794 * af_can notifier to create/remove CAN netdevice specific structs
795 */ 795 */
796static int can_notifier(struct notifier_block *nb, unsigned long msg, 796static int can_notifier(struct notifier_block *nb, unsigned long msg,
797 void *data) 797 void *ptr)
798{ 798{
799 struct net_device *dev = (struct net_device *)data; 799 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
800 struct dev_rcv_lists *d; 800 struct dev_rcv_lists *d;
801 801
802 if (!net_eq(dev_net(dev), &init_net)) 802 if (!net_eq(dev_net(dev), &init_net))
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 8f113e6ff327..46f20bfafc0e 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1350,9 +1350,9 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock,
1350 * notification handler for netdevice status changes 1350 * notification handler for netdevice status changes
1351 */ 1351 */
1352static int bcm_notifier(struct notifier_block *nb, unsigned long msg, 1352static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
1353 void *data) 1353 void *ptr)
1354{ 1354{
1355 struct net_device *dev = (struct net_device *)data; 1355 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1356 struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier); 1356 struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier);
1357 struct sock *sk = &bo->sk; 1357 struct sock *sk = &bo->sk;
1358 struct bcm_op *op; 1358 struct bcm_op *op;
diff --git a/net/can/gw.c b/net/can/gw.c
index 3ee690e8c7d3..3f9b0f3a2818 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -146,6 +146,7 @@ struct cgw_job {
146 /* tbc */ 146 /* tbc */
147 }; 147 };
148 u8 gwtype; 148 u8 gwtype;
149 u8 limit_hops;
149 u16 flags; 150 u16 flags;
150}; 151};
151 152
@@ -402,6 +403,11 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
402 403
403 /* put the incremented hop counter in the cloned skb */ 404 /* put the incremented hop counter in the cloned skb */
404 cgw_hops(nskb) = cgw_hops(skb) + 1; 405 cgw_hops(nskb) = cgw_hops(skb) + 1;
406
407 /* first processing of this CAN frame -> adjust to private hop limit */
408 if (gwj->limit_hops && cgw_hops(nskb) == 1)
409 cgw_hops(nskb) = max_hops - gwj->limit_hops + 1;
410
405 nskb->dev = gwj->dst.dev; 411 nskb->dev = gwj->dst.dev;
406 412
407 /* pointer to modifiable CAN frame */ 413 /* pointer to modifiable CAN frame */
@@ -445,9 +451,9 @@ static inline void cgw_unregister_filter(struct cgw_job *gwj)
445} 451}
446 452
447static int cgw_notifier(struct notifier_block *nb, 453static int cgw_notifier(struct notifier_block *nb,
448 unsigned long msg, void *data) 454 unsigned long msg, void *ptr)
449{ 455{
450 struct net_device *dev = (struct net_device *)data; 456 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
451 457
452 if (!net_eq(dev_net(dev), &init_net)) 458 if (!net_eq(dev_net(dev), &init_net))
453 return NOTIFY_DONE; 459 return NOTIFY_DONE;
@@ -509,6 +515,11 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
509 515
510 /* check non default settings of attributes */ 516 /* check non default settings of attributes */
511 517
518 if (gwj->limit_hops) {
519 if (nla_put_u8(skb, CGW_LIM_HOPS, gwj->limit_hops) < 0)
520 goto cancel;
521 }
522
512 if (gwj->mod.modtype.and) { 523 if (gwj->mod.modtype.and) {
513 memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); 524 memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
514 mb.modtype = gwj->mod.modtype.and; 525 mb.modtype = gwj->mod.modtype.and;
@@ -606,11 +617,12 @@ static const struct nla_policy cgw_policy[CGW_MAX+1] = {
606 [CGW_SRC_IF] = { .type = NLA_U32 }, 617 [CGW_SRC_IF] = { .type = NLA_U32 },
607 [CGW_DST_IF] = { .type = NLA_U32 }, 618 [CGW_DST_IF] = { .type = NLA_U32 },
608 [CGW_FILTER] = { .len = sizeof(struct can_filter) }, 619 [CGW_FILTER] = { .len = sizeof(struct can_filter) },
620 [CGW_LIM_HOPS] = { .type = NLA_U8 },
609}; 621};
610 622
611/* check for common and gwtype specific attributes */ 623/* check for common and gwtype specific attributes */
612static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, 624static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
613 u8 gwtype, void *gwtypeattr) 625 u8 gwtype, void *gwtypeattr, u8 *limhops)
614{ 626{
615 struct nlattr *tb[CGW_MAX+1]; 627 struct nlattr *tb[CGW_MAX+1];
616 struct cgw_frame_mod mb; 628 struct cgw_frame_mod mb;
@@ -625,6 +637,13 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
625 if (err < 0) 637 if (err < 0)
626 return err; 638 return err;
627 639
640 if (tb[CGW_LIM_HOPS]) {
641 *limhops = nla_get_u8(tb[CGW_LIM_HOPS]);
642
643 if (*limhops < 1 || *limhops > max_hops)
644 return -EINVAL;
645 }
646
628 /* check for AND/OR/XOR/SET modifications */ 647 /* check for AND/OR/XOR/SET modifications */
629 648
630 if (tb[CGW_MOD_AND]) { 649 if (tb[CGW_MOD_AND]) {
@@ -782,6 +801,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
782{ 801{
783 struct rtcanmsg *r; 802 struct rtcanmsg *r;
784 struct cgw_job *gwj; 803 struct cgw_job *gwj;
804 u8 limhops = 0;
785 int err = 0; 805 int err = 0;
786 806
787 if (!capable(CAP_NET_ADMIN)) 807 if (!capable(CAP_NET_ADMIN))
@@ -808,7 +828,8 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
808 gwj->flags = r->flags; 828 gwj->flags = r->flags;
809 gwj->gwtype = r->gwtype; 829 gwj->gwtype = r->gwtype;
810 830
811 err = cgw_parse_attr(nlh, &gwj->mod, CGW_TYPE_CAN_CAN, &gwj->ccgw); 831 err = cgw_parse_attr(nlh, &gwj->mod, CGW_TYPE_CAN_CAN, &gwj->ccgw,
832 &limhops);
812 if (err < 0) 833 if (err < 0)
813 goto out; 834 goto out;
814 835
@@ -836,6 +857,8 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
836 if (gwj->dst.dev->type != ARPHRD_CAN || gwj->dst.dev->header_ops) 857 if (gwj->dst.dev->type != ARPHRD_CAN || gwj->dst.dev->header_ops)
837 goto put_src_dst_out; 858 goto put_src_dst_out;
838 859
860 gwj->limit_hops = limhops;
861
839 ASSERT_RTNL(); 862 ASSERT_RTNL();
840 863
841 err = cgw_register_filter(gwj); 864 err = cgw_register_filter(gwj);
@@ -867,13 +890,14 @@ static void cgw_remove_all_jobs(void)
867 } 890 }
868} 891}
869 892
870static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) 893static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
871{ 894{
872 struct cgw_job *gwj = NULL; 895 struct cgw_job *gwj = NULL;
873 struct hlist_node *nx; 896 struct hlist_node *nx;
874 struct rtcanmsg *r; 897 struct rtcanmsg *r;
875 struct cf_mod mod; 898 struct cf_mod mod;
876 struct can_can_gw ccgw; 899 struct can_can_gw ccgw;
900 u8 limhops = 0;
877 int err = 0; 901 int err = 0;
878 902
879 if (!capable(CAP_NET_ADMIN)) 903 if (!capable(CAP_NET_ADMIN))
@@ -890,7 +914,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
890 if (r->gwtype != CGW_TYPE_CAN_CAN) 914 if (r->gwtype != CGW_TYPE_CAN_CAN)
891 return -EINVAL; 915 return -EINVAL;
892 916
893 err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw); 917 err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops);
894 if (err < 0) 918 if (err < 0)
895 return err; 919 return err;
896 920
@@ -910,6 +934,9 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
910 if (gwj->flags != r->flags) 934 if (gwj->flags != r->flags)
911 continue; 935 continue;
912 936
937 if (gwj->limit_hops != limhops)
938 continue;
939
913 if (memcmp(&gwj->mod, &mod, sizeof(mod))) 940 if (memcmp(&gwj->mod, &mod, sizeof(mod)))
914 continue; 941 continue;
915 942
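
The limit_hops logic added above biases a frame's hop counter on its first gateway traversal so that the drop test elsewhere in gw.c ("hops >= max_hops") fires after exactly limit_hops forwards. A worked example of the arithmetic, with an assumed max_hops of 6 (the actual value is a module parameter):

#include <stdio.h>

int main(void)
{
	unsigned int max_hops = 6;	/* assumed module-parameter value */
	unsigned int limit_hops = 2;	/* per-job CGW_LIM_HOPS setting */
	unsigned int hops = 0;		/* per-skb counter */
	unsigned int traversals = 0;

	for (;;) {
		if (hops >= max_hops)	/* gateway drops the frame */
			break;
		hops++;		/* cgw_hops(nskb) = cgw_hops(skb) + 1 */
		if (limit_hops && hops == 1)	/* first processing */
			hops = max_hops - limit_hops + 1;
		traversals++;
	}
	printf("frame forwarded %u times\n", traversals);	/* 2 */
	return 0;
}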
diff --git a/net/can/raw.c b/net/can/raw.c
index 1085e65f848e..641e1c895123 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -239,9 +239,9 @@ static int raw_enable_allfilters(struct net_device *dev, struct sock *sk)
239} 239}
240 240
241static int raw_notifier(struct notifier_block *nb, 241static int raw_notifier(struct notifier_block *nb,
242 unsigned long msg, void *data) 242 unsigned long msg, void *ptr)
243{ 243{
244 struct net_device *dev = (struct net_device *)data; 244 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
245 struct raw_sock *ro = container_of(nb, struct raw_sock, notifier); 245 struct raw_sock *ro = container_of(nb, struct raw_sock, notifier);
246 struct sock *sk = &ro->sk; 246 struct sock *sk = &ro->sk;
247 247
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index 925ca583c09c..8c93fa8d81bc 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -39,6 +39,11 @@ static int should_authenticate(struct ceph_auth_client *ac)
39 return xi->starting; 39 return xi->starting;
40} 40}
41 41
42static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
43{
44 return 0;
45}
46
42/* 47/*
43 * the generic auth code decode the global_id, and we carry no actual 48 * the generic auth code decode the global_id, and we carry no actual
44 * authenticate state, so nothing happens here. 49 * authenticate state, so nothing happens here.
@@ -106,6 +111,7 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = {
106 .destroy = destroy, 111 .destroy = destroy,
107 .is_authenticated = is_authenticated, 112 .is_authenticated = is_authenticated,
108 .should_authenticate = should_authenticate, 113 .should_authenticate = should_authenticate,
114 .build_request = build_request,
109 .handle_reply = handle_reply, 115 .handle_reply = handle_reply,
110 .create_authorizer = ceph_auth_none_create_authorizer, 116 .create_authorizer = ceph_auth_none_create_authorizer,
111 .destroy_authorizer = ceph_auth_none_destroy_authorizer, 117 .destroy_authorizer = ceph_auth_none_destroy_authorizer,
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index eb0a46a49bd4..4a5df7b1cc9f 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -290,7 +290,7 @@ int ceph_msgr_init(void)
290 if (ceph_msgr_slab_init()) 290 if (ceph_msgr_slab_init())
291 return -ENOMEM; 291 return -ENOMEM;
292 292
293 ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0); 293 ceph_msgr_wq = alloc_workqueue("ceph-msgr", 0, 0);
294 if (ceph_msgr_wq) 294 if (ceph_msgr_wq)
295 return 0; 295 return 0;
296 296
@@ -409,7 +409,7 @@ static void ceph_sock_write_space(struct sock *sk)
409 * and net/core/stream.c:sk_stream_write_space(). 409 * and net/core/stream.c:sk_stream_write_space().
410 */ 410 */
411 if (con_flag_test(con, CON_FLAG_WRITE_PENDING)) { 411 if (con_flag_test(con, CON_FLAG_WRITE_PENDING)) {
412 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { 412 if (sk_stream_is_writeable(sk)) {
413 dout("%s %p queueing write work\n", __func__, con); 413 dout("%s %p queueing write work\n", __func__, con);
414 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 414 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
415 queue_con(con); 415 queue_con(con);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index d5953b87918c..2b4b32aaa893 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -503,7 +503,9 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
503 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); 503 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
504 size_t payload_len = 0; 504 size_t payload_len = 0;
505 505
506 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE); 506 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
507 opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
508 opcode != CEPH_OSD_OP_TRUNCATE);
507 509
508 op->extent.offset = offset; 510 op->extent.offset = offset;
509 op->extent.length = length; 511 op->extent.length = length;
@@ -631,6 +633,9 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
631 break; 633 break;
632 case CEPH_OSD_OP_READ: 634 case CEPH_OSD_OP_READ:
633 case CEPH_OSD_OP_WRITE: 635 case CEPH_OSD_OP_WRITE:
636 case CEPH_OSD_OP_ZERO:
637 case CEPH_OSD_OP_DELETE:
638 case CEPH_OSD_OP_TRUNCATE:
634 if (src->op == CEPH_OSD_OP_WRITE) 639 if (src->op == CEPH_OSD_OP_WRITE)
635 request_data_len = src->extent.length; 640 request_data_len = src->extent.length;
636 dst->extent.offset = cpu_to_le64(src->extent.offset); 641 dst->extent.offset = cpu_to_le64(src->extent.offset);
@@ -715,7 +720,9 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
715 u64 object_base; 720 u64 object_base;
716 int r; 721 int r;
717 722
718 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE); 723 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
724 opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
725 opcode != CEPH_OSD_OP_TRUNCATE);
719 726
720 req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool, 727 req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
721 GFP_NOFS); 728 GFP_NOFS);
@@ -733,12 +740,14 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
733 740
734 object_size = le32_to_cpu(layout->fl_object_size); 741 object_size = le32_to_cpu(layout->fl_object_size);
735 object_base = off - objoff; 742 object_base = off - objoff;
736 if (truncate_size <= object_base) { 743 if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
737 truncate_size = 0; 744 if (truncate_size <= object_base) {
738 } else { 745 truncate_size = 0;
739 truncate_size -= object_base; 746 } else {
740 if (truncate_size > object_size) 747 truncate_size -= object_base;
741 truncate_size = object_size; 748 if (truncate_size > object_size)
749 truncate_size = object_size;
750 }
742 } 751 }
743 752
744 osd_req_op_extent_init(req, 0, opcode, objoff, objlen, 753 osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
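
The ceph_osdc_new_request() hunk above rebases a file-relative truncate_size to the object the request lands in, clamping to the object size, and now skips the adjustment entirely for the sentinel pair guarded against in the hunk (truncate_seq == 1 with an all-ones size, read here as "no truncation in effect"). A worked example of that arithmetic with invented layout values:

#include <stdio.h>

static unsigned long long adjust_truncate(unsigned long long truncate_size,
					  unsigned int truncate_seq,
					  unsigned long long object_base,
					  unsigned long long object_size)
{
	/* sentinel pair the kernel check above leaves untouched */
	if (truncate_seq == 1 && truncate_size == -1ULL)
		return truncate_size;

	if (truncate_size <= object_base)
		return 0;		/* object lies entirely past it */
	truncate_size -= object_base;	/* rebase to object-local offset */
	if (truncate_size > object_size)
		truncate_size = object_size;
	return truncate_size;
}

int main(void)
{
	/* 4 MiB objects; request lands in the third object (base 8 MiB) */
	unsigned long long object_size = 4ULL << 20;
	unsigned long long object_base = 8ULL << 20;

	/* file truncated at 9 MiB: 1 MiB remains valid in this object */
	printf("%llu\n", adjust_truncate(9ULL << 20, 2,
					 object_base, object_size));
	return 0;
}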
@@ -1174,6 +1183,7 @@ static void __register_linger_request(struct ceph_osd_client *osdc,
1174 struct ceph_osd_request *req) 1183 struct ceph_osd_request *req)
1175{ 1184{
1176 dout("__register_linger_request %p\n", req); 1185 dout("__register_linger_request %p\n", req);
1186 ceph_osdc_get_request(req);
1177 list_add_tail(&req->r_linger_item, &osdc->req_linger); 1187 list_add_tail(&req->r_linger_item, &osdc->req_linger);
1178 if (req->r_osd) 1188 if (req->r_osd)
1179 list_add_tail(&req->r_linger_osd, 1189 list_add_tail(&req->r_linger_osd,
@@ -1196,6 +1206,7 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
1196 if (list_empty(&req->r_osd_item)) 1206 if (list_empty(&req->r_osd_item))
1197 req->r_osd = NULL; 1207 req->r_osd = NULL;
1198 } 1208 }
1209 ceph_osdc_put_request(req);
1199} 1210}
1200 1211
1201void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, 1212void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
@@ -1203,9 +1214,8 @@ void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
1203{ 1214{
1204 mutex_lock(&osdc->request_mutex); 1215 mutex_lock(&osdc->request_mutex);
1205 if (req->r_linger) { 1216 if (req->r_linger) {
1206 __unregister_linger_request(osdc, req);
1207 req->r_linger = 0; 1217 req->r_linger = 0;
1208 ceph_osdc_put_request(req); 1218 __unregister_linger_request(osdc, req);
1209 } 1219 }
1210 mutex_unlock(&osdc->request_mutex); 1220 mutex_unlock(&osdc->request_mutex);
1211} 1221}
@@ -1217,11 +1227,6 @@ void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
1217 if (!req->r_linger) { 1227 if (!req->r_linger) {
1218 dout("set_request_linger %p\n", req); 1228 dout("set_request_linger %p\n", req);
1219 req->r_linger = 1; 1229 req->r_linger = 1;
1220 /*
1221 * caller is now responsible for calling
1222 * unregister_linger_request
1223 */
1224 ceph_osdc_get_request(req);
1225 } 1230 }
1226} 1231}
1227EXPORT_SYMBOL(ceph_osdc_set_request_linger); 1232EXPORT_SYMBOL(ceph_osdc_set_request_linger);
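
The linger hunks above move the request reference so it is owned by the linger registration itself: __register_linger_request() now takes a ref, __unregister_linger_request() drops it, and setting the linger flag no longer does either. A toy refcount model of that ownership shift, with a plain counter standing in for ceph_osdc_get/put_request():

#include <assert.h>
#include <stdio.h>

struct request {
	int refcount;
	int registered;
};

static void get_request(struct request *r) { r->refcount++; }
static void put_request(struct request *r)
{
	if (--r->refcount == 0)
		printf("request freed\n");
}

static void register_linger(struct request *r)
{
	get_request(r);		/* the linger list holds its own ref */
	r->registered = 1;
}

static void unregister_linger(struct request *r)
{
	r->registered = 0;
	put_request(r);		/* drop the list's ref */
}

int main(void)
{
	struct request r = { .refcount = 1 };	/* caller's ref */

	register_linger(&r);
	unregister_linger(&r);
	assert(r.refcount == 1);	/* caller's ref still intact */
	put_request(&r);		/* caller done: now freed */
	return 0;
}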
@@ -1339,10 +1344,6 @@ static void __send_request(struct ceph_osd_client *osdc,
1339 1344
1340 ceph_msg_get(req->r_request); /* send consumes a ref */ 1345 ceph_msg_get(req->r_request); /* send consumes a ref */
1341 1346
1342 /* Mark the request unsafe if this is the first time it's being sent. */
1343
1344 if (!req->r_sent && req->r_unsafe_callback)
1345 req->r_unsafe_callback(req, true);
1346 req->r_sent = req->r_osd->o_incarnation; 1347 req->r_sent = req->r_osd->o_incarnation;
1347 1348
1348 ceph_con_send(&req->r_osd->o_con, req->r_request); 1349 ceph_con_send(&req->r_osd->o_con, req->r_request);
@@ -1433,8 +1434,6 @@ static void handle_osds_timeout(struct work_struct *work)
1433 1434
1434static void complete_request(struct ceph_osd_request *req) 1435static void complete_request(struct ceph_osd_request *req)
1435{ 1436{
1436 if (req->r_unsafe_callback)
1437 req->r_unsafe_callback(req, false);
1438 complete_all(&req->r_safe_completion); /* fsync waiter */ 1437 complete_all(&req->r_safe_completion); /* fsync waiter */
1439} 1438}
1440 1439
@@ -1496,14 +1495,14 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1496 dout("handle_reply %p tid %llu req %p result %d\n", msg, tid, 1495 dout("handle_reply %p tid %llu req %p result %d\n", msg, tid,
1497 req, result); 1496 req, result);
1498 1497
1499 ceph_decode_need(&p, end, 4, bad); 1498 ceph_decode_need(&p, end, 4, bad_put);
1500 numops = ceph_decode_32(&p); 1499 numops = ceph_decode_32(&p);
1501 if (numops > CEPH_OSD_MAX_OP) 1500 if (numops > CEPH_OSD_MAX_OP)
1502 goto bad_put; 1501 goto bad_put;
1503 if (numops != req->r_num_ops) 1502 if (numops != req->r_num_ops)
1504 goto bad_put; 1503 goto bad_put;
1505 payload_len = 0; 1504 payload_len = 0;
1506 ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad); 1505 ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad_put);
1507 for (i = 0; i < numops; i++) { 1506 for (i = 0; i < numops; i++) {
1508 struct ceph_osd_op *op = p; 1507 struct ceph_osd_op *op = p;
1509 int len; 1508 int len;
@@ -1521,11 +1520,13 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1521 goto bad_put; 1520 goto bad_put;
1522 } 1521 }
1523 1522
1524 ceph_decode_need(&p, end, 4 + numops * 4, bad); 1523 ceph_decode_need(&p, end, 4 + numops * 4, bad_put);
1525 retry_attempt = ceph_decode_32(&p); 1524 retry_attempt = ceph_decode_32(&p);
1526 for (i = 0; i < numops; i++) 1525 for (i = 0; i < numops; i++)
1527 req->r_reply_op_result[i] = ceph_decode_32(&p); 1526 req->r_reply_op_result[i] = ceph_decode_32(&p);
1528 1527
1528 already_completed = req->r_got_reply;
1529
1529 if (!req->r_got_reply) { 1530 if (!req->r_got_reply) {
1530 1531
1531 req->r_result = result; 1532 req->r_result = result;
@@ -1556,19 +1557,23 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1556 ((flags & CEPH_OSD_FLAG_WRITE) == 0)) 1557 ((flags & CEPH_OSD_FLAG_WRITE) == 0))
1557 __unregister_request(osdc, req); 1558 __unregister_request(osdc, req);
1558 1559
1559 already_completed = req->r_completed;
1560 req->r_completed = 1;
1561 mutex_unlock(&osdc->request_mutex); 1560 mutex_unlock(&osdc->request_mutex);
1562 if (already_completed)
1563 goto done;
1564 1561
1565 if (req->r_callback) 1562 if (!already_completed) {
1566 req->r_callback(req, msg); 1563 if (req->r_unsafe_callback &&
1567 else 1564 result >= 0 && !(flags & CEPH_OSD_FLAG_ONDISK))
1568 complete_all(&req->r_completion); 1565 req->r_unsafe_callback(req, true);
1566 if (req->r_callback)
1567 req->r_callback(req, msg);
1568 else
1569 complete_all(&req->r_completion);
1570 }
1569 1571
1570 if (flags & CEPH_OSD_FLAG_ONDISK) 1572 if (flags & CEPH_OSD_FLAG_ONDISK) {
1573 if (req->r_unsafe_callback && already_completed)
1574 req->r_unsafe_callback(req, false);
1571 complete_request(req); 1575 complete_request(req);
1576 }
1572 1577
1573done: 1578done:
1574 dout("req=%p req->r_linger=%d\n", req, req->r_linger); 1579 dout("req=%p req->r_linger=%d\n", req, req->r_linger);
@@ -1633,8 +1638,10 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
 			dout("%p tid %llu restart on osd%d\n",
 			     req, req->r_tid,
 			     req->r_osd ? req->r_osd->o_osd : -1);
+			ceph_osdc_get_request(req);
 			__unregister_request(osdc, req);
 			__register_linger_request(osdc, req);
+			ceph_osdc_put_request(req);
 			continue;
 		}
 
@@ -1675,13 +1682,13 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
 		__register_request(osdc, req);
 		__unregister_linger_request(osdc, req);
 	}
+	reset_changed_osds(osdc);
 	mutex_unlock(&osdc->request_mutex);
 
 	if (needmap) {
 		dout("%d requests for down osds, need new map\n", needmap);
 		ceph_monc_request_next_osdmap(&osdc->client->monc);
 	}
-	reset_changed_osds(osdc);
 }
 
 
@@ -1786,6 +1793,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 		nr_maps--;
 	}
 
+	if (!osdc->osdmap)
+		goto bad;
done:
 	downgrade_write(&osdc->map_sem);
 	ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
@@ -2123,13 +2132,14 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 	__register_request(osdc, req);
 	req->r_sent = 0;
 	req->r_got_reply = 0;
-	req->r_completed = 0;
 	rc = __map_request(osdc, req, 0);
 	if (rc < 0) {
 		if (nofail) {
 			dout("osdc_start_request failed map, "
 				" will retry %lld\n", req->r_tid);
 			rc = 0;
+		} else {
+			__unregister_request(osdc, req);
 		}
 		goto out_unlock;
 	}
@@ -2206,6 +2216,17 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc)
 EXPORT_SYMBOL(ceph_osdc_sync);
 
 /*
+ * Call all pending notify callbacks - for use after a watch is
+ * unregistered, to make sure no more callbacks for it will be invoked
+ */
+extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
+{
+	flush_workqueue(osdc->notify_wq);
+}
+EXPORT_SYMBOL(ceph_osdc_flush_notifies);
+
+
+/*
  * init, shutdown
  */
int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
@@ -2254,12 +2275,10 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
 	if (err < 0)
 		goto out_msgpool;
 
+	err = -ENOMEM;
 	osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify");
-	if (IS_ERR(osdc->notify_wq)) {
-		err = PTR_ERR(osdc->notify_wq);
-		osdc->notify_wq = NULL;
+	if (!osdc->notify_wq)
 		goto out_msgpool;
-	}
 	return 0;
 
out_msgpool:
@@ -2456,8 +2475,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
 	ceph_msg_revoke_incoming(req->r_reply);
 
 	if (front > req->r_reply->front.iov_len) {
-		pr_warning("get_reply front %d > preallocated %d\n",
-			   front, (int)req->r_reply->front.iov_len);
+		pr_warning("get_reply front %d > preallocated %d (%u#%llu)\n",
+			   front, (int)req->r_reply->front.iov_len,
+			   (unsigned int)con->peer_name.type,
+			   le64_to_cpu(con->peer_name.num));
 		m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS, false);
 		if (!m)
 			goto out;
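A minimal illustrative sketch (not part of the patch; my_unsafe_cb and my_unsafe_count are hypothetical names) of how a consumer of r_unsafe_callback pairs the two invocations handle_reply() now makes: unsafe=true when a reply arrives before the data is durable, unsafe=false once the ONDISK reply lands, so in-flight "unsafe" writes can be counted.

static atomic_t my_unsafe_count = ATOMIC_INIT(0);

static void my_unsafe_cb(struct ceph_osd_request *req, bool unsafe)
{
	if (unsafe)
		atomic_inc(&my_unsafe_count);	/* acked, not yet durable */
	else
		atomic_dec(&my_unsafe_count);	/* ONDISK reply received */
}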
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 603ddd92db19..dbd9a4792427 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1129,7 +1129,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 
 	/* pg_temp? */
 	pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num,
-				    pool->pgp_num_mask);
+				    pool->pg_num_mask);
 	pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
 	if (pg) {
 		*num = pg->len;
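For context on why the mask matters here: ceph_stable_mod() folds a value into a bucket count in a way that stays stable as the count grows, and the pg_temp table is keyed by the placement-group count (pg_num/pg_num_mask), not pgp_num_mask, so the lookup above must use the former or the computed pgid will not match. Its logic in the osdmap header is essentially this sketch:

static inline int ceph_stable_mod(int x, int b, int bmask)
{
	if ((x & bmask) < b)
		return x & bmask;	/* masked value already in range */
	else
		return x & (bmask >> 1);	/* fall back to smaller mask */
}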
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b71423db7785..af814e764206 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -48,6 +48,7 @@
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
+#include <linux/pagemap.h>
 
 #include <net/protocol.h>
 #include <linux/skbuff.h>
@@ -56,6 +57,7 @@
 #include <net/sock.h>
 #include <net/tcp_states.h>
 #include <trace/events/skb.h>
+#include <net/busy_poll.h>
 
 /*
  * Is a socket 'connection oriented' ?
@@ -207,6 +209,10 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
 		}
 		spin_unlock_irqrestore(&queue->lock, cpu_flags);
 
+		if (sk_can_busy_loop(sk) &&
+		    sk_busy_loop(sk, flags & MSG_DONTWAIT))
+			continue;
+
 		/* User doesn't want to wait */
 		error = -EAGAIN;
 		if (!timeo)
@@ -568,6 +574,77 @@ fault:
 }
 EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
 
+/**
+ *	zerocopy_sg_from_iovec - Build a zerocopy datagram from an iovec
+ *	@skb: buffer to copy
+ *	@from: io vector to copy to
+ *	@offset: offset in the io vector to start copying from
+ *	@count: amount of vectors to copy to buffer from
+ *
+ *	The function will first copy up to headlen, and then pin the userspace
+ *	pages and build frags through them.
+ *
+ *	Returns 0, -EFAULT or -EMSGSIZE.
+ *	Note: the iovec is not modified during the copy
+ */
+int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
+			   int offset, size_t count)
+{
+	int len = iov_length(from, count) - offset;
+	int copy = min_t(int, skb_headlen(skb), len);
+	int size;
+	int i = 0;
+
+	/* copy up to skb headlen */
+	if (skb_copy_datagram_from_iovec(skb, 0, from, offset, copy))
+		return -EFAULT;
+
+	if (len == copy)
+		return 0;
+
+	offset += copy;
+	while (count--) {
+		struct page *page[MAX_SKB_FRAGS];
+		int num_pages;
+		unsigned long base;
+		unsigned long truesize;
+
+		/* Skip over from offset and copied */
+		if (offset >= from->iov_len) {
+			offset -= from->iov_len;
+			++from;
+			continue;
+		}
+		len = from->iov_len - offset;
+		base = (unsigned long)from->iov_base + offset;
+		size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
+		if (i + size > MAX_SKB_FRAGS)
+			return -EMSGSIZE;
+		num_pages = get_user_pages_fast(base, size, 0, &page[i]);
+		if (num_pages != size) {
+			release_pages(&page[i], num_pages, 0);
+			return -EFAULT;
+		}
+		truesize = size * PAGE_SIZE;
+		skb->data_len += len;
+		skb->len += len;
+		skb->truesize += truesize;
+		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
+		while (len) {
+			int off = base & ~PAGE_MASK;
+			int size = min_t(int, len, PAGE_SIZE - off);
+			skb_fill_page_desc(skb, i, page[i], off, size);
+			base += size;
+			len -= size;
+			i++;
+		}
+		offset = 0;
+		++from;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(zerocopy_sg_from_iovec);
+
 static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 				      u8 __user *to, int len,
 				      __wsum *csump)
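A hypothetical caller sketch for the zerocopy_sg_from_iovec() helper added above (the function name and error handling here are illustrative only). Note that skb->sk must already be set, since the helper charges the pinned pages' truesize to sk_wmem_alloc.

static int example_build_zerocopy(struct sk_buff *skb,
				  const struct iovec *iov, size_t count)
{
	int err;

	/* pins userspace pages and attaches them as skb frags */
	err = zerocopy_sg_from_iovec(skb, iov, 0, count);
	if (err)	/* -EFAULT (bad pages) or -EMSGSIZE (too many frags) */
		kfree_skb(skb);
	return err;
}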
diff --git a/net/core/dev.c b/net/core/dev.c
index fc1e289397f5..65f829cfd928 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,6 +129,8 @@
 #include <linux/inetdevice.h>
 #include <linux/cpu_rmap.h>
 #include <linux/static_key.h>
+#include <linux/hashtable.h>
+#include <linux/vmalloc.h>
 
 #include "net-sysfs.h"
 
@@ -166,7 +168,13 @@ static struct list_head offload_base __read_mostly;
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
 
-seqcount_t devnet_rename_seq;
+/* protects napi_hash addition/deletion and napi_gen_id */
+static DEFINE_SPINLOCK(napi_hash_lock);
+
+static unsigned int napi_gen_id;
+static DEFINE_HASHTABLE(napi_hash, 8);
+
+static seqcount_t devnet_rename_seq;
 
 static inline void dev_base_seq_inc(struct net *net)
 {
@@ -792,6 +800,40 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
EXPORT_SYMBOL(dev_get_by_index);
 
 /**
+ *	netdev_get_name - get a netdevice name, knowing its ifindex.
+ *	@net: network namespace
+ *	@name: a pointer to the buffer where the name will be stored.
+ *	@ifindex: the ifindex of the interface to get the name from.
+ *
+ *	The use of raw_seqcount_begin() and cond_resched() before
+ *	retrying is required as we want to give the writers a chance
+ *	to complete when CONFIG_PREEMPT is not set.
+ */
+int netdev_get_name(struct net *net, char *name, int ifindex)
+{
+	struct net_device *dev;
+	unsigned int seq;
+
+retry:
+	seq = raw_seqcount_begin(&devnet_rename_seq);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifindex);
+	if (!dev) {
+		rcu_read_unlock();
+		return -ENODEV;
+	}
+
+	strcpy(name, dev->name);
+	rcu_read_unlock();
+	if (read_seqcount_retry(&devnet_rename_seq, seq)) {
+		cond_resched();
+		goto retry;
+	}
+
+	return 0;
+}
+
+/**
  *	dev_getbyhwaddr_rcu - find a device by its hardware address
  *	@net: the applicable net namespace
  *	@type: media type of device
@@ -1198,9 +1240,7 @@ static int __dev_open(struct net_device *dev)
 	 * If we don't do this there is a chance ndo_poll_controller
 	 * or ndo_poll may be running while we open the device
 	 */
-	ret = netpoll_rx_disable(dev);
-	if (ret)
-		return ret;
+	netpoll_rx_disable(dev);
 
 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
 	ret = notifier_to_errno(ret);
@@ -1309,9 +1349,7 @@ static int __dev_close(struct net_device *dev)
 	LIST_HEAD(single);
 
 	/* Temporarily disable netpoll until the interface is down */
-	retval = netpoll_rx_disable(dev);
-	if (retval)
-		return retval;
+	netpoll_rx_disable(dev);
 
 	list_add(&dev->unreg_list, &single);
 	retval = __dev_close_many(&single);
@@ -1353,14 +1391,11 @@ static int dev_close_many(struct list_head *head)
  */
int dev_close(struct net_device *dev)
{
-	int ret = 0;
 	if (dev->flags & IFF_UP) {
 		LIST_HEAD(single);
 
 		/* Block netpoll rx while the interface is going down */
-		ret = netpoll_rx_disable(dev);
-		if (ret)
-			return ret;
+		netpoll_rx_disable(dev);
 
 		list_add(&dev->unreg_list, &single);
 		dev_close_many(&single);
@@ -1368,7 +1403,7 @@ int dev_close(struct net_device *dev)
 
 		netpoll_rx_enable(dev);
 	}
-	return ret;
+	return 0;
}
EXPORT_SYMBOL(dev_close);
 
@@ -1398,6 +1433,14 @@ void dev_disable_lro(struct net_device *dev)
}
EXPORT_SYMBOL(dev_disable_lro);
 
+static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
+				   struct net_device *dev)
+{
+	struct netdev_notifier_info info;
+
+	netdev_notifier_info_init(&info, dev);
+	return nb->notifier_call(nb, val, &info);
+}
 
static int dev_boot_phase = 1;
 
@@ -1430,7 +1473,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
 		goto unlock;
 	for_each_net(net) {
 		for_each_netdev(net, dev) {
-			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
+			err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
 			err = notifier_to_errno(err);
 			if (err)
 				goto rollback;
@@ -1438,7 +1481,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
 			if (!(dev->flags & IFF_UP))
 				continue;
 
-			nb->notifier_call(nb, NETDEV_UP, dev);
+			call_netdevice_notifier(nb, NETDEV_UP, dev);
 		}
 	}
 
@@ -1454,10 +1497,11 @@ rollback:
 			goto outroll;
 
 		if (dev->flags & IFF_UP) {
-			nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
-			nb->notifier_call(nb, NETDEV_DOWN, dev);
+			call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
+						dev);
+			call_netdevice_notifier(nb, NETDEV_DOWN, dev);
 		}
-		nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+		call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
 	}
 }
 
@@ -1495,10 +1539,11 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
 	for_each_net(net) {
 		for_each_netdev(net, dev) {
 			if (dev->flags & IFF_UP) {
-				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
-				nb->notifier_call(nb, NETDEV_DOWN, dev);
+				call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
+							dev);
+				call_netdevice_notifier(nb, NETDEV_DOWN, dev);
 			}
-			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+			call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
 		}
 	}
unlock:
@@ -1508,6 +1553,25 @@ unlock:
EXPORT_SYMBOL(unregister_netdevice_notifier);
 
 /**
+ *	call_netdevice_notifiers_info - call all network notifier blocks
+ *	@val: value passed unmodified to notifier function
+ *	@dev: net_device pointer passed unmodified to notifier function
+ *	@info: notifier information data
+ *
+ *	Call all network notifier blocks.  Parameters and return value
+ *	are as for raw_notifier_call_chain().
+ */
+
+int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
+				  struct netdev_notifier_info *info)
+{
+	ASSERT_RTNL();
+	netdev_notifier_info_init(info, dev);
+	return raw_notifier_call_chain(&netdev_chain, val, info);
+}
+EXPORT_SYMBOL(call_netdevice_notifiers_info);
+
+/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *	@val: value passed unmodified to notifier function
 *	@dev: net_device pointer passed unmodified to notifier function
@@ -1518,8 +1582,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
 
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
-	ASSERT_RTNL();
-	return raw_notifier_call_chain(&netdev_chain, val, dev);
+	struct netdev_notifier_info info;
+
+	return call_netdevice_notifiers_info(val, dev, &info);
}
EXPORT_SYMBOL(call_netdevice_notifiers);
 
@@ -1621,23 +1686,19 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 		}
 	}
 
-	skb_orphan(skb);
-
 	if (unlikely(!is_skb_forwardable(dev, skb))) {
 		atomic_long_inc(&dev->rx_dropped);
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
-	skb->skb_iif = 0;
-	skb->dev = dev;
-	skb_dst_drop(skb);
-	skb->tstamp.tv64 = 0;
-	skb->pkt_type = PACKET_HOST;
 	skb->protocol = eth_type_trans(skb, dev);
-	skb->mark = 0;
-	secpath_reset(skb);
-	nf_reset(skb);
-	nf_reset_trace(skb);
+
+	/* eth_type_trans() can set pkt_type.
+	 * call skb_scrub_packet() after it to clear pkt_type _after_ calling
+	 * eth_type_trans().
+	 */
+	skb_scrub_packet(skb, true);
+
 	return netif_rx(skb);
}
EXPORT_SYMBOL_GPL(dev_forward_skb);
@@ -1702,7 +1763,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 		skb_reset_mac_header(skb2);
 
 		if (skb_network_header(skb2) < skb2->data ||
-		    skb2->network_header > skb2->tail) {
+		    skb_network_header(skb2) > skb_tail_pointer(skb2)) {
 			net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
 					     ntohs(skb2->protocol),
 					     dev->name);
@@ -2420,10 +2481,10 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
}
 
static netdev_features_t harmonize_features(struct sk_buff *skb,
-	__be16 protocol, netdev_features_t features)
+	netdev_features_t features)
{
 	if (skb->ip_summed != CHECKSUM_NONE &&
-	    !can_checksum_protocol(features, protocol)) {
+	    !can_checksum_protocol(features, skb_network_protocol(skb))) {
 		features &= ~NETIF_F_ALL_CSUM;
 	} else if (illegal_highdma(skb->dev, skb)) {
 		features &= ~NETIF_F_SG;
@@ -2444,20 +2505,18 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
 		protocol = veh->h_vlan_encapsulated_proto;
 	} else if (!vlan_tx_tag_present(skb)) {
-		return harmonize_features(skb, protocol, features);
+		return harmonize_features(skb, features);
 	}
 
 	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
 					       NETIF_F_HW_VLAN_STAG_TX);
 
-	if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) {
-		return harmonize_features(skb, protocol, features);
-	} else {
+	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
 		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
 				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
 				NETIF_F_HW_VLAN_STAG_TX;
-		return harmonize_features(skb, protocol, features);
-	}
+
+	return harmonize_features(skb, features);
}
EXPORT_SYMBOL(netif_skb_features);
 
@@ -3065,6 +3124,46 @@ static int rps_ipi_queued(struct softnet_data *sd)
 	return 0;
}
 
+#ifdef CONFIG_NET_FLOW_LIMIT
+int netdev_flow_limit_table_len __read_mostly = (1 << 12);
+#endif
+
+static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
+{
+#ifdef CONFIG_NET_FLOW_LIMIT
+	struct sd_flow_limit *fl;
+	struct softnet_data *sd;
+	unsigned int old_flow, new_flow;
+
+	if (qlen < (netdev_max_backlog >> 1))
+		return false;
+
+	sd = &__get_cpu_var(softnet_data);
+
+	rcu_read_lock();
+	fl = rcu_dereference(sd->flow_limit);
+	if (fl) {
+		new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1);
+		old_flow = fl->history[fl->history_head];
+		fl->history[fl->history_head] = new_flow;
+
+		fl->history_head++;
+		fl->history_head &= FLOW_LIMIT_HISTORY - 1;
+
+		if (likely(fl->buckets[old_flow]))
+			fl->buckets[old_flow]--;
+
+		if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
+			fl->count++;
+			rcu_read_unlock();
+			return true;
+		}
+	}
+	rcu_read_unlock();
+#endif
+	return false;
+}
+
/*
 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
 * queue (may be a remote CPU queue).
@@ -3074,13 +3173,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
{
 	struct softnet_data *sd;
 	unsigned long flags;
+	unsigned int qlen;
 
 	sd = &per_cpu(softnet_data, cpu);
 
 	local_irq_save(flags);
 
 	rps_lock(sd);
-	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
+	qlen = skb_queue_len(&sd->input_pkt_queue);
+	if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
 		if (skb_queue_len(&sd->input_pkt_queue)) {
enqueue:
 			__skb_queue_tail(&sd->input_pkt_queue, skb);
@@ -3479,8 +3580,15 @@ ncls:
 		}
 	}
 
-	if (vlan_tx_nonzero_tag_present(skb))
-		skb->pkt_type = PACKET_OTHERHOST;
+	if (unlikely(vlan_tx_tag_present(skb))) {
+		if (vlan_tx_tag_get_id(skb))
+			skb->pkt_type = PACKET_OTHERHOST;
+		/* Note: we might in the future use prio bits
+		 * and set skb->priority like in vlan_do_receive()
+		 * For the time being, just ignore Priority Code Point
+		 */
+		skb->vlan_tci = 0;
+	}
 
 	/* deliver only exact match when indicated */
 	null_or_dev = deliver_exact ? skb->dev : NULL;
@@ -3828,7 +3936,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
 	NAPI_GRO_CB(skb)->frag0 = NULL;
 	NAPI_GRO_CB(skb)->frag0_len = 0;
 
-	if (skb->mac_header == skb->tail &&
+	if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
 	    pinfo->nr_frags &&
 	    !PageHighMem(skb_frag_page(frag0))) {
 		NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
@@ -4072,6 +4180,58 @@ void napi_complete(struct napi_struct *n)
}
EXPORT_SYMBOL(napi_complete);
 
+/* must be called under rcu_read_lock(), as we dont take a reference */
+struct napi_struct *napi_by_id(unsigned int napi_id)
+{
+	unsigned int hash = napi_id % HASH_SIZE(napi_hash);
+	struct napi_struct *napi;
+
+	hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
+		if (napi->napi_id == napi_id)
+			return napi;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(napi_by_id);
+
+void napi_hash_add(struct napi_struct *napi)
+{
+	if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
+
+		spin_lock(&napi_hash_lock);
+
+		/* 0 is not a valid id, we also skip an id that is taken
+		 * we expect both events to be extremely rare
+		 */
+		napi->napi_id = 0;
+		while (!napi->napi_id) {
+			napi->napi_id = ++napi_gen_id;
+			if (napi_by_id(napi->napi_id))
+				napi->napi_id = 0;
+		}
+
+		hlist_add_head_rcu(&napi->napi_hash_node,
+			&napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
+
+		spin_unlock(&napi_hash_lock);
+	}
+}
+EXPORT_SYMBOL_GPL(napi_hash_add);
+
+/* Warning : caller is responsible to make sure rcu grace period
+ * is respected before freeing memory containing @napi
+ */
+void napi_hash_del(struct napi_struct *napi)
+{
+	spin_lock(&napi_hash_lock);
+
+	if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
+		hlist_del_rcu(&napi->napi_hash_node);
+
+	spin_unlock(&napi_hash_lock);
+}
+EXPORT_SYMBOL_GPL(napi_hash_del);
+
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 		    int (*poll)(struct napi_struct *, int), int weight)
{
@@ -4207,57 +4367,48 @@ softnet_break:
 	goto out;
}
 
-struct netdev_upper {
+struct netdev_adjacent {
 	struct net_device *dev;
+
+	/* upper master flag, there can only be one master device per list */
 	bool master;
+
+	/* indicates that this dev is our first-level lower/upper device */
+	bool neighbour;
+
+	/* counter for the number of times this device was added to us */
+	u16 ref_nr;
+
 	struct list_head list;
 	struct rcu_head rcu;
-	struct list_head search_list;
};
 
-static void __append_search_uppers(struct list_head *search_list,
-				   struct net_device *dev)
+static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
+						 struct net_device *adj_dev,
+						 bool upper)
{
-	struct netdev_upper *upper;
+	struct netdev_adjacent *adj;
+	struct list_head *dev_list;
 
-	list_for_each_entry(upper, &dev->upper_dev_list, list) {
-		/* check if this upper is not already in search list */
-		if (list_empty(&upper->search_list))
-			list_add_tail(&upper->search_list, search_list);
+	dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list;
+
+	list_for_each_entry(adj, dev_list, list) {
+		if (adj->dev == adj_dev)
+			return adj;
 	}
+	return NULL;
}
 
-static bool __netdev_search_upper_dev(struct net_device *dev,
-				      struct net_device *upper_dev)
+static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev,
+							  struct net_device *udev)
{
-	LIST_HEAD(search_list);
-	struct netdev_upper *upper;
-	struct netdev_upper *tmp;
-	bool ret = false;
-
-	__append_search_uppers(&search_list, dev);
-	list_for_each_entry(upper, &search_list, search_list) {
-		if (upper->dev == upper_dev) {
-			ret = true;
-			break;
-		}
-		__append_search_uppers(&search_list, upper->dev);
-	}
-	list_for_each_entry_safe(upper, tmp, &search_list, search_list)
-		INIT_LIST_HEAD(&upper->search_list);
-	return ret;
+	return __netdev_find_adj(dev, udev, true);
}
 
-static struct netdev_upper *__netdev_find_upper(struct net_device *dev,
-						struct net_device *upper_dev)
+static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev,
+							  struct net_device *ldev)
{
-	struct netdev_upper *upper;
-
-	list_for_each_entry(upper, &dev->upper_dev_list, list) {
-		if (upper->dev == upper_dev)
-			return upper;
-	}
-	return NULL;
+	return __netdev_find_adj(dev, ldev, false);
}
 
/**
@@ -4302,7 +4453,7 @@ EXPORT_SYMBOL(netdev_has_any_upper_dev);
 */
struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
{
-	struct netdev_upper *upper;
+	struct netdev_adjacent *upper;
 
 	ASSERT_RTNL();
 
@@ -4310,13 +4461,38 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
 		return NULL;
 
 	upper = list_first_entry(&dev->upper_dev_list,
-				 struct netdev_upper, list);
+				 struct netdev_adjacent, list);
 	if (likely(upper->master))
 		return upper->dev;
 	return NULL;
}
EXPORT_SYMBOL(netdev_master_upper_dev_get);
 
+/* netdev_upper_get_next_dev_rcu - Get the next dev from upper list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next device from the dev's upper list, starting from iter
+ * position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+						 struct list_head **iter)
+{
+	struct netdev_adjacent *upper;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+	if (&upper->list == &dev->upper_dev_list)
+		return NULL;
+
+	*iter = &upper->list;
+
+	return upper->dev;
+}
+EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+
/**
 * netdev_master_upper_dev_get_rcu - Get master upper device
 * @dev: device
@@ -4326,20 +4502,158 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get);
 */
struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
{
-	struct netdev_upper *upper;
+	struct netdev_adjacent *upper;
 
 	upper = list_first_or_null_rcu(&dev->upper_dev_list,
-				       struct netdev_upper, list);
+				       struct netdev_adjacent, list);
 	if (upper && likely(upper->master))
 		return upper->dev;
 	return NULL;
}
EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
 
+static int __netdev_adjacent_dev_insert(struct net_device *dev,
+					struct net_device *adj_dev,
+					bool neighbour, bool master,
+					bool upper)
+{
+	struct netdev_adjacent *adj;
+
+	adj = __netdev_find_adj(dev, adj_dev, upper);
+
+	if (adj) {
+		BUG_ON(neighbour);
+		adj->ref_nr++;
+		return 0;
+	}
+
+	adj = kmalloc(sizeof(*adj), GFP_KERNEL);
+	if (!adj)
+		return -ENOMEM;
+
+	adj->dev = adj_dev;
+	adj->master = master;
+	adj->neighbour = neighbour;
+	adj->ref_nr = 1;
+
+	dev_hold(adj_dev);
+	pr_debug("dev_hold for %s, because of %s link added from %s to %s\n",
+		 adj_dev->name, upper ? "upper" : "lower", dev->name,
+		 adj_dev->name);
+
+	if (!upper) {
+		list_add_tail_rcu(&adj->list, &dev->lower_dev_list);
+		return 0;
+	}
+
+	/* Ensure that master upper link is always the first item in list. */
+	if (master)
+		list_add_rcu(&adj->list, &dev->upper_dev_list);
+	else
+		list_add_tail_rcu(&adj->list, &dev->upper_dev_list);
+
+	return 0;
+}
+
+static inline int __netdev_upper_dev_insert(struct net_device *dev,
+					    struct net_device *udev,
+					    bool master, bool neighbour)
+{
+	return __netdev_adjacent_dev_insert(dev, udev, neighbour, master,
+					    true);
+}
+
+static inline int __netdev_lower_dev_insert(struct net_device *dev,
+					    struct net_device *ldev,
+					    bool neighbour)
+{
+	return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false,
+					    false);
+}
+
+void __netdev_adjacent_dev_remove(struct net_device *dev,
+				  struct net_device *adj_dev, bool upper)
+{
+	struct netdev_adjacent *adj;
+
+	if (upper)
+		adj = __netdev_find_upper(dev, adj_dev);
+	else
+		adj = __netdev_find_lower(dev, adj_dev);
+
+	if (!adj)
+		BUG();
+
+	if (adj->ref_nr > 1) {
+		adj->ref_nr--;
+		return;
+	}
+
+	list_del_rcu(&adj->list);
+	pr_debug("dev_put for %s, because of %s link removed from %s to %s\n",
+		 adj_dev->name, upper ? "upper" : "lower", dev->name,
+		 adj_dev->name);
+	dev_put(adj_dev);
+	kfree_rcu(adj, rcu);
+}
+
+static inline void __netdev_upper_dev_remove(struct net_device *dev,
+					     struct net_device *udev)
+{
+	return __netdev_adjacent_dev_remove(dev, udev, true);
+}
+
+static inline void __netdev_lower_dev_remove(struct net_device *dev,
+					     struct net_device *ldev)
+{
+	return __netdev_adjacent_dev_remove(dev, ldev, false);
+}
+
+int __netdev_adjacent_dev_insert_link(struct net_device *dev,
+				      struct net_device *upper_dev,
+				      bool master, bool neighbour)
+{
+	int ret;
+
+	ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour);
+	if (ret)
+		return ret;
+
+	ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour);
+	if (ret) {
+		__netdev_upper_dev_remove(dev, upper_dev);
+		return ret;
+	}
+
+	return 0;
+}
+
+static inline int __netdev_adjacent_dev_link(struct net_device *dev,
+					     struct net_device *udev)
+{
+	return __netdev_adjacent_dev_insert_link(dev, udev, false, false);
+}
+
+static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
+						       struct net_device *udev,
+						       bool master)
+{
+	return __netdev_adjacent_dev_insert_link(dev, udev, master, true);
+}
+
+void __netdev_adjacent_dev_unlink(struct net_device *dev,
+				  struct net_device *upper_dev)
+{
+	__netdev_upper_dev_remove(dev, upper_dev);
+	__netdev_lower_dev_remove(upper_dev, dev);
+}
+
+
static int __netdev_upper_dev_link(struct net_device *dev,
 				   struct net_device *upper_dev, bool master)
{
-	struct netdev_upper *upper;
+	struct netdev_adjacent *i, *j, *to_i, *to_j;
+	int ret = 0;
 
 	ASSERT_RTNL();
 
@@ -4347,7 +4661,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 		return -EBUSY;
 
 	/* To prevent loops, check if dev is not upper device to upper_dev. */
-	if (__netdev_search_upper_dev(upper_dev, dev))
+	if (__netdev_find_upper(upper_dev, dev))
 		return -EBUSY;
 
 	if (__netdev_find_upper(dev, upper_dev))
@@ -4356,22 +4670,76 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	if (master && netdev_master_upper_dev_get(dev))
 		return -EBUSY;
 
-	upper = kmalloc(sizeof(*upper), GFP_KERNEL);
-	if (!upper)
-		return -ENOMEM;
+	ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master);
+	if (ret)
+		return ret;
 
-	upper->dev = upper_dev;
-	upper->master = master;
-	INIT_LIST_HEAD(&upper->search_list);
+	/* Now that we linked these devs, make all the upper_dev's
+	 * upper_dev_list visible to every dev's lower_dev_list and vice
+	 * versa, and don't forget the devices itself. All of these
+	 * links are non-neighbours.
+	 */
+	list_for_each_entry(i, &dev->lower_dev_list, list) {
+		list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
+			ret = __netdev_adjacent_dev_link(i->dev, j->dev);
+			if (ret)
+				goto rollback_mesh;
+		}
+	}
 
-	/* Ensure that master upper link is always the first item in list. */
-	if (master)
-		list_add_rcu(&upper->list, &dev->upper_dev_list);
-	else
-		list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
-	dev_hold(upper_dev);
+	/* add dev to every upper_dev's upper device */
+	list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+		ret = __netdev_adjacent_dev_link(dev, i->dev);
+		if (ret)
+			goto rollback_upper_mesh;
+	}
 
+	/* add upper_dev to every dev's lower device */
+	list_for_each_entry(i, &dev->lower_dev_list, list) {
+		ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
+		if (ret)
+			goto rollback_lower_mesh;
+	}
+
+	call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
 	return 0;
+
+rollback_lower_mesh:
+	to_i = i;
+	list_for_each_entry(i, &dev->lower_dev_list, list) {
+		if (i == to_i)
+			break;
+		__netdev_adjacent_dev_unlink(i->dev, upper_dev);
+	}
+
+	i = NULL;
+
+rollback_upper_mesh:
+	to_i = i;
+	list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+		if (i == to_i)
+			break;
+		__netdev_adjacent_dev_unlink(dev, i->dev);
+	}
+
+	i = j = NULL;
+
+rollback_mesh:
+	to_i = i;
+	to_j = j;
+	list_for_each_entry(i, &dev->lower_dev_list, list) {
+		list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
+			if (i == to_i && j == to_j)
+				break;
+			__netdev_adjacent_dev_unlink(i->dev, j->dev);
+		}
+		if (i == to_i)
+			break;
+	}
+
+	__netdev_adjacent_dev_unlink(dev, upper_dev);
+
+	return ret;
}
 
/**
@@ -4420,16 +4788,29 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link);
void netdev_upper_dev_unlink(struct net_device *dev,
 			     struct net_device *upper_dev)
{
-	struct netdev_upper *upper;
-
+	struct netdev_adjacent *i, *j;
 	ASSERT_RTNL();
 
-	upper = __netdev_find_upper(dev, upper_dev);
-	if (!upper)
-		return;
-	list_del_rcu(&upper->list);
-	dev_put(upper_dev);
-	kfree_rcu(upper, rcu);
+	__netdev_adjacent_dev_unlink(dev, upper_dev);
+
+	/* Here is the tricky part. We must remove all dev's lower
+	 * devices from all upper_dev's upper devices and vice
+	 * versa, to maintain the graph relationship.
+	 */
+	list_for_each_entry(i, &dev->lower_dev_list, list)
+		list_for_each_entry(j, &upper_dev->upper_dev_list, list)
+			__netdev_adjacent_dev_unlink(i->dev, j->dev);
+
+	/* remove also the devices itself from lower/upper device
+	 * list
+	 */
+	list_for_each_entry(i, &dev->lower_dev_list, list)
+		__netdev_adjacent_dev_unlink(i->dev, upper_dev);
+
+	list_for_each_entry(i, &upper_dev->upper_dev_list, list)
+		__netdev_adjacent_dev_unlink(dev, i->dev);
+
+	call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
@@ -4700,8 +5081,13 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
 	}
 
 	if (dev->flags & IFF_UP &&
-	    (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
-		call_netdevice_notifiers(NETDEV_CHANGE, dev);
+	    (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
+		struct netdev_notifier_change_info change_info;
+
+		change_info.flags_changed = changes;
+		call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+					      &change_info.info);
+	}
}
 
/**
@@ -4823,6 +5209,24 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier)
EXPORT_SYMBOL(dev_change_carrier);
 
/**
+ *	dev_get_phys_port_id - Get device physical port ID
+ *	@dev: device
+ *	@ppid: port ID
+ *
+ *	Get device physical port ID
+ */
+int dev_get_phys_port_id(struct net_device *dev,
+			 struct netdev_phys_port_id *ppid)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if (!ops->ndo_get_phys_port_id)
+		return -EOPNOTSUPP;
+	return ops->ndo_get_phys_port_id(dev, ppid);
+}
+EXPORT_SYMBOL(dev_get_phys_port_id);
+
+/**
 *	dev_new_index - allocate an ifindex
 *	@net: the applicable net namespace
 *
@@ -4843,10 +5247,12 @@ static int dev_new_index(struct net *net)
 
/* Delayed registration/unregisteration */
static LIST_HEAD(net_todo_list);
+static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
 
static void net_set_todo(struct net_device *dev)
{
 	list_add_tail(&dev->todo_list, &net_todo_list);
+	dev_net(dev)->dev_unreg_count++;
}
 
static void rollback_registered_many(struct list_head *head)
@@ -5124,17 +5530,28 @@ static void netdev_init_one_queue(struct net_device *dev,
#endif
}
 
+static void netif_free_tx_queues(struct net_device *dev)
+{
+	if (is_vmalloc_addr(dev->_tx))
+		vfree(dev->_tx);
+	else
+		kfree(dev->_tx);
+}
+
static int netif_alloc_netdev_queues(struct net_device *dev)
{
 	unsigned int count = dev->num_tx_queues;
 	struct netdev_queue *tx;
+	size_t sz = count * sizeof(*tx);
 
-	BUG_ON(count < 1);
-
-	tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
-	if (!tx)
-		return -ENOMEM;
+	BUG_ON(count < 1 || count > 0xffff);
 
+	tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+	if (!tx) {
+		tx = vzalloc(sz);
+		if (!tx)
+			return -ENOMEM;
+	}
 	dev->_tx = tx;
 
 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
@@ -5235,6 +5652,10 @@ int register_netdevice(struct net_device *dev)
 	 */
 	dev->hw_enc_features |= NETIF_F_SG;
 
+	/* Make NETIF_F_SG inheritable to MPLS.
+	 */
+	dev->mpls_features |= NETIF_F_SG;
+
 	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
 	ret = notifier_to_errno(ret);
 	if (ret)
@@ -5499,6 +5920,12 @@ void netdev_run_todo(void)
 		if (dev->destructor)
 			dev->destructor(dev);
 
+		/* Report a network device has been unregistered */
+		rtnl_lock();
+		dev_net(dev)->dev_unreg_count--;
+		__rtnl_unlock();
+		wake_up(&netdev_unregistering_wq);
+
 		/* Free network device */
 		kobject_put(&dev->dev.kobj);
 	}
@@ -5651,6 +6078,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	INIT_LIST_HEAD(&dev->unreg_list);
 	INIT_LIST_HEAD(&dev->link_watch_list);
 	INIT_LIST_HEAD(&dev->upper_dev_list);
+	INIT_LIST_HEAD(&dev->lower_dev_list);
 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
 
@@ -5678,7 +6106,7 @@ free_all:
 
free_pcpu:
 	free_percpu(dev->pcpu_refcnt);
-	kfree(dev->_tx);
+	netif_free_tx_queues(dev);
#ifdef CONFIG_RPS
 	kfree(dev->_rx);
#endif
@@ -5703,7 +6131,7 @@ void free_netdev(struct net_device *dev)
 
 	release_net(dev_net(dev));
 
-	kfree(dev->_tx);
+	netif_free_tx_queues(dev);
#ifdef CONFIG_RPS
 	kfree(dev->_rx);
#endif
@@ -6014,7 +6442,7 @@ netdev_features_t netdev_increment_features(netdev_features_t all,
}
EXPORT_SYMBOL(netdev_increment_features);
 
-static struct hlist_head *netdev_create_hash(void)
+static struct hlist_head * __net_init netdev_create_hash(void)
{
 	int i;
 	struct hlist_head *hash;
@@ -6183,6 +6611,34 @@ static void __net_exit default_device_exit(struct net *net)
 	rtnl_unlock();
}
 
+static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
+{
+	/* Return with the rtnl_lock held when there are no network
+	 * devices unregistering in any network namespace in net_list.
+	 */
+	struct net *net;
+	bool unregistering;
+	DEFINE_WAIT(wait);
+
+	for (;;) {
+		prepare_to_wait(&netdev_unregistering_wq, &wait,
+				TASK_UNINTERRUPTIBLE);
+		unregistering = false;
+		rtnl_lock();
+		list_for_each_entry(net, net_list, exit_list) {
+			if (net->dev_unreg_count > 0) {
+				unregistering = true;
+				break;
+			}
+		}
+		if (!unregistering)
+			break;
+		__rtnl_unlock();
+		schedule();
+	}
+	finish_wait(&netdev_unregistering_wq, &wait);
+}
+
static void __net_exit default_device_exit_batch(struct list_head *net_list)
{
 	/* At exit all network devices most be removed from a network
@@ -6194,7 +6650,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
 	struct net *net;
 	LIST_HEAD(dev_kill_list);
 
-	rtnl_lock();
+	/* To prevent network device cleanup code from dereferencing
+	 * loopback devices or network devices that have been freed
+	 * wait here for all pending unregistrations to complete,
+	 * before unregistring the loopback device and allowing the
+	 * network namespace be freed.
+	 *
+	 * The netdev todo list containing all network devices
+	 * unregistrations that happen in default_device_exit_batch
+	 * will run in the rtnl_unlock() at the end of
+	 * default_device_exit_batch.
+	 */
+	rtnl_lock_unregistering(net_list);
 	list_for_each_entry(net, net_list, exit_list) {
 		for_each_netdev_reverse(net, dev) {
 			if (dev->rtnl_link_ops)
@@ -6270,6 +6737,10 @@ static int __init net_dev_init(void)
 		sd->backlog.weight = weight_p;
 		sd->backlog.gro_list = NULL;
 		sd->backlog.gro_count = 0;
+
+#ifdef CONFIG_NET_FLOW_LIMIT
+		sd->flow_limit = NULL;
+#endif
 	}
 
 	dev_boot_phase = 0;
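A hypothetical sketch of walking the new adjacency lists with the netdev_upper_get_next_dev_rcu() helper introduced above (the function name example_walk_uppers is illustrative); the iterator cursor starts at the list head and the whole walk must stay under rcu_read_lock():

static void example_walk_uppers(struct net_device *dev)
{
	struct net_device *upper;
	struct list_head *iter = &dev->upper_dev_list;

	rcu_read_lock();
	while ((upper = netdev_upper_get_next_dev_rcu(dev, &iter)) != NULL)
		pr_debug("%s is an upper device of %s\n",
			 upper->name, dev->name);
	rcu_read_unlock();
}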
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 6cc0481faade..5b7d0e1d0664 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -19,9 +19,8 @@
 
static int dev_ifname(struct net *net, struct ifreq __user *arg)
{
-	struct net_device *dev;
 	struct ifreq ifr;
-	unsigned seq;
+	int error;
 
 	/*
 	 * Fetch the caller's info block.
@@ -30,19 +29,9 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 		return -EFAULT;
 
-retry:
-	seq = read_seqcount_begin(&devnet_rename_seq);
-	rcu_read_lock();
-	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
-	if (!dev) {
-		rcu_read_unlock();
-		return -ENODEV;
-	}
-
-	strcpy(ifr.ifr_name, dev->name);
-	rcu_read_unlock();
-	if (read_seqcount_retry(&devnet_rename_seq, seq))
-		goto retry;
+	error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex);
+	if (error)
+		return error;
 
 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
 		return -EFAULT;
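A hypothetical caller-side sketch of the new netdev_get_name() helper (example_ifindex_to_name is an illustrative name), which replaces the open-coded seqcount retry loop dev_ifname() carried before:

static int example_ifindex_to_name(struct net *net, int ifindex)
{
	char name[IFNAMSIZ];
	int err;

	err = netdev_get_name(net, name, ifindex);
	if (err)
		return err;	/* -ENODEV if the ifindex is stale */
	pr_debug("ifindex %d is %s\n", ifindex, name);
	return 0;
}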
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index d23b6682f4e9..5e78d44333b9 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -295,9 +295,9 @@ static int net_dm_cmd_trace(struct sk_buff *skb,
}
 
static int dropmon_net_event(struct notifier_block *ev_block,
 			     unsigned long event, void *ptr)
{
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct dm_hw_stat_delta *new_stat = NULL;
 	struct dm_hw_stat_delta *tmp;
 
diff --git a/net/core/dst.c b/net/core/dst.c
index df9cc810ec8e..ca4231ec7347 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -372,7 +372,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static int dst_dev_event(struct notifier_block *this, unsigned long event,
 			 void *ptr)
{
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct dst_entry *dst, *last = NULL;
 
 	switch (event) {
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 22efdaa76ebf..78e9d9223e40 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -60,10 +60,10 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_IPV6_CSUM_BIT] =        "tx-checksum-ipv6",
 	[NETIF_F_HIGHDMA_BIT] =          "highdma",
 	[NETIF_F_FRAGLIST_BIT] =         "tx-scatter-gather-fraglist",
-	[NETIF_F_HW_VLAN_CTAG_TX_BIT] =  "tx-vlan-ctag-hw-insert",
+	[NETIF_F_HW_VLAN_CTAG_TX_BIT] =  "tx-vlan-hw-insert",
 
-	[NETIF_F_HW_VLAN_CTAG_RX_BIT] =  "rx-vlan-ctag-hw-parse",
-	[NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-ctag-filter",
+	[NETIF_F_HW_VLAN_CTAG_RX_BIT] =  "rx-vlan-hw-parse",
+	[NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-filter",
 	[NETIF_F_HW_VLAN_STAG_TX_BIT] =  "tx-vlan-stag-hw-insert",
 	[NETIF_F_HW_VLAN_STAG_RX_BIT] =  "rx-vlan-stag-hw-parse",
 	[NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter",
@@ -82,6 +82,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_FSO_BIT] =              "tx-fcoe-segmentation",
 	[NETIF_F_GSO_GRE_BIT] =          "tx-gre-segmentation",
 	[NETIF_F_GSO_UDP_TUNNEL_BIT] =   "tx-udp_tnl-segmentation",
+	[NETIF_F_GSO_MPLS_BIT] =         "tx-mpls-segmentation",
 
 	[NETIF_F_FCOE_CRC_BIT] =         "tx-checksum-fcoe-crc",
 	[NETIF_F_SCTP_CSUM_BIT] =        "tx-checksum-sctp",
@@ -278,11 +279,16 @@ static u32 __ethtool_get_flags(struct net_device *dev)
{
 	u32 flags = 0;
 
-	if (dev->features & NETIF_F_LRO)	     flags |= ETH_FLAG_LRO;
-	if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) flags |= ETH_FLAG_RXVLAN;
-	if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) flags |= ETH_FLAG_TXVLAN;
-	if (dev->features & NETIF_F_NTUPLE)	     flags |= ETH_FLAG_NTUPLE;
-	if (dev->features & NETIF_F_RXHASH)	     flags |= ETH_FLAG_RXHASH;
+	if (dev->features & NETIF_F_LRO)
+		flags |= ETH_FLAG_LRO;
+	if (dev->features & NETIF_F_HW_VLAN_CTAG_RX)
+		flags |= ETH_FLAG_RXVLAN;
+	if (dev->features & NETIF_F_HW_VLAN_CTAG_TX)
+		flags |= ETH_FLAG_TXVLAN;
+	if (dev->features & NETIF_F_NTUPLE)
+		flags |= ETH_FLAG_NTUPLE;
+	if (dev->features & NETIF_F_RXHASH)
+		flags |= ETH_FLAG_RXHASH;
 
 	return flags;
}
@@ -294,11 +300,16 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
 	if (data & ~ETH_ALL_FLAGS)
 		return -EINVAL;
 
-	if (data & ETH_FLAG_LRO)	features |= NETIF_F_LRO;
-	if (data & ETH_FLAG_RXVLAN)	features |= NETIF_F_HW_VLAN_CTAG_RX;
-	if (data & ETH_FLAG_TXVLAN)	features |= NETIF_F_HW_VLAN_CTAG_TX;
-	if (data & ETH_FLAG_NTUPLE)	features |= NETIF_F_NTUPLE;
-	if (data & ETH_FLAG_RXHASH)	features |= NETIF_F_RXHASH;
+	if (data & ETH_FLAG_LRO)
+		features |= NETIF_F_LRO;
+	if (data & ETH_FLAG_RXVLAN)
+		features |= NETIF_F_HW_VLAN_CTAG_RX;
+	if (data & ETH_FLAG_TXVLAN)
+		features |= NETIF_F_HW_VLAN_CTAG_TX;
+	if (data & ETH_FLAG_NTUPLE)
+		features |= NETIF_F_NTUPLE;
+	if (data & ETH_FLAG_RXHASH)
+		features |= NETIF_F_RXHASH;
 
 	/* allow changing only bits set in hw_features */
 	changed = (features ^ dev->features) & ETH_ALL_FEATURES;
@@ -1319,10 +1330,19 @@ static int ethtool_get_dump_data(struct net_device *dev,
 	if (ret)
 		return ret;
 
-	len = (tmp.len > dump.len) ? dump.len : tmp.len;
+	len = min(tmp.len, dump.len);
 	if (!len)
 		return -EFAULT;
 
+	/* Don't ever let the driver think there's more space available
+	 * than it requested with .get_dump_flag().
+	 */
+	dump.len = len;
+
+	/* Always allocate enough space to hold the whole thing so that the
+	 * driver does not need to check the length and bother with partial
+	 * dumping.
+	 */
 	data = vzalloc(tmp.len);
 	if (!data)
 		return -ENOMEM;
@@ -1330,6 +1350,16 @@ static int ethtool_get_dump_data(struct net_device *dev,
1330 if (ret) 1350 if (ret)
1331 goto out; 1351 goto out;
1332 1352
1353 /* There are two sane possibilities:
1354 * 1. The driver's .get_dump_data() does not touch dump.len.
1355 * 2. Or it may set dump.len to how much it really writes, which
1356 * should be tmp.len (or len if it can do a partial dump).
1357 * In any case respond to userspace with the actual length of data
1358 * it's receiving.
1359 */
1360 WARN_ON(dump.len != len && dump.len != tmp.len);
1361 dump.len = len;
1362
1333 if (copy_to_user(useraddr, &dump, sizeof(dump))) { 1363 if (copy_to_user(useraddr, &dump, sizeof(dump))) {
1334 ret = -EFAULT; 1364 ret = -EFAULT;
1335 goto out; 1365 goto out;
@@ -1413,7 +1443,7 @@ static int ethtool_get_module_eeprom(struct net_device *dev,
1413 modinfo.eeprom_len); 1443 modinfo.eeprom_len);
1414} 1444}
1415 1445
1416/* The main entry point in this file. Called from net/core/dev.c */ 1446/* The main entry point in this file. Called from net/core/dev_ioctl.c */
1417 1447
1418int dev_ethtool(struct net *net, struct ifreq *ifr) 1448int dev_ethtool(struct net *net, struct ifreq *ifr)
1419{ 1449{
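
The two dump.len assignments above enforce a simple length contract: the driver never sees more buffer space than it requested via .get_dump_flag(), and userspace is told exactly how much data it is actually receiving. A minimal user-space sketch of that clamp (names here are illustrative, not kernel API):

#include <stdio.h>

static unsigned int clamp_dump_len(unsigned int drv_len, unsigned int user_len)
{
        /* copy no more than the driver produced or the user offered */
        return drv_len < user_len ? drv_len : user_len;
}

int main(void)
{
        printf("%u\n", clamp_dump_len(1500, 4096));     /* driver-bound: 1500 */
        printf("%u\n", clamp_dump_len(1500, 512));      /* user-bound: 512 */
        return 0;
}
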
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index d5a9f8ead0d8..2e654138433c 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -33,6 +33,9 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 	r->flags = flags;
 	r->fr_net = hold_net(ops->fro_net);
 
+	r->suppress_prefixlen = -1;
+	r->suppress_ifgroup = -1;
+
 	/* The lock is not required here, the list in unreacheable
 	 * at the moment this function is called */
 	list_add_tail(&r->list, &ops->rules_list);
@@ -226,6 +229,9 @@ jumped:
 		else
 			err = ops->action(rule, fl, flags, arg);
 
+		if (!err && ops->suppress && ops->suppress(rule, arg))
+			continue;
+
 		if (err != -EAGAIN) {
 			if ((arg->flags & FIB_LOOKUP_NOREF) ||
 			    likely(atomic_inc_not_zero(&rule->refcnt))) {
@@ -337,6 +343,15 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
 	rule->action = frh->action;
 	rule->flags = frh->flags;
 	rule->table = frh_get_table(frh, tb);
+	if (tb[FRA_SUPPRESS_PREFIXLEN])
+		rule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]);
+	else
+		rule->suppress_prefixlen = -1;
+
+	if (tb[FRA_SUPPRESS_IFGROUP])
+		rule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]);
+	else
+		rule->suppress_ifgroup = -1;
 
 	if (!tb[FRA_PRIORITY] && ops->default_pref)
 		rule->pref = ops->default_pref(ops);
@@ -523,6 +538,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
 			 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
 			 + nla_total_size(4) /* FRA_PRIORITY */
 			 + nla_total_size(4) /* FRA_TABLE */
+			 + nla_total_size(4) /* FRA_SUPPRESS_PREFIXLEN */
+			 + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
 			 + nla_total_size(4) /* FRA_FWMARK */
 			 + nla_total_size(4); /* FRA_FWMASK */
 
@@ -548,6 +565,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	frh->table = rule->table;
 	if (nla_put_u32(skb, FRA_TABLE, rule->table))
 		goto nla_put_failure;
+	if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
+		goto nla_put_failure;
 	frh->res1 = 0;
 	frh->res2 = 0;
 	frh->action = rule->action;
@@ -580,6 +599,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	    (rule->target &&
 	     nla_put_u32(skb, FRA_GOTO, rule->target)))
 		goto nla_put_failure;
+
+	if (rule->suppress_ifgroup != -1) {
+		if (nla_put_u32(skb, FRA_SUPPRESS_IFGROUP, rule->suppress_ifgroup))
+			goto nla_put_failure;
+	}
+
 	if (ops->fill(rule, skb, frh) < 0)
 		goto nla_put_failure;
 
@@ -705,9 +730,9 @@ static void detach_rules(struct list_head *rules, struct net_device *dev)
 
 
 static int fib_rules_event(struct notifier_block *this, unsigned long event,
 			   void *ptr)
 {
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct net *net = dev_net(dev);
 	struct fib_rules_ops *ops;
 
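
The new ops->suppress hook lets an address family veto an otherwise matching rule, which makes the rule loop above fall through to later rules via "continue". A hypothetical suppress callback, assuming an illustrative result_prefixlen() helper that is not kernel API:

static int example_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
{
        /* veto results less specific than the configured threshold */
        if (rule->suppress_prefixlen != -1 &&
            result_prefixlen(arg) <= rule->suppress_prefixlen)
                return 1;       /* suppressed: lookup continues */
        return 0;               /* result stands */
}
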
diff --git a/net/core/filter.c b/net/core/filter.c
index dad2a178f9f8..6438f29ff266 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -778,7 +778,7 @@ int sk_detach_filter(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(sk_detach_filter);
 
-static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
+void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
 {
 	static const u16 decodes[] = {
 		[BPF_S_ALU_ADD_K]	= BPF_ALU|BPF_ADD|BPF_K,
diff --git a/net/core/flow.c b/net/core/flow.c
index 7102f166482d..dfa602ceb8cd 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -403,7 +403,7 @@ void flow_cache_flush_deferred(void)
 	schedule_work(&flow_cache_flush_work);
 }
 
-static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
+static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
 {
 	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
 	size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
@@ -421,7 +421,7 @@ static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
 	return 0;
 }
 
-static int __cpuinit flow_cache_cpu(struct notifier_block *nfb,
+static int flow_cache_cpu(struct notifier_block *nfb,
 			  unsigned long action,
 			  void *hcpu)
 {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 00ee068efc1c..8d7d0dd72db2 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -65,6 +65,7 @@ ipv6:
 		nhoff += sizeof(struct ipv6hdr);
 		break;
 	}
+	case __constant_htons(ETH_P_8021AD):
 	case __constant_htons(ETH_P_8021Q): {
 		const struct vlan_hdr *vlan;
 		struct vlan_hdr _vlan;
@@ -139,7 +140,11 @@ ipv6:
 		break;
 	}
 	case IPPROTO_IPIP:
-		goto again;
+		proto = htons(ETH_P_IP);
+		goto ip;
+	case IPPROTO_IPV6:
+		proto = htons(ETH_P_IPV6);
+		goto ipv6;
 	default:
 		break;
 	}
@@ -149,8 +154,8 @@ ipv6:
 	if (poff >= 0) {
 		__be32 *ports, _ports;
 
-		nhoff += poff;
-		ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
+		ports = skb_header_pointer(skb, nhoff + poff,
+					   sizeof(_ports), &_ports);
 		if (ports)
 			flow->ports = *ports;
 	}
@@ -345,14 +350,9 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
 		if (new_index < 0)
 			new_index = skb_tx_hash(dev, skb);
 
-		if (queue_index != new_index && sk) {
-			struct dst_entry *dst =
-			    rcu_dereference_check(sk->sk_dst_cache, 1);
-
-			if (dst && skb_dst(skb) == dst)
-				sk_tx_queue_set(sk, queue_index);
-
-		}
+		if (queue_index != new_index && sk &&
+		    rcu_access_pointer(sk->sk_dst_cache))
+			sk_tx_queue_set(sk, new_index);
 
 		queue_index = new_index;
 	}
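
The port-extraction fix above stops mutating nhoff and instead passes nhoff + poff straight to skb_header_pointer(), which copies the span only if it lies entirely inside the packet. A self-contained user-space sketch of that bounds-checked read pattern (header_pointer() here is an illustrative stand-in, not the kernel helper):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static const void *header_pointer(const uint8_t *pkt, size_t pkt_len,
                                  size_t offset, size_t len, void *buf)
{
        if (len > pkt_len || offset > pkt_len - len)
                return NULL;            /* truncated packet: no ports */
        memcpy(buf, pkt + offset, len);
        return buf;
}

int main(void)
{
        uint8_t pkt[64] = { 0 };
        uint32_t ports;

        /* e.g. nhoff = 14 + 20 (Ethernet + IPv4) and poff = 0 for UDP */
        if (header_pointer(pkt, sizeof(pkt), 34, sizeof(ports), &ports))
                printf("ports word: %08x\n", ports);
        return 0;
}
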
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index d9d198aa9fed..6b5b6e7013ca 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -82,7 +82,7 @@ struct gen_estimator
 {
 	struct list_head	list;
 	struct gnet_stats_basic_packed	*bstats;
-	struct gnet_stats_rate_est	*rate_est;
+	struct gnet_stats_rate_est64	*rate_est;
 	spinlock_t		*stats_lock;
 	int			ewma_log;
 	u64			last_bytes;
@@ -167,7 +167,7 @@ static void gen_add_node(struct gen_estimator *est)
 
 static
 struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats,
-				    const struct gnet_stats_rate_est *rate_est)
+				    const struct gnet_stats_rate_est64 *rate_est)
 {
 	struct rb_node *p = est_root.rb_node;
 
@@ -203,7 +203,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
  *
  */
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
-		      struct gnet_stats_rate_est *rate_est,
+		      struct gnet_stats_rate_est64 *rate_est,
 		      spinlock_t *stats_lock,
 		      struct nlattr *opt)
 {
@@ -258,7 +258,7 @@ EXPORT_SYMBOL(gen_new_estimator);
  * Note : Caller should respect an RCU grace period before freeing stats_lock
  */
 void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
-			struct gnet_stats_rate_est *rate_est)
+			struct gnet_stats_rate_est64 *rate_est)
 {
 	struct gen_estimator *e;
 
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
 * Returns 0 on success or a negative error code.
 */
 int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
-			  struct gnet_stats_rate_est *rate_est,
+			  struct gnet_stats_rate_est64 *rate_est,
 			  spinlock_t *stats_lock, struct nlattr *opt)
 {
 	gen_kill_estimator(bstats, rate_est);
@@ -306,7 +306,7 @@ EXPORT_SYMBOL(gen_replace_estimator);
 * Returns true if estimator is active, and false if not.
 */
 bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
-			  const struct gnet_stats_rate_est *rate_est)
+			  const struct gnet_stats_rate_est64 *rate_est)
 {
 	bool res;
 
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index ddedf211e588..9d3d9e78397b 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -143,18 +143,30 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
 int
 gnet_stats_copy_rate_est(struct gnet_dump *d,
 			 const struct gnet_stats_basic_packed *b,
-			 struct gnet_stats_rate_est *r)
+			 struct gnet_stats_rate_est64 *r)
 {
+	struct gnet_stats_rate_est est;
+	int res;
+
 	if (b && !gen_estimator_active(b, r))
 		return 0;
 
+	est.bps = min_t(u64, UINT_MAX, r->bps);
+	/* we have some time before reaching 2^32 packets per second */
+	est.pps = r->pps;
+
 	if (d->compat_tc_stats) {
-		d->tc_stats.bps = r->bps;
-		d->tc_stats.pps = r->pps;
+		d->tc_stats.bps = est.bps;
+		d->tc_stats.pps = est.pps;
 	}
 
-	if (d->tail)
-		return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r));
+	if (d->tail) {
+		res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est));
+		if (res < 0 || est.bps == r->bps)
+			return res;
+		/* emit 64bit stats only if needed */
+		return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r));
+	}
 
 	return 0;
 }
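
The clamp above keeps old userspace working: the legacy TCA_STATS_RATE_EST attribute carries a 32-bit byte rate, so a 64-bit rate is saturated for it, and the full-width value goes out in TCA_STATS_RATE_EST64 only when the two differ. A small sketch of the saturation itself:

#include <stdint.h>
#include <stdio.h>

static uint32_t clamp_bps(uint64_t bps64)
{
        return bps64 > UINT32_MAX ? UINT32_MAX : (uint32_t)bps64;
}

int main(void)
{
        uint64_t slow = 125000000ULL;           /* 1 Gbit/s in bytes/s */
        uint64_t fast = 5000000000ULL;          /* 40 Gbit/s in bytes/s */

        printf("%u\n", clamp_bps(slow));        /* fits: 125000000 */
        printf("%u\n", clamp_bps(fast));        /* saturates: 4294967295 */
        return 0;
}
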
diff --git a/net/core/iovec.c b/net/core/iovec.c
index de178e462682..b77eeecc0011 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -212,3 +212,27 @@ out_fault:
 	goto out;
 }
 EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
+
+unsigned long iov_pages(const struct iovec *iov, int offset,
+			unsigned long nr_segs)
+{
+	unsigned long seg, base;
+	int pages = 0, len, size;
+
+	while (nr_segs && (offset >= iov->iov_len)) {
+		offset -= iov->iov_len;
+		++iov;
+		--nr_segs;
+	}
+
+	for (seg = 0; seg < nr_segs; seg++) {
+		base = (unsigned long)iov[seg].iov_base + offset;
+		len = iov[seg].iov_len - offset;
+		size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
+		pages += size;
+		offset = 0;
+	}
+
+	return pages;
+}
+EXPORT_SYMBOL(iov_pages);
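
A user-space sketch of the page-count arithmetic in iov_pages() above, assuming 4 KiB pages: the pages spanned by [base, base+len) are (page_offset(base) + len + PAGE_SIZE - 1) >> PAGE_SHIFT, which is what (base & ~PAGE_MASK) + len + ~PAGE_MASK expresses in the kernel (~PAGE_MASK == PAGE_SIZE - 1):

#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define PAGE_MASK       (~(PAGE_SIZE - 1))

static unsigned long pages_spanned(unsigned long base, unsigned long len)
{
        return ((base & ~PAGE_MASK) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
}

int main(void)
{
        printf("%lu\n", pages_spanned(0x1000, 4096));   /* aligned: 1 page */
        printf("%lu\n", pages_spanned(0x1ff0, 32));     /* straddles: 2    */
        printf("%lu\n", pages_spanned(0x1800, 8192));   /* offset 2K: 3    */
        return 0;
}
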
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 8f82a5cc3851..9c3a839322ba 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -92,6 +92,9 @@ static bool linkwatch_urgent_event(struct net_device *dev)
 	if (dev->ifindex != dev->iflink)
 		return true;
 
+	if (dev->priv_flags & IFF_TEAM_PORT)
+		return true;
+
 	return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
 }
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5c56b217b999..6072610a8672 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -231,7 +231,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
 				   we must kill timers etc. and move
 				   it to safe state.
 				 */
-				skb_queue_purge(&n->arp_queue);
+				__skb_queue_purge(&n->arp_queue);
 				n->arp_queue_len_bytes = 0;
 				n->output = neigh_blackhole;
 				if (n->nud_state & NUD_VALID)
@@ -286,7 +286,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
 	if (!n)
 		goto out_entries;
 
-	skb_queue_head_init(&n->arp_queue);
+	__skb_queue_head_init(&n->arp_queue);
 	rwlock_init(&n->lock);
 	seqlock_init(&n->ha_lock);
 	n->updated	  = n->used = now;
@@ -708,7 +708,9 @@ void neigh_destroy(struct neighbour *neigh)
 	if (neigh_del_timer(neigh))
 		pr_warn("Impossible event\n");
 
-	skb_queue_purge(&neigh->arp_queue);
+	write_lock_bh(&neigh->lock);
+	__skb_queue_purge(&neigh->arp_queue);
+	write_unlock_bh(&neigh->lock);
 	neigh->arp_queue_len_bytes = 0;
 
 	if (dev->netdev_ops->ndo_neigh_destroy)
@@ -858,7 +860,7 @@ static void neigh_invalidate(struct neighbour *neigh)
 			neigh->ops->error_report(neigh, skb);
 		write_lock(&neigh->lock);
 	}
-	skb_queue_purge(&neigh->arp_queue);
+	__skb_queue_purge(&neigh->arp_queue);
 	neigh->arp_queue_len_bytes = 0;
 }
 
@@ -1210,7 +1212,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 
 			write_lock_bh(&neigh->lock);
 		}
-		skb_queue_purge(&neigh->arp_queue);
+		__skb_queue_purge(&neigh->arp_queue);
 		neigh->arp_queue_len_bytes = 0;
 	}
 out:
@@ -1419,7 +1421,7 @@ static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
 
 	for (p = &tbl->parms; p; p = p->next) {
 		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
-		    (!p->dev && !ifindex))
+		    (!p->dev && !ifindex && net_eq(net, &init_net)))
 			return p;
 	}
 
@@ -1429,30 +1431,28 @@
 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 				      struct neigh_table *tbl)
 {
-	struct neigh_parms *p, *ref;
+	struct neigh_parms *p;
 	struct net *net = dev_net(dev);
 	const struct net_device_ops *ops = dev->netdev_ops;
 
-	ref = lookup_neigh_parms(tbl, net, 0);
-	if (!ref)
-		return NULL;
-
-	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
+	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
 	if (p) {
 		p->tbl		  = tbl;
 		atomic_set(&p->refcnt, 1);
 		p->reachable_time =
 				neigh_rand_reach_time(p->base_reachable_time);
+		dev_hold(dev);
+		p->dev = dev;
+		write_pnet(&p->net, hold_net(net));
+		p->sysctl_table = NULL;
 
 		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
+			release_net(net);
+			dev_put(dev);
 			kfree(p);
 			return NULL;
 		}
 
-		dev_hold(dev);
-		p->dev = dev;
-		write_pnet(&p->net, hold_net(net));
-		p->sysctl_table = NULL;
 		write_lock_bh(&tbl->lock);
 		p->next		= tbl->parms.next;
 		tbl->parms.next = p;
@@ -2053,6 +2053,12 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
 		}
 	}
 
+	err = -ENOENT;
+	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
+	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
+	    !net_eq(net, &init_net))
+		goto errout_tbl_lock;
+
 	if (tb[NDTA_THRESH1])
 		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
 
@@ -2753,23 +2759,22 @@ errout:
 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
 }
 
-#ifdef CONFIG_ARPD
 void neigh_app_ns(struct neighbour *n)
 {
 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
 }
 EXPORT_SYMBOL(neigh_app_ns);
-#endif /* CONFIG_ARPD */
 
 #ifdef CONFIG_SYSCTL
 static int zero;
+static int int_max = INT_MAX;
 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
 
-static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
-			   size_t *lenp, loff_t *ppos)
+static int proc_unres_qlen(struct ctl_table *ctl, int write,
+			   void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int size, ret;
-	ctl_table tmp = *ctl;
+	struct ctl_table tmp = *ctl;
 
 	tmp.extra1 = &zero;
 	tmp.extra2 = &unres_qlen_max;
@@ -2815,19 +2820,25 @@ static struct neigh_sysctl_table {
 			.procname	= "mcast_solicit",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.proc_handler	= proc_dointvec,
+			.extra1		= &zero,
+			.extra2		= &int_max,
+			.proc_handler	= proc_dointvec_minmax,
 		},
 		[NEIGH_VAR_UCAST_PROBE] = {
 			.procname	= "ucast_solicit",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.proc_handler	= proc_dointvec,
+			.extra1		= &zero,
+			.extra2		= &int_max,
+			.proc_handler	= proc_dointvec_minmax,
 		},
 		[NEIGH_VAR_APP_PROBE] = {
 			.procname	= "app_solicit",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.proc_handler	= proc_dointvec,
+			.extra1		= &zero,
+			.extra2		= &int_max,
+			.proc_handler	= proc_dointvec_minmax,
 		},
 		[NEIGH_VAR_RETRANS_TIME] = {
 			.procname	= "retrans_time",
@@ -2870,7 +2881,9 @@ static struct neigh_sysctl_table {
 			.procname	= "proxy_qlen",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.proc_handler	= proc_dointvec,
+			.extra1		= &zero,
+			.extra2		= &int_max,
+			.proc_handler	= proc_dointvec_minmax,
 		},
 		[NEIGH_VAR_ANYCAST_DELAY] = {
 			.procname	= "anycast_delay",
@@ -2912,19 +2925,25 @@ static struct neigh_sysctl_table {
 			.procname	= "gc_thresh1",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.proc_handler	= proc_dointvec,
+			.extra1		= &zero,
+			.extra2		= &int_max,
+			.proc_handler	= proc_dointvec_minmax,
 		},
 		[NEIGH_VAR_GC_THRESH2] = {
 			.procname	= "gc_thresh2",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.proc_handler	= proc_dointvec,
+			.extra1		= &zero,
+			.extra2		= &int_max,
+			.proc_handler	= proc_dointvec_minmax,
 		},
 		[NEIGH_VAR_GC_THRESH3] = {
 			.procname	= "gc_thresh3",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.proc_handler	= proc_dointvec,
+			.extra1		= &zero,
+			.extra2		= &int_max,
+			.proc_handler	= proc_dointvec_minmax,
 		},
 		{},
 	},
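
The conversions to proc_dointvec_minmax above make writes outside [extra1, extra2] fail with -EINVAL instead of being stored, which keeps negative values out of the solicit counters and gc thresholds. The general shape of such an entry, with an illustrative example_value that is not an actual kernel sysctl:

#include <linux/sysctl.h>
#include <linux/kernel.h>

static int zero;
static int int_max = INT_MAX;
static int example_value;

static struct ctl_table example_table[] = {
        {
                .procname       = "example_value",
                .data           = &example_value,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .extra1         = &zero,        /* lower bound */
                .extra2         = &int_max,     /* upper bound */
                .proc_handler   = proc_dointvec_minmax,
        },
        {},
};
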
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 569d355fec3e..2bf83299600a 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -146,11 +146,23 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
 static int softnet_seq_show(struct seq_file *seq, void *v)
 {
 	struct softnet_data *sd = v;
+	unsigned int flow_limit_count = 0;
 
-	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+#ifdef CONFIG_NET_FLOW_LIMIT
+	struct sd_flow_limit *fl;
+
+	rcu_read_lock();
+	fl = rcu_dereference(sd->flow_limit);
+	if (fl)
+		flow_limit_count = fl->count;
+	rcu_read_unlock();
+#endif
+
+	seq_printf(seq,
+		   "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
 		   sd->processed, sd->dropped, sd->time_squeeze, 0,
 		   0, 0, 0, 0, /* was fastroute */
-		   sd->cpu_collision, sd->received_rps);
+		   sd->cpu_collision, sd->received_rps, flow_limit_count);
 	return 0;
 }
 
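
Each /proc/net/softnet_stat row thus grows an eleventh hex field, flow_limit_count. A user-space sketch of a reader for the per-CPU rows, with field order as printed by softnet_seq_show() above:

#include <stdio.h>

int main(void)
{
        unsigned int f[11];
        FILE *fp = fopen("/proc/net/softnet_stat", "r");

        if (!fp)
                return 1;
        while (fscanf(fp, "%x %x %x %x %x %x %x %x %x %x %x",
                      &f[0], &f[1], &f[2], &f[3], &f[4], &f[5],
                      &f[6], &f[7], &f[8], &f[9], &f[10]) == 11)
                printf("processed=%u dropped=%u flow_limit_count=%u\n",
                       f[0], f[1], f[10]);
        fclose(fp);
        return 0;
}
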
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 981fed397d1d..d954b56b4e47 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -60,12 +60,19 @@ static ssize_t format_##field(const struct net_device *net, char *buf)	\
 {									\
 	return sprintf(buf, format_string, net->field);			\
 }									\
-static ssize_t show_##field(struct device *dev,				\
+static ssize_t field##_show(struct device *dev,				\
 			    struct device_attribute *attr, char *buf)	\
 {									\
 	return netdev_show(dev, attr, buf, format_##field);		\
-}
+}									\
+
+#define NETDEVICE_SHOW_RO(field, format_string)				\
+NETDEVICE_SHOW(field, format_string);					\
+static DEVICE_ATTR_RO(field)
 
+#define NETDEVICE_SHOW_RW(field, format_string)				\
+NETDEVICE_SHOW(field, format_string);					\
+static DEVICE_ATTR_RW(field)
 
 /* use same locking and permission rules as SIF* ioctl's */
 static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
@@ -96,16 +103,16 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
 	return ret;
 }
 
-NETDEVICE_SHOW(dev_id, fmt_hex);
-NETDEVICE_SHOW(addr_assign_type, fmt_dec);
-NETDEVICE_SHOW(addr_len, fmt_dec);
-NETDEVICE_SHOW(iflink, fmt_dec);
-NETDEVICE_SHOW(ifindex, fmt_dec);
-NETDEVICE_SHOW(type, fmt_dec);
-NETDEVICE_SHOW(link_mode, fmt_dec);
+NETDEVICE_SHOW_RO(dev_id, fmt_hex);
+NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
+NETDEVICE_SHOW_RO(addr_len, fmt_dec);
+NETDEVICE_SHOW_RO(iflink, fmt_dec);
+NETDEVICE_SHOW_RO(ifindex, fmt_dec);
+NETDEVICE_SHOW_RO(type, fmt_dec);
+NETDEVICE_SHOW_RO(link_mode, fmt_dec);
 
 /* use same locking rules as GIFHWADDR ioctl's */
-static ssize_t show_address(struct device *dev, struct device_attribute *attr,
+static ssize_t address_show(struct device *dev, struct device_attribute *attr,
 			    char *buf)
 {
 	struct net_device *net = to_net_dev(dev);
@@ -117,15 +124,17 @@ static ssize_t show_address(struct device *dev, struct device_attribute *attr,
 	read_unlock(&dev_base_lock);
 	return ret;
 }
+static DEVICE_ATTR_RO(address);
 
-static ssize_t show_broadcast(struct device *dev,
+static ssize_t broadcast_show(struct device *dev,
 			      struct device_attribute *attr, char *buf)
 {
 	struct net_device *net = to_net_dev(dev);
 	if (dev_isalive(net))
 		return sysfs_format_mac(buf, net->broadcast, net->addr_len);
 	return -EINVAL;
 }
+static DEVICE_ATTR_RO(broadcast);
 
 static int change_carrier(struct net_device *net, unsigned long new_carrier)
 {
@@ -134,13 +143,13 @@ static int change_carrier(struct net_device *net, unsigned long new_carrier)
 	return dev_change_carrier(net, (bool) new_carrier);
 }
 
-static ssize_t store_carrier(struct device *dev, struct device_attribute *attr,
+static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
 			     const char *buf, size_t len)
 {
 	return netdev_store(dev, attr, buf, len, change_carrier);
 }
 
-static ssize_t show_carrier(struct device *dev,
+static ssize_t carrier_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
 	struct net_device *netdev = to_net_dev(dev);
@@ -149,8 +158,9 @@ static ssize_t show_carrier(struct device *dev,
 	}
 	return -EINVAL;
 }
+static DEVICE_ATTR_RW(carrier);
 
-static ssize_t show_speed(struct device *dev,
+static ssize_t speed_show(struct device *dev,
 			  struct device_attribute *attr, char *buf)
 {
 	struct net_device *netdev = to_net_dev(dev);
@@ -167,8 +177,9 @@ static ssize_t show_speed(struct device *dev,
 	rtnl_unlock();
 	return ret;
 }
+static DEVICE_ATTR_RO(speed);
 
-static ssize_t show_duplex(struct device *dev,
+static ssize_t duplex_show(struct device *dev,
 			   struct device_attribute *attr, char *buf)
 {
 	struct net_device *netdev = to_net_dev(dev);
@@ -198,8 +209,9 @@ static ssize_t show_duplex(struct device *dev,
 	rtnl_unlock();
 	return ret;
 }
+static DEVICE_ATTR_RO(duplex);
 
-static ssize_t show_dormant(struct device *dev,
+static ssize_t dormant_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
 	struct net_device *netdev = to_net_dev(dev);
@@ -209,6 +221,7 @@ static ssize_t show_dormant(struct device *dev,
 
 	return -EINVAL;
 }
+static DEVICE_ATTR_RO(dormant);
 
 static const char *const operstates[] = {
 	"unknown",
@@ -220,7 +233,7 @@ static const char *const operstates[] = {
 	"up"
 };
 
-static ssize_t show_operstate(struct device *dev,
+static ssize_t operstate_show(struct device *dev,
 			      struct device_attribute *attr, char *buf)
 {
 	const struct net_device *netdev = to_net_dev(dev);
@@ -237,35 +250,33 @@ static ssize_t show_operstate(struct device *dev,
 
 	return sprintf(buf, "%s\n", operstates[operstate]);
 }
+static DEVICE_ATTR_RO(operstate);
 
 /* read-write attributes */
-NETDEVICE_SHOW(mtu, fmt_dec);
 
 static int change_mtu(struct net_device *net, unsigned long new_mtu)
 {
 	return dev_set_mtu(net, (int) new_mtu);
 }
 
-static ssize_t store_mtu(struct device *dev, struct device_attribute *attr,
+static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
 			 const char *buf, size_t len)
 {
 	return netdev_store(dev, attr, buf, len, change_mtu);
 }
-
-NETDEVICE_SHOW(flags, fmt_hex);
+NETDEVICE_SHOW_RW(mtu, fmt_dec);
 
 static int change_flags(struct net_device *net, unsigned long new_flags)
 {
 	return dev_change_flags(net, (unsigned int) new_flags);
 }
 
-static ssize_t store_flags(struct device *dev, struct device_attribute *attr,
+static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
 			   const char *buf, size_t len)
 {
 	return netdev_store(dev, attr, buf, len, change_flags);
 }
-
-NETDEVICE_SHOW(tx_queue_len, fmt_ulong);
+NETDEVICE_SHOW_RW(flags, fmt_hex);
 
 static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
 {
@@ -273,7 +284,7 @@ static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
 	return 0;
 }
 
-static ssize_t store_tx_queue_len(struct device *dev,
+static ssize_t tx_queue_len_store(struct device *dev,
 				  struct device_attribute *attr,
 				  const char *buf, size_t len)
 {
@@ -282,8 +293,9 @@ static ssize_t store_tx_queue_len(struct device *dev,
 
 	return netdev_store(dev, attr, buf, len, change_tx_queue_len);
 }
+NETDEVICE_SHOW_RW(tx_queue_len, fmt_ulong);
 
-static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr,
+static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
 			     const char *buf, size_t len)
 {
 	struct net_device *netdev = to_net_dev(dev);
@@ -306,7 +318,7 @@ static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr,
 	return ret < 0 ? ret : len;
 }
 
-static ssize_t show_ifalias(struct device *dev,
+static ssize_t ifalias_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
 	const struct net_device *netdev = to_net_dev(dev);
@@ -319,8 +331,7 @@ static ssize_t show_ifalias(struct device *dev,
 	rtnl_unlock();
 	return ret;
 }
-
-NETDEVICE_SHOW(group, fmt_dec);
+static DEVICE_ATTR_RW(ifalias);
 
 static int change_group(struct net_device *net, unsigned long new_group)
 {
@@ -328,35 +339,60 @@ static int change_group(struct net_device *net, unsigned long new_group)
 	return 0;
 }
 
-static ssize_t store_group(struct device *dev, struct device_attribute *attr,
+static ssize_t group_store(struct device *dev, struct device_attribute *attr,
 			   const char *buf, size_t len)
 {
 	return netdev_store(dev, attr, buf, len, change_group);
 }
+NETDEVICE_SHOW(group, fmt_dec);
+static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store);
+
+static ssize_t phys_port_id_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+	ssize_t ret = -EINVAL;
 
-static struct device_attribute net_class_attributes[] = {
-	__ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
-	__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
-	__ATTR(dev_id, S_IRUGO, show_dev_id, NULL),
-	__ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias),
-	__ATTR(iflink, S_IRUGO, show_iflink, NULL),
-	__ATTR(ifindex, S_IRUGO, show_ifindex, NULL),
-	__ATTR(type, S_IRUGO, show_type, NULL),
-	__ATTR(link_mode, S_IRUGO, show_link_mode, NULL),
-	__ATTR(address, S_IRUGO, show_address, NULL),
-	__ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
-	__ATTR(carrier, S_IRUGO | S_IWUSR, show_carrier, store_carrier),
-	__ATTR(speed, S_IRUGO, show_speed, NULL),
-	__ATTR(duplex, S_IRUGO, show_duplex, NULL),
-	__ATTR(dormant, S_IRUGO, show_dormant, NULL),
-	__ATTR(operstate, S_IRUGO, show_operstate, NULL),
-	__ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu),
-	__ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
-	__ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
-	       store_tx_queue_len),
-	__ATTR(netdev_group, S_IRUGO | S_IWUSR, show_group, store_group),
-	{}
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (dev_isalive(netdev)) {
+		struct netdev_phys_port_id ppid;
+
+		ret = dev_get_phys_port_id(netdev, &ppid);
+		if (!ret)
+			ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id);
+	}
+	rtnl_unlock();
+
+	return ret;
+}
+static DEVICE_ATTR_RO(phys_port_id);
+
+static struct attribute *net_class_attrs[] = {
+	&dev_attr_netdev_group.attr,
+	&dev_attr_type.attr,
+	&dev_attr_dev_id.attr,
+	&dev_attr_iflink.attr,
+	&dev_attr_ifindex.attr,
+	&dev_attr_addr_assign_type.attr,
+	&dev_attr_addr_len.attr,
+	&dev_attr_link_mode.attr,
+	&dev_attr_address.attr,
+	&dev_attr_broadcast.attr,
+	&dev_attr_speed.attr,
+	&dev_attr_duplex.attr,
+	&dev_attr_dormant.attr,
+	&dev_attr_operstate.attr,
+	&dev_attr_ifalias.attr,
+	&dev_attr_carrier.attr,
+	&dev_attr_mtu.attr,
+	&dev_attr_flags.attr,
+	&dev_attr_tx_queue_len.attr,
+	&dev_attr_phys_port_id.attr,
+	NULL,
 };
+ATTRIBUTE_GROUPS(net_class);
 
 /* Show a given an attribute in the statistics group */
 static ssize_t netstat_show(const struct device *d,
@@ -382,13 +418,13 @@ static ssize_t netstat_show(const struct device *d,
 
 /* generate a read-only statistics attribute */
 #define NETSTAT_ENTRY(name)						\
-static ssize_t show_##name(struct device *d,				\
+static ssize_t name##_show(struct device *d,				\
 			   struct device_attribute *attr, char *buf)	\
 {									\
 	return netstat_show(d, attr, buf,				\
 			    offsetof(struct rtnl_link_stats64, name));	\
 }									\
-static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+static DEVICE_ATTR_RO(name)
 
 NETSTAT_ENTRY(rx_packets);
 NETSTAT_ENTRY(tx_packets);
@@ -457,6 +493,9 @@ static struct attribute_group wireless_group = {
 	.attrs = wireless_attrs,
 };
 #endif
+
+#else /* CONFIG_SYSFS */
+#define net_class_groups	NULL
 #endif /* CONFIG_SYSFS */
 
 #ifdef CONFIG_RPS
@@ -1157,6 +1196,13 @@ static void remove_queue_kobjects(struct net_device *net)
 #endif
 }
 
+static bool net_current_may_mount(void)
+{
+	struct net *net = current->nsproxy->net_ns;
+
+	return ns_capable(net->user_ns, CAP_SYS_ADMIN);
+}
+
 static void *net_grab_current_ns(void)
 {
 	struct net *ns = current->nsproxy->net_ns;
@@ -1179,6 +1225,7 @@ static const void *net_netlink_ns(struct sock *sk)
 
 struct kobj_ns_type_operations net_ns_type_operations = {
 	.type = KOBJ_NS_TYPE_NET,
+	.current_may_mount = net_current_may_mount,
 	.grab_current_ns = net_grab_current_ns,
 	.netlink_ns = net_netlink_ns,
 	.initial_ns = net_initial_ns,
@@ -1229,9 +1276,7 @@ static const void *net_namespace(struct device *d)
 static struct class net_class = {
 	.name = "net",
 	.dev_release = netdev_release,
-#ifdef CONFIG_SYSFS
-	.dev_attrs = net_class_attributes,
-#endif /* CONFIG_SYSFS */
+	.dev_groups = net_class_groups,
 	.dev_uevent = netdev_uevent,
 	.ns_type = &net_ns_type_operations,
 	.namespace = net_namespace,
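
The show_##field to field##_show rename is what makes the DEVICE_ATTR_RO/RW helpers usable: DEVICE_ATTR_RO(name) expects a show function literally called name##_show. Roughly what NETDEVICE_SHOW_RO(ifindex, fmt_dec) expands to (a sketch; format_ifindex comes from the NETDEVICE_SHOW macro above):

static ssize_t ifindex_show(struct device *dev,
                            struct device_attribute *attr, char *buf)
{
        return netdev_show(dev, attr, buf, format_ifindex);
}
static struct device_attribute dev_attr_ifindex = __ATTR_RO(ifindex);

The generated dev_attr_ifindex is then collected in net_class_attrs[] and published through the class's .dev_groups pointer.
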
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index f97652036754..81d3a9a08453 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -651,7 +651,7 @@ static int netns_install(struct nsproxy *nsproxy, void *ns)
 	struct net *net = ns;
 
 	if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
-	    !nsown_capable(CAP_SYS_ADMIN))
+	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
 		return -EPERM;
 
 	put_net(nsproxy->net_ns);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index cec074be8c43..fc75c9e461b8 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -12,6 +12,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/moduleparam.h>
+#include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/string.h>
@@ -247,7 +248,7 @@ static void netpoll_poll_dev(struct net_device *dev)
 	zap_completion_queue();
 }
 
-int netpoll_rx_disable(struct net_device *dev)
+void netpoll_rx_disable(struct net_device *dev)
 {
 	struct netpoll_info *ni;
 	int idx;
@@ -257,7 +258,6 @@ int netpoll_rx_disable(struct net_device *dev)
 	if (ni)
 		down(&ni->dev_lock);
 	srcu_read_unlock(&netpoll_srcu, idx);
-	return 0;
 }
 EXPORT_SYMBOL(netpoll_rx_disable);
 
@@ -550,7 +550,7 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
 		return;
 
 	proto = ntohs(eth_hdr(skb)->h_proto);
-	if (proto == ETH_P_IP) {
+	if (proto == ETH_P_ARP) {
 		struct arphdr *arp;
 		unsigned char *arp_ptr;
 		/* No arp on this interface */
@@ -690,25 +690,20 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
 		send_skb->dev = skb->dev;
 
 		skb_reset_network_header(send_skb);
-		skb_put(send_skb, sizeof(struct ipv6hdr));
-		hdr = ipv6_hdr(send_skb);
-
+		hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr));
 		*(__be32*)hdr = htonl(0x60000000);
-
 		hdr->payload_len = htons(size);
 		hdr->nexthdr = IPPROTO_ICMPV6;
 		hdr->hop_limit = 255;
 		hdr->saddr = *saddr;
 		hdr->daddr = *daddr;
 
-		send_skb->transport_header = send_skb->tail;
-		skb_put(send_skb, size);
-
-		icmp6h = (struct icmp6hdr *)skb_transport_header(skb);
+		icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr));
 		icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
 		icmp6h->icmp6_router = 0;
 		icmp6h->icmp6_solicited = 1;
-		target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr));
+
+		target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr));
 		*target = msg->target;
 		icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
 						      IPPROTO_ICMPV6,
@@ -1289,15 +1284,14 @@ EXPORT_SYMBOL_GPL(__netpoll_free_async);
 
 void netpoll_cleanup(struct netpoll *np)
 {
-	if (!np->dev)
-		return;
-
 	rtnl_lock();
+	if (!np->dev)
+		goto out;
 	__netpoll_cleanup(np);
-	rtnl_unlock();
-
 	dev_put(np->dev);
 	np->dev = NULL;
+out:
+	rtnl_unlock();
 }
 EXPORT_SYMBOL(netpoll_cleanup);
 
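
netpoll_cleanup() now takes rtnl_lock() before testing np->dev, so the test and the teardown are atomic with respect to a concurrent setup or cleanup; the old early-return version could race. A generic pthreads sketch of the same check-under-lock shape (illustrative only, not the kernel code):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static void *resource;

static void cleanup(void)
{
        pthread_mutex_lock(&lock);
        if (!resource)
                goto out;       /* already torn down: nothing to do */
        free(resource);
        resource = NULL;
out:
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        resource = malloc(16);
        cleanup();
        cleanup();      /* second call is a harmless no-op */
        return 0;
}
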
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 0777d0aa18c3..d9cd627e6a16 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -29,12 +29,6 @@
 
 #define PRIOMAP_MIN_SZ		128
 
-static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
-{
-	return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id),
-			    struct cgroup_netprio_state, css);
-}
-
 /*
  * Extend @dev->priomap so that it's large enough to accomodate
  * @target_idx.  @dev->priomap.priomap_len > @target_idx after successful
@@ -87,67 +81,70 @@ static int extend_netdev_table(struct net_device *dev, u32 target_idx)
 
 /**
  * netprio_prio - return the effective netprio of a cgroup-net_device pair
- * @cgrp: cgroup part of the target pair
+ * @css: css part of the target pair
  * @dev: net_device part of the target pair
 *
  * Should be called under RCU read or rtnl lock.
  */
-static u32 netprio_prio(struct cgroup *cgrp, struct net_device *dev)
+static u32 netprio_prio(struct cgroup_subsys_state *css, struct net_device *dev)
 {
 	struct netprio_map *map = rcu_dereference_rtnl(dev->priomap);
+	int id = css->cgroup->id;
 
-	if (map && cgrp->id < map->priomap_len)
-		return map->priomap[cgrp->id];
+	if (map && id < map->priomap_len)
+		return map->priomap[id];
 	return 0;
 }
 
 /**
  * netprio_set_prio - set netprio on a cgroup-net_device pair
- * @cgrp: cgroup part of the target pair
+ * @css: css part of the target pair
  * @dev: net_device part of the target pair
  * @prio: prio to set
 *
- * Set netprio to @prio on @cgrp-@dev pair.  Should be called under rtnl
+ * Set netprio to @prio on @css-@dev pair.  Should be called under rtnl
 * lock and may fail under memory pressure for non-zero @prio.
 */
-static int netprio_set_prio(struct cgroup *cgrp, struct net_device *dev,
-			    u32 prio)
+static int netprio_set_prio(struct cgroup_subsys_state *css,
+			    struct net_device *dev, u32 prio)
 {
 	struct netprio_map *map;
+	int id = css->cgroup->id;
 	int ret;
 
 	/* avoid extending priomap for zero writes */
 	map = rtnl_dereference(dev->priomap);
-	if (!prio && (!map || map->priomap_len <= cgrp->id))
+	if (!prio && (!map || map->priomap_len <= id))
 		return 0;
 
-	ret = extend_netdev_table(dev, cgrp->id);
+	ret = extend_netdev_table(dev, id);
 	if (ret)
 		return ret;
 
 	map = rtnl_dereference(dev->priomap);
-	map->priomap[cgrp->id] = prio;
+	map->priomap[id] = prio;
 	return 0;
 }
 
-static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
 {
-	struct cgroup_netprio_state *cs;
+	struct cgroup_subsys_state *css;
 
-	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
-	if (!cs)
+	css = kzalloc(sizeof(*css), GFP_KERNEL);
+	if (!css)
 		return ERR_PTR(-ENOMEM);
 
-	return &cs->css;
+	return css;
 }
 
-static int cgrp_css_online(struct cgroup *cgrp)
+static int cgrp_css_online(struct cgroup_subsys_state *css)
 {
-	struct cgroup *parent = cgrp->parent;
+	struct cgroup_subsys_state *parent_css = css_parent(css);
 	struct net_device *dev;
 	int ret = 0;
 
-	if (!parent)
+	if (!parent_css)
 		return 0;
 
 	rtnl_lock();
@@ -156,9 +153,9 @@ static int cgrp_css_online(struct cgroup *cgrp)
 	 * onlining, there is no need to clear them on offline.
 	 */
 	for_each_netdev(&init_net, dev) {
-		u32 prio = netprio_prio(parent, dev);
+		u32 prio = netprio_prio(parent_css, dev);
 
-		ret = netprio_set_prio(cgrp, dev, prio);
+		ret = netprio_set_prio(css, dev, prio);
 		if (ret)
 			break;
 	}
@@ -166,29 +163,29 @@ static int cgrp_css_online(struct cgroup *cgrp)
 	return ret;
 }
 
-static void cgrp_css_free(struct cgroup *cgrp)
+static void cgrp_css_free(struct cgroup_subsys_state *css)
 {
-	kfree(cgrp_netprio_state(cgrp));
+	kfree(css);
 }
 
-static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
+static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft)
 {
-	return cgrp->id;
+	return css->cgroup->id;
 }
 
-static int read_priomap(struct cgroup *cont, struct cftype *cft,
+static int read_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
 			struct cgroup_map_cb *cb)
 {
 	struct net_device *dev;
 
 	rcu_read_lock();
 	for_each_netdev_rcu(&init_net, dev)
-		cb->fill(cb, dev->name, netprio_prio(cont, dev));
+		cb->fill(cb, dev->name, netprio_prio(css, dev));
 	rcu_read_unlock();
 	return 0;
 }
 
-static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
+static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
 			 const char *buffer)
 {
 	char devname[IFNAMSIZ + 1];
@@ -205,7 +202,7 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
 
 	rtnl_lock();
 
-	ret = netprio_set_prio(cgrp, dev, prio);
+	ret = netprio_set_prio(css, dev, prio);
 
 	rtnl_unlock();
 	dev_put(dev);
@@ -221,12 +218,13 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
 	return 0;
 }
 
-static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static void net_prio_attach(struct cgroup_subsys_state *css,
+			    struct cgroup_taskset *tset)
 {
 	struct task_struct *p;
 	void *v;
 
-	cgroup_taskset_for_each(p, cgrp, tset) {
+	cgroup_taskset_for_each(p, css, tset) {
 		task_lock(p);
 		v = (void *)(unsigned long)task_netprioidx(p);
 		iterate_fd(p->files, 0, update_netprio, v);
@@ -261,7 +259,7 @@ struct cgroup_subsys net_prio_subsys = {
 static int netprio_device_event(struct notifier_block *unused,
 				unsigned long event, void *ptr)
 {
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct netprio_map *old;
 
 	/*
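
The dev = ptr to netdev_notifier_info_to_dev(ptr) change recurs throughout this series (fib_rules, netprio, pktgen): netdevice notifiers are now handed a struct netdev_notifier_info rather than the device pointer itself. A minimal sketch of a notifier written against the new convention:

#include <linux/netdevice.h>
#include <linux/notifier.h>

static int example_device_event(struct notifier_block *unused,
                                unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);

        if (event == NETDEV_UNREGISTER) {
                /* drop any per-device state kept for dev here */
        }
        return NOTIFY_DONE;
}
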
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 11f2704c3810..261357a66300 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -160,6 +160,8 @@
160#include <net/net_namespace.h> 160#include <net/net_namespace.h>
161#include <net/checksum.h> 161#include <net/checksum.h>
162#include <net/ipv6.h> 162#include <net/ipv6.h>
163#include <net/udp.h>
164#include <net/ip6_checksum.h>
163#include <net/addrconf.h> 165#include <net/addrconf.h>
164#ifdef CONFIG_XFRM 166#ifdef CONFIG_XFRM
165#include <net/xfrm.h> 167#include <net/xfrm.h>
@@ -198,6 +200,7 @@
198#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ 200#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
199#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ 201#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
200#define F_NODE (1<<15) /* Node memory alloc*/ 202#define F_NODE (1<<15) /* Node memory alloc*/
203#define F_UDPCSUM (1<<16) /* Include UDP checksum */
201 204
202/* Thread control flag bits */ 205/* Thread control flag bits */
203#define T_STOP (1<<0) /* Stop run */ 206#define T_STOP (1<<0) /* Stop run */
@@ -631,6 +634,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
631 if (pkt_dev->flags & F_UDPDST_RND) 634 if (pkt_dev->flags & F_UDPDST_RND)
632 seq_printf(seq, "UDPDST_RND "); 635 seq_printf(seq, "UDPDST_RND ");
633 636
637 if (pkt_dev->flags & F_UDPCSUM)
638 seq_printf(seq, "UDPCSUM ");
639
634 if (pkt_dev->flags & F_MPLS_RND) 640 if (pkt_dev->flags & F_MPLS_RND)
635 seq_printf(seq, "MPLS_RND "); 641 seq_printf(seq, "MPLS_RND ");
636 642
@@ -1228,6 +1234,12 @@ static ssize_t pktgen_if_write(struct file *file,
1228 else if (strcmp(f, "!NODE_ALLOC") == 0) 1234 else if (strcmp(f, "!NODE_ALLOC") == 0)
1229 pkt_dev->flags &= ~F_NODE; 1235 pkt_dev->flags &= ~F_NODE;
1230 1236
1237 else if (strcmp(f, "UDPCSUM") == 0)
1238 pkt_dev->flags |= F_UDPCSUM;
1239
1240 else if (strcmp(f, "!UDPCSUM") == 0)
1241 pkt_dev->flags &= ~F_UDPCSUM;
1242
1231 else { 1243 else {
1232 sprintf(pg_result, 1244 sprintf(pg_result,
1233 "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", 1245 "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
@@ -1921,7 +1933,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
1921static int pktgen_device_event(struct notifier_block *unused, 1933static int pktgen_device_event(struct notifier_block *unused,
1922 unsigned long event, void *ptr) 1934 unsigned long event, void *ptr)
1923{ 1935{
1924 struct net_device *dev = ptr; 1936 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1925 struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id); 1937 struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id);
1926 1938
1927 if (pn->pktgen_exiting) 1939 if (pn->pktgen_exiting)
@@ -2627,6 +2639,29 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
2627 pgh->tv_usec = htonl(timestamp.tv_usec); 2639 pgh->tv_usec = htonl(timestamp.tv_usec);
2628} 2640}
2629 2641
2642static struct sk_buff *pktgen_alloc_skb(struct net_device *dev,
2643 struct pktgen_dev *pkt_dev,
2644 unsigned int extralen)
2645{
2646 struct sk_buff *skb = NULL;
2647 unsigned int size = pkt_dev->cur_pkt_size + 64 + extralen +
2648 pkt_dev->pkt_overhead;
2649
2650 if (pkt_dev->flags & F_NODE) {
2651 int node = pkt_dev->node >= 0 ? pkt_dev->node : numa_node_id();
2652
2653 skb = __alloc_skb(NET_SKB_PAD + size, GFP_NOWAIT, 0, node);
2654 if (likely(skb)) {
2655 skb_reserve(skb, NET_SKB_PAD);
2656 skb->dev = dev;
2657 }
2658 } else {
2659 skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT);
2660 }
2661
2662 return skb;
2663}
2664
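
The helper folds two near-identical allocation blocks into one place: the NUMA-pinned F_NODE path reserves NET_SKB_PAD by hand, while the default path lets __netdev_alloc_skb() handle both the padding and skb->dev. Both packet builders then shrink to a single call; from the IPv4 path further down (the IPv6 path passes a fixed 16-byte extralen instead):

    skb = pktgen_alloc_skb(odev, pkt_dev, datalen);
    if (!skb) {
            sprintf(pkt_dev->result, "No memory");
            return NULL;
    }
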
2630static struct sk_buff *fill_packet_ipv4(struct net_device *odev, 2665static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2631 struct pktgen_dev *pkt_dev) 2666 struct pktgen_dev *pkt_dev)
2632{ 2667{
@@ -2657,32 +2692,13 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2657 2692
2658 datalen = (odev->hard_header_len + 16) & ~0xf; 2693 datalen = (odev->hard_header_len + 16) & ~0xf;
2659 2694
2660 if (pkt_dev->flags & F_NODE) { 2695 skb = pktgen_alloc_skb(odev, pkt_dev, datalen);
2661 int node;
2662
2663 if (pkt_dev->node >= 0)
2664 node = pkt_dev->node;
2665 else
2666 node = numa_node_id();
2667
2668 skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64
2669 + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node);
2670 if (likely(skb)) {
2671 skb_reserve(skb, NET_SKB_PAD);
2672 skb->dev = odev;
2673 }
2674 }
2675 else
2676 skb = __netdev_alloc_skb(odev,
2677 pkt_dev->cur_pkt_size + 64
2678 + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT);
2679
2680 if (!skb) { 2696 if (!skb) {
2681 sprintf(pkt_dev->result, "No memory"); 2697 sprintf(pkt_dev->result, "No memory");
2682 return NULL; 2698 return NULL;
2683 } 2699 }
2684 prefetchw(skb->data);
2685 2700
2701 prefetchw(skb->data);
2686 skb_reserve(skb, datalen); 2702 skb_reserve(skb, datalen);
2687 2703
2688 /* Reserve for ethernet and IP header */ 2704 /* Reserve for ethernet and IP header */
@@ -2708,15 +2724,15 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2708 *vlan_encapsulated_proto = htons(ETH_P_IP); 2724 *vlan_encapsulated_proto = htons(ETH_P_IP);
2709 } 2725 }
2710 2726
2711 skb->network_header = skb->tail; 2727 skb_set_mac_header(skb, 0);
2712 skb->transport_header = skb->network_header + sizeof(struct iphdr); 2728 skb_set_network_header(skb, skb->len);
2713 skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); 2729 iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr));
2730
2731 skb_set_transport_header(skb, skb->len);
2732 udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr));
2714 skb_set_queue_mapping(skb, queue_map); 2733 skb_set_queue_mapping(skb, queue_map);
2715 skb->priority = pkt_dev->skb_priority; 2734 skb->priority = pkt_dev->skb_priority;
2716 2735
2717 iph = ip_hdr(skb);
2718 udph = udp_hdr(skb);
2719
2720 memcpy(eth, pkt_dev->hh, 12); 2736 memcpy(eth, pkt_dev->hh, 12);
2721 *(__be16 *) & eth[12] = protocol; 2737 *(__be16 *) & eth[12] = protocol;
2722 2738
@@ -2729,7 +2745,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2729 udph->source = htons(pkt_dev->cur_udp_src); 2745 udph->source = htons(pkt_dev->cur_udp_src);
2730 udph->dest = htons(pkt_dev->cur_udp_dst); 2746 udph->dest = htons(pkt_dev->cur_udp_dst);
2731 udph->len = htons(datalen + 8); /* DATA + udphdr */ 2747 udph->len = htons(datalen + 8); /* DATA + udphdr */
2732 udph->check = 0; /* No checksum */ 2748 udph->check = 0;
2733 2749
2734 iph->ihl = 5; 2750 iph->ihl = 5;
2735 iph->version = 4; 2751 iph->version = 4;
@@ -2743,13 +2759,28 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2743 iph->frag_off = 0; 2759 iph->frag_off = 0;
2744 iplen = 20 + 8 + datalen; 2760 iplen = 20 + 8 + datalen;
2745 iph->tot_len = htons(iplen); 2761 iph->tot_len = htons(iplen);
2746 iph->check = 0; 2762 ip_send_check(iph);
2747 iph->check = ip_fast_csum((void *)iph, iph->ihl);
2748 skb->protocol = protocol; 2763 skb->protocol = protocol;
2749 skb->mac_header = (skb->network_header - ETH_HLEN -
2750 pkt_dev->pkt_overhead);
2751 skb->dev = odev; 2764 skb->dev = odev;
2752 skb->pkt_type = PACKET_HOST; 2765 skb->pkt_type = PACKET_HOST;
2766
2767 if (!(pkt_dev->flags & F_UDPCSUM)) {
2768 skb->ip_summed = CHECKSUM_NONE;
2769 } else if (odev->features & NETIF_F_V4_CSUM) {
2770 skb->ip_summed = CHECKSUM_PARTIAL;
2771 skb->csum = 0;
2772 udp4_hwcsum(skb, udph->source, udph->dest);
2773 } else {
2774 __wsum csum = udp_csum(skb);
2775
2776 /* add protocol-dependent pseudo-header */
2777 udph->check = csum_tcpudp_magic(udph->source, udph->dest,
2778 datalen + 8, IPPROTO_UDP, csum);
2779
2780 if (udph->check == 0)
2781 udph->check = CSUM_MANGLED_0;
2782 }
2783
2753 pktgen_finalize_skb(pkt_dev, skb, datalen); 2784 pktgen_finalize_skb(pkt_dev, skb, datalen);
2754 2785
2755#ifdef CONFIG_XFRM 2786#ifdef CONFIG_XFRM
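
The new F_UDPCSUM handling picks one of three paths: leave the checksum at zero (CHECKSUM_NONE, the old unconditional behaviour), hand the fold to capable hardware (CHECKSUM_PARTIAL via udp4_hwcsum(), gated on NETIF_F_V4_CSUM), or compute it in software over the pseudo-header. A compilable user-space sketch of that last fallback, assuming everything is already in wire format and udph->check was zeroed first, as in the hunk above:

    #include <stdint.h>
    #include <stddef.h>

    /* Ones'-complement UDP/IPv4 checksum: pseudo-header (src, dst,
     * proto 17, UDP length) plus the UDP header and payload, which is
     * what csum_tcpudp_magic() finalizes in the kernel path above.
     */
    static uint16_t udp4_checksum(const uint8_t saddr[4],
                                  const uint8_t daddr[4],
                                  const uint8_t *udp, size_t len)
    {
            uint32_t sum = 0;
            size_t i;

            for (i = 0; i < 4; i += 2) {
                    sum += (saddr[i] << 8) | saddr[i + 1];
                    sum += (daddr[i] << 8) | daddr[i + 1];
            }
            sum += 17;              /* IPPROTO_UDP */
            sum += (uint32_t)len;   /* UDP header + data */

            for (i = 0; i + 1 < len; i += 2)
                    sum += (udp[i] << 8) | udp[i + 1];
            if (len & 1)            /* pad the odd trailing byte */
                    sum += udp[len - 1] << 8;

            while (sum >> 16)       /* fold the carries back in */
                    sum = (sum & 0xffff) + (sum >> 16);

            sum = ~sum & 0xffff;
            return sum ? (uint16_t)sum : 0xffff; /* CSUM_MANGLED_0 */
    }
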
@@ -2766,7 +2797,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2766 struct sk_buff *skb = NULL; 2797 struct sk_buff *skb = NULL;
2767 __u8 *eth; 2798 __u8 *eth;
2768 struct udphdr *udph; 2799 struct udphdr *udph;
2769 int datalen; 2800 int datalen, udplen;
2770 struct ipv6hdr *iph; 2801 struct ipv6hdr *iph;
2771 __be16 protocol = htons(ETH_P_IPV6); 2802 __be16 protocol = htons(ETH_P_IPV6);
2772 __be32 *mpls; 2803 __be32 *mpls;
@@ -2788,15 +2819,13 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2788 mod_cur_headers(pkt_dev); 2819 mod_cur_headers(pkt_dev);
2789 queue_map = pkt_dev->cur_queue_map; 2820 queue_map = pkt_dev->cur_queue_map;
2790 2821
2791 skb = __netdev_alloc_skb(odev, 2822 skb = pktgen_alloc_skb(odev, pkt_dev, 16);
2792 pkt_dev->cur_pkt_size + 64
2793 + 16 + pkt_dev->pkt_overhead, GFP_NOWAIT);
2794 if (!skb) { 2823 if (!skb) {
2795 sprintf(pkt_dev->result, "No memory"); 2824 sprintf(pkt_dev->result, "No memory");
2796 return NULL; 2825 return NULL;
2797 } 2826 }
2798 prefetchw(skb->data);
2799 2827
2828 prefetchw(skb->data);
2800 skb_reserve(skb, 16); 2829 skb_reserve(skb, 16);
2801 2830
2802 /* Reserve for ethernet and IP header */ 2831 /* Reserve for ethernet and IP header */
@@ -2822,13 +2851,14 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2822 *vlan_encapsulated_proto = htons(ETH_P_IPV6); 2851 *vlan_encapsulated_proto = htons(ETH_P_IPV6);
2823 } 2852 }
2824 2853
2825 skb->network_header = skb->tail; 2854 skb_set_mac_header(skb, 0);
2826 skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); 2855 skb_set_network_header(skb, skb->len);
2827 skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); 2856 iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
2857
2858 skb_set_transport_header(skb, skb->len);
2859 udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr));
2828 skb_set_queue_mapping(skb, queue_map); 2860 skb_set_queue_mapping(skb, queue_map);
2829 skb->priority = pkt_dev->skb_priority; 2861 skb->priority = pkt_dev->skb_priority;
2830 iph = ipv6_hdr(skb);
2831 udph = udp_hdr(skb);
2832 2862
2833 memcpy(eth, pkt_dev->hh, 12); 2863 memcpy(eth, pkt_dev->hh, 12);
2834 *(__be16 *) &eth[12] = protocol; 2864 *(__be16 *) &eth[12] = protocol;
@@ -2843,10 +2873,11 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2843 net_info_ratelimited("increased datalen to %d\n", datalen); 2873 net_info_ratelimited("increased datalen to %d\n", datalen);
2844 } 2874 }
2845 2875
2876 udplen = datalen + sizeof(struct udphdr);
2846 udph->source = htons(pkt_dev->cur_udp_src); 2877 udph->source = htons(pkt_dev->cur_udp_src);
2847 udph->dest = htons(pkt_dev->cur_udp_dst); 2878 udph->dest = htons(pkt_dev->cur_udp_dst);
2848 udph->len = htons(datalen + sizeof(struct udphdr)); 2879 udph->len = htons(udplen);
2849 udph->check = 0; /* No checksum */ 2880 udph->check = 0;
2850 2881
2851 *(__be32 *) iph = htonl(0x60000000); /* Version + flow */ 2882 *(__be32 *) iph = htonl(0x60000000); /* Version + flow */
2852 2883
@@ -2857,18 +2888,33 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2857 2888
2858 iph->hop_limit = 32; 2889 iph->hop_limit = 32;
2859 2890
2860 iph->payload_len = htons(sizeof(struct udphdr) + datalen); 2891 iph->payload_len = htons(udplen);
2861 iph->nexthdr = IPPROTO_UDP; 2892 iph->nexthdr = IPPROTO_UDP;
2862 2893
2863 iph->daddr = pkt_dev->cur_in6_daddr; 2894 iph->daddr = pkt_dev->cur_in6_daddr;
2864 iph->saddr = pkt_dev->cur_in6_saddr; 2895 iph->saddr = pkt_dev->cur_in6_saddr;
2865 2896
2866 skb->mac_header = (skb->network_header - ETH_HLEN -
2867 pkt_dev->pkt_overhead);
2868 skb->protocol = protocol; 2897 skb->protocol = protocol;
2869 skb->dev = odev; 2898 skb->dev = odev;
2870 skb->pkt_type = PACKET_HOST; 2899 skb->pkt_type = PACKET_HOST;
2871 2900
2901 if (!(pkt_dev->flags & F_UDPCSUM)) {
2902 skb->ip_summed = CHECKSUM_NONE;
2903 } else if (odev->features & NETIF_F_V6_CSUM) {
2904 skb->ip_summed = CHECKSUM_PARTIAL;
2905 skb->csum_start = skb_transport_header(skb) - skb->head;
2906 skb->csum_offset = offsetof(struct udphdr, check);
2907 udph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, 0);
2908 } else {
2909 __wsum csum = udp_csum(skb);
2910
2911 /* add protocol-dependent pseudo-header */
2912 udph->check = csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, csum);
2913
2914 if (udph->check == 0)
2915 udph->check = CSUM_MANGLED_0;
2916 }
2917
2872 pktgen_finalize_skb(pkt_dev, skb, datalen); 2918 pktgen_finalize_skb(pkt_dev, skb, datalen);
2873 2919
2874 return skb; 2920 return skb;
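
The IPv6 side mirrors the IPv4 logic, with one detail worth calling out: for CHECKSUM_PARTIAL the skb must tell the driver where to start summing and where to store the result, and the check field is seeded with the negated pseudo-header sum so the hardware fold comes out right. That is exactly the trio set in the hunk above:

    skb->ip_summed = CHECKSUM_PARTIAL;
    /* offset from skb->head at which hardware starts checksumming */
    skb->csum_start = skb_transport_header(skb) - skb->head;
    /* where, relative to csum_start, the result is written back */
    skb->csum_offset = offsetof(struct udphdr, check);
    /* seed: inverted pseudo-header sum, completed by the device */
    udph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr,
                                   udplen, IPPROTO_UDP, 0);
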
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a08bd2b7fe3f..2a0e21de3060 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -767,7 +767,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
767 + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ 767 + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
768 + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ 768 + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
769 + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ 769 + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
770 + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ 770 + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
771 + nla_total_size(MAX_PHYS_PORT_ID_LEN); /* IFLA_PHYS_PORT_ID */
771} 772}
772 773
773static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) 774static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -846,6 +847,24 @@ static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
846 return 0; 847 return 0;
847} 848}
848 849
850static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev)
851{
852 int err;
853 struct netdev_phys_port_id ppid;
854
855 err = dev_get_phys_port_id(dev, &ppid);
856 if (err) {
857 if (err == -EOPNOTSUPP)
858 return 0;
859 return err;
860 }
861
862 if (nla_put(skb, IFLA_PHYS_PORT_ID, ppid.id_len, ppid.id))
863 return -EMSGSIZE;
864
865 return 0;
866}
867
849static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, 868static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
850 int type, u32 pid, u32 seq, u32 change, 869 int type, u32 pid, u32 seq, u32 change,
851 unsigned int flags, u32 ext_filter_mask) 870 unsigned int flags, u32 ext_filter_mask)
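
rtnl_phys_port_id_fill() follows the usual pattern for optional attributes: -EOPNOTSUPP from a driver without ndo_get_phys_port_id is swallowed and the attribute is simply omitted, while any other error aborts the fill. Note that if_nlmsg_size() above now reserves nla_total_size(MAX_PHYS_PORT_ID_LEN) unconditionally, which is safe over-reservation even when the attribute is skipped. The shape of the pattern, condensed:

    err = dev_get_phys_port_id(dev, &ppid);
    if (err == -EOPNOTSUPP)
            return 0;       /* optional: driver has no port ID */
    if (err)
            return err;     /* real failure: abort the fill */
    return nla_put(skb, IFLA_PHYS_PORT_ID, ppid.id_len, ppid.id) ?
           -EMSGSIZE : 0;
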
@@ -913,6 +932,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
913 goto nla_put_failure; 932 goto nla_put_failure;
914 } 933 }
915 934
935 if (rtnl_phys_port_id_fill(skb, dev))
936 goto nla_put_failure;
937
916 attr = nla_reserve(skb, IFLA_STATS, 938 attr = nla_reserve(skb, IFLA_STATS,
917 sizeof(struct rtnl_link_stats)); 939 sizeof(struct rtnl_link_stats));
918 if (attr == NULL) 940 if (attr == NULL)
@@ -947,6 +969,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
947 struct ifla_vf_vlan vf_vlan; 969 struct ifla_vf_vlan vf_vlan;
948 struct ifla_vf_tx_rate vf_tx_rate; 970 struct ifla_vf_tx_rate vf_tx_rate;
949 struct ifla_vf_spoofchk vf_spoofchk; 971 struct ifla_vf_spoofchk vf_spoofchk;
972 struct ifla_vf_link_state vf_linkstate;
950 973
951 /* 974 /*
952 * Not all SR-IOV capable drivers support the 975 * Not all SR-IOV capable drivers support the
@@ -956,18 +979,24 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
956 */ 979 */
957 ivi.spoofchk = -1; 980 ivi.spoofchk = -1;
958 memset(ivi.mac, 0, sizeof(ivi.mac)); 981 memset(ivi.mac, 0, sizeof(ivi.mac));
982 /* The default value for VF link state is "auto"
983 * IFLA_VF_LINK_STATE_AUTO which equals zero
984 */
985 ivi.linkstate = 0;
959 if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) 986 if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
960 break; 987 break;
961 vf_mac.vf = 988 vf_mac.vf =
962 vf_vlan.vf = 989 vf_vlan.vf =
963 vf_tx_rate.vf = 990 vf_tx_rate.vf =
964 vf_spoofchk.vf = ivi.vf; 991 vf_spoofchk.vf =
992 vf_linkstate.vf = ivi.vf;
965 993
966 memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); 994 memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
967 vf_vlan.vlan = ivi.vlan; 995 vf_vlan.vlan = ivi.vlan;
968 vf_vlan.qos = ivi.qos; 996 vf_vlan.qos = ivi.qos;
969 vf_tx_rate.rate = ivi.tx_rate; 997 vf_tx_rate.rate = ivi.tx_rate;
970 vf_spoofchk.setting = ivi.spoofchk; 998 vf_spoofchk.setting = ivi.spoofchk;
999 vf_linkstate.link_state = ivi.linkstate;
971 vf = nla_nest_start(skb, IFLA_VF_INFO); 1000 vf = nla_nest_start(skb, IFLA_VF_INFO);
972 if (!vf) { 1001 if (!vf) {
973 nla_nest_cancel(skb, vfinfo); 1002 nla_nest_cancel(skb, vfinfo);
@@ -978,7 +1007,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
978 nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), 1007 nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
979 &vf_tx_rate) || 1008 &vf_tx_rate) ||
980 nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), 1009 nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
981 &vf_spoofchk)) 1010 &vf_spoofchk) ||
1011 nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
1012 &vf_linkstate))
982 goto nla_put_failure; 1013 goto nla_put_failure;
983 nla_nest_end(skb, vf); 1014 nla_nest_end(skb, vf);
984 } 1015 }
@@ -1104,6 +1135,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
1104 [IFLA_PROMISCUITY] = { .type = NLA_U32 }, 1135 [IFLA_PROMISCUITY] = { .type = NLA_U32 },
1105 [IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 }, 1136 [IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 },
1106 [IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 }, 1137 [IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
1138 [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_PORT_ID_LEN },
1107}; 1139};
1108EXPORT_SYMBOL(ifla_policy); 1140EXPORT_SYMBOL(ifla_policy);
1109 1141
@@ -1238,6 +1270,15 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
1238 ivs->setting); 1270 ivs->setting);
1239 break; 1271 break;
1240 } 1272 }
1273 case IFLA_VF_LINK_STATE: {
1274 struct ifla_vf_link_state *ivl;
1275 ivl = nla_data(vf);
1276 err = -EOPNOTSUPP;
1277 if (ops->ndo_set_vf_link_state)
1278 err = ops->ndo_set_vf_link_state(dev, ivl->vf,
1279 ivl->link_state);
1280 break;
1281 }
1241 default: 1282 default:
1242 err = -EINVAL; 1283 err = -EINVAL;
1243 break; 1284 break;
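
The new IFLA_VF_LINK_STATE leg follows the other per-VF attributes: a fixed-size struct payload, and -EOPNOTSUPP when the driver lacks the ndo. A sketch of the payload a user-space tool would nest under IFLA_VF_INFO; only IFLA_VF_LINK_STATE_AUTO == 0 is confirmed by the dump-side comment above, and the helper name here is hypothetical:

    #include <linux/if_link.h>

    /* Hypothetical builder for the attribute payload that
     * do_setvfinfo() above unpacks with nla_data().
     */
    static struct ifla_vf_link_state vf_link_state_req(__u32 vf)
    {
            struct ifla_vf_link_state ivl = {
                    .vf         = vf,
                    .link_state = 0,    /* IFLA_VF_LINK_STATE_AUTO */
            };
            return ivl;
    }
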
@@ -1826,10 +1867,10 @@ replay:
1826 else 1867 else
1827 err = register_netdevice(dev); 1868 err = register_netdevice(dev);
1828 1869
1829 if (err < 0 && !IS_ERR(dev)) 1870 if (err < 0) {
1830 free_netdev(dev); 1871 free_netdev(dev);
1831 if (err < 0)
1832 goto out; 1872 goto out;
1873 }
1833 1874
1834 err = rtnl_configure_link(dev, ifm); 1875 err = rtnl_configure_link(dev, ifm);
1835 if (err < 0) 1876 if (err < 0)
@@ -2091,10 +2132,6 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
2091 } 2132 }
2092 2133
2093 addr = nla_data(tb[NDA_LLADDR]); 2134 addr = nla_data(tb[NDA_LLADDR]);
2094 if (is_zero_ether_addr(addr)) {
2095 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n");
2096 return -EINVAL;
2097 }
2098 2135
2099 err = -EOPNOTSUPP; 2136 err = -EOPNOTSUPP;
2100 2137
@@ -2142,7 +2179,7 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm,
2142 /* If aging addresses are supported device will need to 2179 /* If aging addresses are supported device will need to
2143 * implement its own handler for this. 2180 * implement its own handler for this.
2144 */ 2181 */
2145 if (ndm->ndm_state & NUD_PERMANENT) { 2182 if (!(ndm->ndm_state & NUD_PERMANENT)) {
2146 pr_info("%s: FDB only supports static addresses\n", dev->name); 2183 pr_info("%s: FDB only supports static addresses\n", dev->name);
2147 return -EINVAL; 2184 return -EINVAL;
2148 } 2185 }
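
This small change is a logic inversion fix: ndo_dflt_fdb_del() only knows how to remove static entries, so it must reject requests that lack NUD_PERMANENT, not ones that have it. With the old test every legitimate static delete failed with -EINVAL while aging entries slipped through. The corrected guard, with the intent spelled out:

    /* the default handler covers static entries only; anything that
     * ages out needs a driver-specific ndo_fdb_del
     */
    if (!(ndm->ndm_state & NUD_PERMANENT)) {
            pr_info("%s: FDB only supports static addresses\n", dev->name);
            return -EINVAL;
    }
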
@@ -2192,10 +2229,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
2192 } 2229 }
2193 2230
2194 addr = nla_data(tb[NDA_LLADDR]); 2231 addr = nla_data(tb[NDA_LLADDR]);
2195 if (is_zero_ether_addr(addr)) {
2196 pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n");
2197 return -EINVAL;
2198 }
2199 2232
2200 err = -EOPNOTSUPP; 2233 err = -EOPNOTSUPP;
2201 2234
@@ -2374,7 +2407,7 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
2374 struct nlattr *extfilt; 2407 struct nlattr *extfilt;
2375 u32 filter_mask = 0; 2408 u32 filter_mask = 0;
2376 2409
2377 extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg), 2410 extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct ifinfomsg),
2378 IFLA_EXT_MASK); 2411 IFLA_EXT_MASK);
2379 if (extfilt) 2412 if (extfilt)
2380 filter_mask = nla_get_u32(extfilt); 2413 filter_mask = nla_get_u32(extfilt);
@@ -2667,7 +2700,7 @@ static void rtnetlink_rcv(struct sk_buff *skb)
2667 2700
2668static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) 2701static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
2669{ 2702{
2670 struct net_device *dev = ptr; 2703 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2671 2704
2672 switch (event) { 2705 switch (event) {
2673 case NETDEV_UP: 2706 case NETDEV_UP:
diff --git a/net/core/scm.c b/net/core/scm.c
index 03795d0147f2..b442e7e25e60 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -54,11 +54,11 @@ static __inline__ int scm_check_creds(struct ucred *creds)
54 return -EINVAL; 54 return -EINVAL;
55 55
56 if ((creds->pid == task_tgid_vnr(current) || 56 if ((creds->pid == task_tgid_vnr(current) ||
57 ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) && 57 ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) &&
58 ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || 58 ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
59 uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && 59 uid_eq(uid, cred->suid)) || ns_capable(cred->user_ns, CAP_SETUID)) &&
60 ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || 60 ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
61 gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) { 61 gid_eq(gid, cred->sgid)) || ns_capable(cred->user_ns, CAP_SETGID))) {
62 return 0; 62 return 0;
63 } 63 }
64 return -EPERM; 64 return -EPERM;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 6a2f13cee86a..3f1ec1586ae1 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -10,11 +10,24 @@
10 10
11#include <net/secure_seq.h> 11#include <net/secure_seq.h>
12 12
13static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; 13#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
14 14
15void net_secret_init(void) 15static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
16
17static void net_secret_init(void)
16{ 18{
17 get_random_bytes(net_secret, sizeof(net_secret)); 19 u32 tmp;
20 int i;
21
22 if (likely(net_secret[0]))
23 return;
24
25 for (i = NET_SECRET_SIZE; i > 0;) {
26 do {
27 get_random_bytes(&tmp, sizeof(tmp));
28 } while (!tmp);
29 cmpxchg(&net_secret[--i], 0, tmp);
30 }
18} 31}
19 32
20#ifdef CONFIG_INET 33#ifdef CONFIG_INET
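
net_secret_init() is now called lazily from every consumer instead of once at boot, so it has to be cheap and race-free without a lock. Two details carry the scheme: zero doubles as the "not initialized" marker (hence the reroll until tmp != 0), and the array is filled from the top down so net_secret[0] is published last; if the fast-path test sees [0] nonzero, every other word is already set. A compilable user-space sketch of the same idea, using C11 atomics and getrandom(2) in place of the kernel's cmpxchg() and get_random_bytes():

    #include <stdatomic.h>
    #include <stdint.h>
    #include <sys/random.h>

    #define NET_SECRET_SIZE 16

    static _Atomic uint32_t net_secret[NET_SECRET_SIZE];

    static void net_secret_init(void)
    {
            uint32_t tmp, expected;
            int i;

            if (atomic_load(&net_secret[0]))    /* already published */
                    return;

            for (i = NET_SECRET_SIZE; i > 0;) {
                    do {    /* 0 is reserved as the "unset" marker */
                            if (getrandom(&tmp, sizeof(tmp), 0) != sizeof(tmp))
                                    tmp = 0;    /* treat failure as a reroll */
                    } while (!tmp);
                    expected = 0;
                    /* first writer wins; losers keep the existing word */
                    atomic_compare_exchange_strong(&net_secret[--i],
                                                   &expected, tmp);
            }
    }
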
@@ -42,6 +55,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
42 u32 hash[MD5_DIGEST_WORDS]; 55 u32 hash[MD5_DIGEST_WORDS];
43 u32 i; 56 u32 i;
44 57
58 net_secret_init();
45 memcpy(hash, saddr, 16); 59 memcpy(hash, saddr, 16);
46 for (i = 0; i < 4; i++) 60 for (i = 0; i < 4; i++)
47 secret[i] = net_secret[i] + (__force u32)daddr[i]; 61 secret[i] = net_secret[i] + (__force u32)daddr[i];
@@ -63,6 +77,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
63 u32 hash[MD5_DIGEST_WORDS]; 77 u32 hash[MD5_DIGEST_WORDS];
64 u32 i; 78 u32 i;
65 79
80 net_secret_init();
66 memcpy(hash, saddr, 16); 81 memcpy(hash, saddr, 16);
67 for (i = 0; i < 4; i++) 82 for (i = 0; i < 4; i++)
68 secret[i] = net_secret[i] + (__force u32) daddr[i]; 83 secret[i] = net_secret[i] + (__force u32) daddr[i];
@@ -82,6 +97,7 @@ __u32 secure_ip_id(__be32 daddr)
82{ 97{
83 u32 hash[MD5_DIGEST_WORDS]; 98 u32 hash[MD5_DIGEST_WORDS];
84 99
100 net_secret_init();
85 hash[0] = (__force __u32) daddr; 101 hash[0] = (__force __u32) daddr;
86 hash[1] = net_secret[13]; 102 hash[1] = net_secret[13];
87 hash[2] = net_secret[14]; 103 hash[2] = net_secret[14];
@@ -96,6 +112,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4])
96{ 112{
97 __u32 hash[4]; 113 __u32 hash[4];
98 114
115 net_secret_init();
99 memcpy(hash, daddr, 16); 116 memcpy(hash, daddr, 16);
100 md5_transform(hash, net_secret); 117 md5_transform(hash, net_secret);
101 118
@@ -107,6 +124,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
107{ 124{
108 u32 hash[MD5_DIGEST_WORDS]; 125 u32 hash[MD5_DIGEST_WORDS];
109 126
127 net_secret_init();
110 hash[0] = (__force u32)saddr; 128 hash[0] = (__force u32)saddr;
111 hash[1] = (__force u32)daddr; 129 hash[1] = (__force u32)daddr;
112 hash[2] = ((__force u16)sport << 16) + (__force u16)dport; 130 hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -121,6 +139,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
121{ 139{
122 u32 hash[MD5_DIGEST_WORDS]; 140 u32 hash[MD5_DIGEST_WORDS];
123 141
142 net_secret_init();
124 hash[0] = (__force u32)saddr; 143 hash[0] = (__force u32)saddr;
125 hash[1] = (__force u32)daddr; 144 hash[1] = (__force u32)daddr;
126 hash[2] = (__force u32)dport ^ net_secret[14]; 145 hash[2] = (__force u32)dport ^ net_secret[14];
@@ -140,6 +159,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
140 u32 hash[MD5_DIGEST_WORDS]; 159 u32 hash[MD5_DIGEST_WORDS];
141 u64 seq; 160 u64 seq;
142 161
162 net_secret_init();
143 hash[0] = (__force u32)saddr; 163 hash[0] = (__force u32)saddr;
144 hash[1] = (__force u32)daddr; 164 hash[1] = (__force u32)daddr;
145 hash[2] = ((__force u16)sport << 16) + (__force u16)dport; 165 hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -164,6 +184,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
164 u64 seq; 184 u64 seq;
165 u32 i; 185 u32 i;
166 186
187 net_secret_init();
167 memcpy(hash, saddr, 16); 188 memcpy(hash, saddr, 16);
168 for (i = 0; i < 4; i++) 189 for (i = 0; i < 4; i++)
169 secret[i] = net_secret[i] + daddr[i]; 190 secret[i] = net_secret[i] + daddr[i];
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cfd777bd6bd0..d81cff119f73 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -199,9 +199,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
199 skb->truesize = sizeof(struct sk_buff); 199 skb->truesize = sizeof(struct sk_buff);
200 atomic_set(&skb->users, 1); 200 atomic_set(&skb->users, 1);
201 201
202#ifdef NET_SKBUFF_DATA_USES_OFFSET 202 skb->mac_header = (typeof(skb->mac_header))~0U;
203 skb->mac_header = ~0U;
204#endif
205out: 203out:
206 return skb; 204 return skb;
207} 205}
@@ -275,10 +273,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
275 skb->data = data; 273 skb->data = data;
276 skb_reset_tail_pointer(skb); 274 skb_reset_tail_pointer(skb);
277 skb->end = skb->tail + size; 275 skb->end = skb->tail + size;
278#ifdef NET_SKBUFF_DATA_USES_OFFSET 276 skb->mac_header = (typeof(skb->mac_header))~0U;
279 skb->mac_header = ~0U; 277 skb->transport_header = (typeof(skb->transport_header))~0U;
280 skb->transport_header = ~0U;
281#endif
282 278
283 /* make sure we initialize shinfo sequentially */ 279 /* make sure we initialize shinfo sequentially */
284 shinfo = skb_shinfo(skb); 280 shinfo = skb_shinfo(skb);
@@ -313,7 +309,8 @@ EXPORT_SYMBOL(__alloc_skb);
313 * @frag_size: size of fragment, or 0 if head was kmalloced 309 * @frag_size: size of fragment, or 0 if head was kmalloced
314 * 310 *
315 * Allocate a new &sk_buff. Caller provides space holding head and 311 * Allocate a new &sk_buff. Caller provides space holding head and
316 * skb_shared_info. @data must have been allocated by kmalloc() 312 * skb_shared_info. @data must have been allocated by kmalloc() only if
313 * @frag_size is 0, otherwise data should come from the page allocator.
317 * The return is the new skb buffer. 314 * The return is the new skb buffer.
318 * On a failure the return is %NULL, and @data is not freed. 315 * On a failure the return is %NULL, and @data is not freed.
319 * Notes : 316 * Notes :
@@ -344,10 +341,8 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
344 skb->data = data; 341 skb->data = data;
345 skb_reset_tail_pointer(skb); 342 skb_reset_tail_pointer(skb);
346 skb->end = skb->tail + size; 343 skb->end = skb->tail + size;
347#ifdef NET_SKBUFF_DATA_USES_OFFSET 344 skb->mac_header = (typeof(skb->mac_header))~0U;
348 skb->mac_header = ~0U; 345 skb->transport_header = (typeof(skb->transport_header))~0U;
349 skb->transport_header = ~0U;
350#endif
351 346
352 /* make sure we initialize shinfo sequentially */ 347 /* make sure we initialize shinfo sequentially */
353 shinfo = skb_shinfo(skb); 348 shinfo = skb_shinfo(skb);
@@ -483,15 +478,8 @@ EXPORT_SYMBOL(skb_add_rx_frag);
483 478
484static void skb_drop_list(struct sk_buff **listp) 479static void skb_drop_list(struct sk_buff **listp)
485{ 480{
486 struct sk_buff *list = *listp; 481 kfree_skb_list(*listp);
487
488 *listp = NULL; 482 *listp = NULL;
489
490 do {
491 struct sk_buff *this = list;
492 list = list->next;
493 kfree_skb(this);
494 } while (list);
495} 483}
496 484
497static inline void skb_drop_fraglist(struct sk_buff *skb) 485static inline void skb_drop_fraglist(struct sk_buff *skb)
@@ -651,6 +639,17 @@ void kfree_skb(struct sk_buff *skb)
651} 639}
652EXPORT_SYMBOL(kfree_skb); 640EXPORT_SYMBOL(kfree_skb);
653 641
642void kfree_skb_list(struct sk_buff *segs)
643{
644 while (segs) {
645 struct sk_buff *next = segs->next;
646
647 kfree_skb(segs);
648 segs = next;
649 }
650}
651EXPORT_SYMBOL(kfree_skb_list);
652
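
kfree_skb_list() lifts the loop that skb_drop_list() used to open-code into a reusable helper, so callers that previously walked ->next chains by hand can free a whole segment list in one call. The pattern is the standard one for intrusive singly linked lists: save the successor before freeing the node. In plain, runnable C:

    #include <stdlib.h>

    struct node {
            struct node *next;
            /* payload */
    };

    /* n is invalid after free(), so ->next must be read first */
    static void free_node_list(struct node *n)
    {
            while (n) {
                    struct node *next = n->next;

                    free(n);
                    n = next;
            }
    }
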
654/** 653/**
655 * skb_tx_error - report an sk_buff xmit error 654 * skb_tx_error - report an sk_buff xmit error
656 * @skb: buffer that triggered an error 655 * @skb: buffer that triggered an error
@@ -699,6 +698,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
699 new->transport_header = old->transport_header; 698 new->transport_header = old->transport_header;
700 new->network_header = old->network_header; 699 new->network_header = old->network_header;
701 new->mac_header = old->mac_header; 700 new->mac_header = old->mac_header;
701 new->inner_protocol = old->inner_protocol;
702 new->inner_transport_header = old->inner_transport_header; 702 new->inner_transport_header = old->inner_transport_header;
703 new->inner_network_header = old->inner_network_header; 703 new->inner_network_header = old->inner_network_header;
704 new->inner_mac_header = old->inner_mac_header; 704 new->inner_mac_header = old->inner_mac_header;
@@ -739,6 +739,10 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
739 new->vlan_tci = old->vlan_tci; 739 new->vlan_tci = old->vlan_tci;
740 740
741 skb_copy_secmark(new, old); 741 skb_copy_secmark(new, old);
742
743#ifdef CONFIG_NET_RX_BUSY_POLL
744 new->napi_id = old->napi_id;
745#endif
742} 746}
743 747
744/* 748/*
@@ -821,7 +825,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
821 page = alloc_page(gfp_mask); 825 page = alloc_page(gfp_mask);
822 if (!page) { 826 if (!page) {
823 while (head) { 827 while (head) {
824 struct page *next = (struct page *)head->private; 828 struct page *next = (struct page *)page_private(head);
825 put_page(head); 829 put_page(head);
826 head = next; 830 head = next;
827 } 831 }
@@ -831,7 +835,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
831 memcpy(page_address(page), 835 memcpy(page_address(page),
832 vaddr + f->page_offset, skb_frag_size(f)); 836 vaddr + f->page_offset, skb_frag_size(f));
833 kunmap_atomic(vaddr); 837 kunmap_atomic(vaddr);
834 page->private = (unsigned long)head; 838 set_page_private(page, (unsigned long)head);
835 head = page; 839 head = page;
836 } 840 }
837 841
@@ -845,7 +849,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
845 for (i = num_frags - 1; i >= 0; i--) { 849 for (i = num_frags - 1; i >= 0; i--) {
846 __skb_fill_page_desc(skb, i, head, 0, 850 __skb_fill_page_desc(skb, i, head, 0,
847 skb_shinfo(skb)->frags[i].size); 851 skb_shinfo(skb)->frags[i].size);
848 head = (struct page *)head->private; 852 head = (struct page *)page_private(head);
849 } 853 }
850 854
851 skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; 855 skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
@@ -911,18 +915,8 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off)
911 915
912static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) 916static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
913{ 917{
914#ifndef NET_SKBUFF_DATA_USES_OFFSET
915 /*
916 * Shift between the two data areas in bytes
917 */
918 unsigned long offset = new->data - old->data;
919#endif
920
921 __copy_skb_header(new, old); 918 __copy_skb_header(new, old);
922 919
923#ifndef NET_SKBUFF_DATA_USES_OFFSET
924 skb_headers_offset_update(new, offset);
925#endif
926 skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; 920 skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
927 skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; 921 skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
928 skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; 922 skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
@@ -1114,7 +1108,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
1114 skb->end = skb->head + size; 1108 skb->end = skb->head + size;
1115#endif 1109#endif
1116 skb->tail += off; 1110 skb->tail += off;
1117 skb_headers_offset_update(skb, off); 1111 skb_headers_offset_update(skb, nhead);
1118 /* Only adjust this if it actually is csum_start rather than csum */ 1112 /* Only adjust this if it actually is csum_start rather than csum */
1119 if (skb->ip_summed == CHECKSUM_PARTIAL) 1113 if (skb->ip_summed == CHECKSUM_PARTIAL)
1120 skb->csum_start += nhead; 1114 skb->csum_start += nhead;
@@ -1209,9 +1203,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
1209 off = newheadroom - oldheadroom; 1203 off = newheadroom - oldheadroom;
1210 if (n->ip_summed == CHECKSUM_PARTIAL) 1204 if (n->ip_summed == CHECKSUM_PARTIAL)
1211 n->csum_start += off; 1205 n->csum_start += off;
1212#ifdef NET_SKBUFF_DATA_USES_OFFSET 1206
1213 skb_headers_offset_update(n, off); 1207 skb_headers_offset_update(n, off);
1214#endif
1215 1208
1216 return n; 1209 return n;
1217} 1210}
@@ -2554,8 +2547,13 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
2554 unsigned int block_limit, abs_offset = consumed + st->lower_offset; 2547 unsigned int block_limit, abs_offset = consumed + st->lower_offset;
2555 skb_frag_t *frag; 2548 skb_frag_t *frag;
2556 2549
2557 if (unlikely(abs_offset >= st->upper_offset)) 2550 if (unlikely(abs_offset >= st->upper_offset)) {
2551 if (st->frag_data) {
2552 kunmap_atomic(st->frag_data);
2553 st->frag_data = NULL;
2554 }
2558 return 0; 2555 return 0;
2556 }
2559 2557
2560next_skb: 2558next_skb:
2561 block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; 2559 block_limit = skb_headlen(st->cur_skb) + st->stepped_offset;
@@ -2853,7 +2851,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2853 doffset + tnl_hlen); 2851 doffset + tnl_hlen);
2854 2852
2855 if (fskb != skb_shinfo(skb)->frag_list) 2853 if (fskb != skb_shinfo(skb)->frag_list)
2856 continue; 2854 goto perform_csum_check;
2857 2855
2858 if (!sg) { 2856 if (!sg) {
2859 nskb->ip_summed = CHECKSUM_NONE; 2857 nskb->ip_summed = CHECKSUM_NONE;
@@ -2917,6 +2915,7 @@ skip_fraglist:
2917 nskb->len += nskb->data_len; 2915 nskb->len += nskb->data_len;
2918 nskb->truesize += nskb->data_len; 2916 nskb->truesize += nskb->data_len;
2919 2917
2918perform_csum_check:
2920 if (!csum) { 2919 if (!csum) {
2921 nskb->csum = skb_checksum(nskb, doffset, 2920 nskb->csum = skb_checksum(nskb, doffset,
2922 nskb->len - doffset, 0); 2921 nskb->len - doffset, 0);
@@ -3499,3 +3498,31 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
3499 return true; 3498 return true;
3500} 3499}
3501EXPORT_SYMBOL(skb_try_coalesce); 3500EXPORT_SYMBOL(skb_try_coalesce);
3501
3502/**
3503 * skb_scrub_packet - scrub an skb
3504 *
3505 * @skb: buffer to clean
3506 * @xnet: packet is crossing netns
3507 *
3508 * skb_scrub_packet can be used after encapsulating or decapsulating a packet
3509 * into/from a tunnel. Some information has to be cleared during these
3510 * operations.
3511 * skb_scrub_packet can also be used to clean an skb before injecting it into
3512 * another namespace (@xnet == true). We have to clear all information in the
3513 * skb that could impact namespace isolation.
3514 */
3515void skb_scrub_packet(struct sk_buff *skb, bool xnet)
3516{
3517 if (xnet)
3518 skb_orphan(skb);
3519 skb->tstamp.tv64 = 0;
3520 skb->pkt_type = PACKET_HOST;
3521 skb->skb_iif = 0;
3522 skb_dst_drop(skb);
3523 skb->mark = 0;
3524 secpath_reset(skb);
3525 nf_reset(skb);
3526 nf_reset_trace(skb);
3527}
3528EXPORT_SYMBOL_GPL(skb_scrub_packet);
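
skb_scrub_packet() centralizes the metadata that must not leak through a tunnel or across a namespace boundary: timestamp, incoming ifindex, dst, mark, security path and conntrack state, with the owning socket dropped only in the cross-netns case. A caller sketch for a decap path handing the packet to a device in another namespace (the device variables here are illustrative, not from this diff):

    /* scrub fully only when crossing namespaces; same-netns decap
     * keeps socket ownership but still clears tunnel-tainted state
     */
    bool xnet = !net_eq(dev_net(tunnel_dev), dev_net(target_dev));

    skb_scrub_packet(skb, xnet);
    skb->dev = target_dev;
    netif_rx(skb);
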
diff --git a/net/core/sock.c b/net/core/sock.c
index 88868a9d21da..5b6beba494a3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -93,6 +93,7 @@
93 93
94#include <linux/capability.h> 94#include <linux/capability.h>
95#include <linux/errno.h> 95#include <linux/errno.h>
96#include <linux/errqueue.h>
96#include <linux/types.h> 97#include <linux/types.h>
97#include <linux/socket.h> 98#include <linux/socket.h>
98#include <linux/in.h> 99#include <linux/in.h>
@@ -139,6 +140,8 @@
139#include <net/tcp.h> 140#include <net/tcp.h>
140#endif 141#endif
141 142
143#include <net/busy_poll.h>
144
142static DEFINE_MUTEX(proto_list_mutex); 145static DEFINE_MUTEX(proto_list_mutex);
143static LIST_HEAD(proto_list); 146static LIST_HEAD(proto_list);
144 147
@@ -571,9 +574,7 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
571 int ret = -ENOPROTOOPT; 574 int ret = -ENOPROTOOPT;
572#ifdef CONFIG_NETDEVICES 575#ifdef CONFIG_NETDEVICES
573 struct net *net = sock_net(sk); 576 struct net *net = sock_net(sk);
574 struct net_device *dev;
575 char devname[IFNAMSIZ]; 577 char devname[IFNAMSIZ];
576 unsigned seq;
577 578
578 if (sk->sk_bound_dev_if == 0) { 579 if (sk->sk_bound_dev_if == 0) {
579 len = 0; 580 len = 0;
@@ -584,20 +585,9 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
584 if (len < IFNAMSIZ) 585 if (len < IFNAMSIZ)
585 goto out; 586 goto out;
586 587
587retry: 588 ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
588 seq = read_seqcount_begin(&devnet_rename_seq); 589 if (ret)
589 rcu_read_lock();
590 dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
591 ret = -ENODEV;
592 if (!dev) {
593 rcu_read_unlock();
594 goto out; 590 goto out;
595 }
596
597 strcpy(devname, dev->name);
598 rcu_read_unlock();
599 if (read_seqcount_retry(&devnet_rename_seq, seq))
600 goto retry;
601 591
602 len = strlen(devname) + 1; 592 len = strlen(devname) + 1;
603 593
@@ -911,6 +901,19 @@ set_rcvbuf:
911 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); 901 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
912 break; 902 break;
913 903
904#ifdef CONFIG_NET_RX_BUSY_POLL
905 case SO_BUSY_POLL:
906 /* allow unprivileged users to decrease the value */
907 if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
908 ret = -EPERM;
909 else {
910 if (val < 0)
911 ret = -EINVAL;
912 else
913 sk->sk_ll_usec = val;
914 }
915 break;
916#endif
914 default: 917 default:
915 ret = -ENOPROTOOPT; 918 ret = -ENOPROTOOPT;
916 break; 919 break;
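
SO_BUSY_POLL exposes per-socket busy polling: sk_ll_usec is how long a blocking read may spin on the device queue before sleeping. The asymmetric permission check means an unprivileged task can only lower the value it inherited from the busy_read sysctl (see the sysctl_net_core.c hunk later in this diff); raising it needs CAP_NET_ADMIN. A user-space usage sketch, assuming a libc that exposes SO_BUSY_POLL:

    #include <stdio.h>
    #include <sys/socket.h>

    /* request up to 'usec' microseconds of busy polling on fd */
    static int enable_busy_poll(int fd, int usec)
    {
            if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL,
                           &usec, sizeof(usec)) < 0) {
                    perror("SO_BUSY_POLL");
                    return -1;
            }
            return 0;
    }
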
@@ -1168,6 +1171,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
1168 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); 1171 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1169 break; 1172 break;
1170 1173
1174#ifdef CONFIG_NET_RX_BUSY_POLL
1175 case SO_BUSY_POLL:
1176 v.val = sk->sk_ll_usec;
1177 break;
1178#endif
1179
1171 default: 1180 default:
1172 return -ENOPROTOOPT; 1181 return -ENOPROTOOPT;
1173 } 1182 }
@@ -1567,6 +1576,25 @@ void sock_wfree(struct sk_buff *skb)
1567} 1576}
1568EXPORT_SYMBOL(sock_wfree); 1577EXPORT_SYMBOL(sock_wfree);
1569 1578
1579void skb_orphan_partial(struct sk_buff *skb)
1580{
1581 /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
1582 * so we do not completely orphan skb, but transfer all
1583 * accounted bytes but one, to avoid unexpected reorders.
1584 */
1585 if (skb->destructor == sock_wfree
1586#ifdef CONFIG_INET
1587 || skb->destructor == tcp_wfree
1588#endif
1589 ) {
1590 atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
1591 skb->truesize = 1;
1592 } else {
1593 skb_orphan(skb);
1594 }
1595}
1596EXPORT_SYMBOL(skb_orphan_partial);
1597
1570/* 1598/*
1571 * Read buffer destructor automatically called from kfree_skb. 1599 * Read buffer destructor automatically called from kfree_skb.
1572 */ 1600 */
@@ -1713,24 +1741,23 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
1713 1741
1714struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, 1742struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1715 unsigned long data_len, int noblock, 1743 unsigned long data_len, int noblock,
1716 int *errcode) 1744 int *errcode, int max_page_order)
1717{ 1745{
1718 struct sk_buff *skb; 1746 struct sk_buff *skb = NULL;
1747 unsigned long chunk;
1719 gfp_t gfp_mask; 1748 gfp_t gfp_mask;
1720 long timeo; 1749 long timeo;
1721 int err; 1750 int err;
1722 int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; 1751 int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1752 struct page *page;
1753 int i;
1723 1754
1724 err = -EMSGSIZE; 1755 err = -EMSGSIZE;
1725 if (npages > MAX_SKB_FRAGS) 1756 if (npages > MAX_SKB_FRAGS)
1726 goto failure; 1757 goto failure;
1727 1758
1728 gfp_mask = sk->sk_allocation;
1729 if (gfp_mask & __GFP_WAIT)
1730 gfp_mask |= __GFP_REPEAT;
1731
1732 timeo = sock_sndtimeo(sk, noblock); 1759 timeo = sock_sndtimeo(sk, noblock);
1733 while (1) { 1760 while (!skb) {
1734 err = sock_error(sk); 1761 err = sock_error(sk);
1735 if (err != 0) 1762 if (err != 0)
1736 goto failure; 1763 goto failure;
@@ -1739,50 +1766,52 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1739 if (sk->sk_shutdown & SEND_SHUTDOWN) 1766 if (sk->sk_shutdown & SEND_SHUTDOWN)
1740 goto failure; 1767 goto failure;
1741 1768
1742 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { 1769 if (atomic_read(&sk->sk_wmem_alloc) >= sk->sk_sndbuf) {
1743 skb = alloc_skb(header_len, gfp_mask); 1770 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1744 if (skb) { 1771 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1745 int i; 1772 err = -EAGAIN;
1746 1773 if (!timeo)
1747 /* No pages, we're done... */ 1774 goto failure;
1748 if (!data_len) 1775 if (signal_pending(current))
1749 break; 1776 goto interrupted;
1750 1777 timeo = sock_wait_for_wmem(sk, timeo);
1751 skb->truesize += data_len; 1778 continue;
1752 skb_shinfo(skb)->nr_frags = npages; 1779 }
1753 for (i = 0; i < npages; i++) {
1754 struct page *page;
1755
1756 page = alloc_pages(sk->sk_allocation, 0);
1757 if (!page) {
1758 err = -ENOBUFS;
1759 skb_shinfo(skb)->nr_frags = i;
1760 kfree_skb(skb);
1761 goto failure;
1762 }
1763
1764 __skb_fill_page_desc(skb, i,
1765 page, 0,
1766 (data_len >= PAGE_SIZE ?
1767 PAGE_SIZE :
1768 data_len));
1769 data_len -= PAGE_SIZE;
1770 }
1771 1780
1772 /* Full success... */ 1781 err = -ENOBUFS;
1773 break; 1782 gfp_mask = sk->sk_allocation;
1774 } 1783 if (gfp_mask & __GFP_WAIT)
1775 err = -ENOBUFS; 1784 gfp_mask |= __GFP_REPEAT;
1785
1786 skb = alloc_skb(header_len, gfp_mask);
1787 if (!skb)
1776 goto failure; 1788 goto failure;
1789
1790 skb->truesize += data_len;
1791
1792 for (i = 0; npages > 0; i++) {
1793 int order = max_page_order;
1794
1795 while (order) {
1796 if (npages >= 1 << order) {
1797 page = alloc_pages(sk->sk_allocation |
1798 __GFP_COMP | __GFP_NOWARN,
1799 order);
1800 if (page)
1801 goto fill_page;
1802 }
1803 order--;
1804 }
1805 page = alloc_page(sk->sk_allocation);
1806 if (!page)
1807 goto failure;
1808fill_page:
1809 chunk = min_t(unsigned long, data_len,
1810 PAGE_SIZE << order);
1811 skb_fill_page_desc(skb, i, page, 0, chunk);
1812 data_len -= chunk;
1813 npages -= 1 << order;
1777 } 1814 }
1778 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1779 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1780 err = -EAGAIN;
1781 if (!timeo)
1782 goto failure;
1783 if (signal_pending(current))
1784 goto interrupted;
1785 timeo = sock_wait_for_wmem(sk, timeo);
1786 } 1815 }
1787 1816
1788 skb_set_owner_w(skb, sk); 1817 skb_set_owner_w(skb, sk);
@@ -1791,6 +1820,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1791interrupted: 1820interrupted:
1792 err = sock_intr_errno(timeo); 1821 err = sock_intr_errno(timeo);
1793failure: 1822failure:
1823 kfree_skb(skb);
1794 *errcode = err; 1824 *errcode = err;
1795 return NULL; 1825 return NULL;
1796} 1826}
@@ -1799,7 +1829,7 @@ EXPORT_SYMBOL(sock_alloc_send_pskb);
1799struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, 1829struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1800 int noblock, int *errcode) 1830 int noblock, int *errcode)
1801{ 1831{
1802 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode); 1832 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
1803} 1833}
1804EXPORT_SYMBOL(sock_alloc_send_skb); 1834EXPORT_SYMBOL(sock_alloc_send_skb);
1805 1835
@@ -2284,6 +2314,11 @@ void sock_init_data(struct socket *sock, struct sock *sk)
2284 2314
2285 sk->sk_stamp = ktime_set(-1L, 0); 2315 sk->sk_stamp = ktime_set(-1L, 0);
2286 2316
2317#ifdef CONFIG_NET_RX_BUSY_POLL
2318 sk->sk_napi_id = 0;
2319 sk->sk_ll_usec = sysctl_net_busy_read;
2320#endif
2321
2287 /* 2322 /*
2288 * Before updating sk_refcnt, we must commit prior changes to memory 2323 * Before updating sk_refcnt, we must commit prior changes to memory
2289 * (Documentation/RCU/rculist_nulls.txt for details) 2324 * (Documentation/RCU/rculist_nulls.txt for details)
@@ -2412,6 +2447,52 @@ void sock_enable_timestamp(struct sock *sk, int flag)
2412 } 2447 }
2413} 2448}
2414 2449
2450int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
2451 int level, int type)
2452{
2453 struct sock_exterr_skb *serr;
2454 struct sk_buff *skb, *skb2;
2455 int copied, err;
2456
2457 err = -EAGAIN;
2458 skb = skb_dequeue(&sk->sk_error_queue);
2459 if (skb == NULL)
2460 goto out;
2461
2462 copied = skb->len;
2463 if (copied > len) {
2464 msg->msg_flags |= MSG_TRUNC;
2465 copied = len;
2466 }
2467 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
2468 if (err)
2469 goto out_free_skb;
2470
2471 sock_recv_timestamp(msg, sk, skb);
2472
2473 serr = SKB_EXT_ERR(skb);
2474 put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
2475
2476 msg->msg_flags |= MSG_ERRQUEUE;
2477 err = copied;
2478
2479 /* Reset and regenerate socket error */
2480 spin_lock_bh(&sk->sk_error_queue.lock);
2481 sk->sk_err = 0;
2482 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
2483 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
2484 spin_unlock_bh(&sk->sk_error_queue.lock);
2485 sk->sk_error_report(sk);
2486 } else
2487 spin_unlock_bh(&sk->sk_error_queue.lock);
2488
2489out_free_skb:
2490 kfree_skb(skb);
2491out:
2492 return err;
2493}
2494EXPORT_SYMBOL(sock_recv_errqueue);
2495
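
sock_recv_errqueue() gives protocols a common implementation of the MSG_ERRQUEUE receive path: dequeue one error skb, copy it out, attach the sock_extended_err as a control message, then regenerate sk_err from whatever is still queued. From user space the counterpart looks like this (a sketch; which cmsg level/type to parse depends on the protocol, e.g. IPPROTO_IP/IP_RECVERR):

    #include <sys/socket.h>
    #include <sys/types.h>
    #include <sys/uio.h>

    /* drain one entry from the socket error queue without blocking;
     * returns recvmsg()'s result (-1/EAGAIN when the queue is empty)
     */
    static ssize_t drain_errqueue(int fd)
    {
            char data[256], ctrl[512];
            struct iovec iov = {
                    .iov_base = data,
                    .iov_len  = sizeof(data),
            };
            struct msghdr msg = {
                    .msg_iov        = &iov,
                    .msg_iovlen     = 1,
                    .msg_control    = ctrl,
                    .msg_controllen = sizeof(ctrl),
            };

            return recvmsg(fd, &msg, MSG_ERRQUEUE | MSG_DONTWAIT);
    }
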
2415/* 2496/*
2416 * Get a socket option on a socket. 2497 * Get a socket option on a socket.
2417 * 2498 *
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index d5bef0b0f639..a0e9cf6379de 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -73,8 +73,13 @@ int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk,
73 goto out; 73 goto out;
74 } 74 }
75 75
76 if (filter) 76 if (filter) {
77 memcpy(nla_data(attr), filter->insns, len); 77 struct sock_filter *fb = (struct sock_filter *)nla_data(attr);
78 int i;
79
80 for (i = 0; i < filter->len; i++, fb++)
81 sk_decode_filter(&filter->insns[i], fb);
82 }
78 83
79out: 84out:
80 rcu_read_unlock(); 85 rcu_read_unlock();
diff --git a/net/core/stream.c b/net/core/stream.c
index f5df85dcd20b..512f0a24269b 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -30,7 +30,7 @@ void sk_stream_write_space(struct sock *sk)
30 struct socket *sock = sk->sk_socket; 30 struct socket *sock = sk->sk_socket;
31 struct socket_wq *wq; 31 struct socket_wq *wq;
32 32
33 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { 33 if (sk_stream_is_writeable(sk) && sock) {
34 clear_bit(SOCK_NOSPACE, &sock->flags); 34 clear_bit(SOCK_NOSPACE, &sock->flags);
35 35
36 rcu_read_lock(); 36 rcu_read_lock();
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cfdb46ab3a7f..cca444190907 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -19,16 +19,20 @@
19#include <net/ip.h> 19#include <net/ip.h>
20#include <net/sock.h> 20#include <net/sock.h>
21#include <net/net_ratelimit.h> 21#include <net/net_ratelimit.h>
22#include <net/busy_poll.h>
23#include <net/pkt_sched.h>
22 24
25static int zero = 0;
23static int one = 1; 26static int one = 1;
27static int ushort_max = USHRT_MAX;
24 28
25#ifdef CONFIG_RPS 29#ifdef CONFIG_RPS
26static int rps_sock_flow_sysctl(ctl_table *table, int write, 30static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
27 void __user *buffer, size_t *lenp, loff_t *ppos) 31 void __user *buffer, size_t *lenp, loff_t *ppos)
28{ 32{
29 unsigned int orig_size, size; 33 unsigned int orig_size, size;
30 int ret, i; 34 int ret, i;
31 ctl_table tmp = { 35 struct ctl_table tmp = {
32 .data = &size, 36 .data = &size,
33 .maxlen = sizeof(size), 37 .maxlen = sizeof(size),
34 .mode = table->mode 38 .mode = table->mode
@@ -87,6 +91,129 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
87} 91}
88#endif /* CONFIG_RPS */ 92#endif /* CONFIG_RPS */
89 93
94#ifdef CONFIG_NET_FLOW_LIMIT
95static DEFINE_MUTEX(flow_limit_update_mutex);
96
97static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
98 void __user *buffer, size_t *lenp,
99 loff_t *ppos)
100{
101 struct sd_flow_limit *cur;
102 struct softnet_data *sd;
103 cpumask_var_t mask;
104 int i, len, ret = 0;
105
106 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
107 return -ENOMEM;
108
109 if (write) {
110 ret = cpumask_parse_user(buffer, *lenp, mask);
111 if (ret)
112 goto done;
113
114 mutex_lock(&flow_limit_update_mutex);
115 len = sizeof(*cur) + netdev_flow_limit_table_len;
116 for_each_possible_cpu(i) {
117 sd = &per_cpu(softnet_data, i);
118 cur = rcu_dereference_protected(sd->flow_limit,
119 lockdep_is_held(&flow_limit_update_mutex));
120 if (cur && !cpumask_test_cpu(i, mask)) {
121 RCU_INIT_POINTER(sd->flow_limit, NULL);
122 synchronize_rcu();
123 kfree(cur);
124 } else if (!cur && cpumask_test_cpu(i, mask)) {
125 cur = kzalloc(len, GFP_KERNEL);
126 if (!cur) {
127 /* not unwinding previous changes */
128 ret = -ENOMEM;
129 goto write_unlock;
130 }
131 cur->num_buckets = netdev_flow_limit_table_len;
132 rcu_assign_pointer(sd->flow_limit, cur);
133 }
134 }
135write_unlock:
136 mutex_unlock(&flow_limit_update_mutex);
137 } else {
138 char kbuf[128];
139
140 if (*ppos || !*lenp) {
141 *lenp = 0;
142 goto done;
143 }
144
145 cpumask_clear(mask);
146 rcu_read_lock();
147 for_each_possible_cpu(i) {
148 sd = &per_cpu(softnet_data, i);
149 if (rcu_dereference(sd->flow_limit))
150 cpumask_set_cpu(i, mask);
151 }
152 rcu_read_unlock();
153
154 len = min(sizeof(kbuf) - 1, *lenp);
155 len = cpumask_scnprintf(kbuf, len, mask);
156 if (!len) {
157 *lenp = 0;
158 goto done;
159 }
160 if (len < *lenp)
161 kbuf[len++] = '\n';
162 if (copy_to_user(buffer, kbuf, len)) {
163 ret = -EFAULT;
164 goto done;
165 }
166 *lenp = len;
167 *ppos += len;
168 }
169
170done:
171 free_cpumask_var(mask);
172 return ret;
173}
174
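
The bitmap write path above is a textbook RCU replace: the per-CPU table is unpublished before it is freed, and readers that might still hold the old pointer are waited out, all under the update mutex. Condensed to the three steps that matter:

    old = rcu_dereference_protected(sd->flow_limit,
                    lockdep_is_held(&flow_limit_update_mutex));
    RCU_INIT_POINTER(sd->flow_limit, NULL);  /* 1. unpublish */
    synchronize_rcu();                       /* 2. wait out readers */
    kfree(old);                              /* 3. now safe to free */
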
175static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
176 void __user *buffer, size_t *lenp,
177 loff_t *ppos)
178{
179 unsigned int old, *ptr;
180 int ret;
181
182 mutex_lock(&flow_limit_update_mutex);
183
184 ptr = table->data;
185 old = *ptr;
186 ret = proc_dointvec(table, write, buffer, lenp, ppos);
187 if (!ret && write && !is_power_of_2(*ptr)) {
188 *ptr = old;
189 ret = -EINVAL;
190 }
191
192 mutex_unlock(&flow_limit_update_mutex);
193 return ret;
194}
195#endif /* CONFIG_NET_FLOW_LIMIT */
196
197#ifdef CONFIG_NET_SCHED
198static int set_default_qdisc(struct ctl_table *table, int write,
199 void __user *buffer, size_t *lenp, loff_t *ppos)
200{
201 char id[IFNAMSIZ];
202 struct ctl_table tbl = {
203 .data = id,
204 .maxlen = IFNAMSIZ,
205 };
206 int ret;
207
208 qdisc_get_default(id, IFNAMSIZ);
209
210 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
211 if (write && ret == 0)
212 ret = qdisc_set_default(id);
213 return ret;
214}
215#endif
216
90static struct ctl_table net_core_table[] = { 217static struct ctl_table net_core_table[] = {
91#ifdef CONFIG_NET 218#ifdef CONFIG_NET
92 { 219 {
@@ -180,6 +307,44 @@ static struct ctl_table net_core_table[] = {
180 .proc_handler = rps_sock_flow_sysctl 307 .proc_handler = rps_sock_flow_sysctl
181 }, 308 },
182#endif 309#endif
310#ifdef CONFIG_NET_FLOW_LIMIT
311 {
312 .procname = "flow_limit_cpu_bitmap",
313 .mode = 0644,
314 .proc_handler = flow_limit_cpu_sysctl
315 },
316 {
317 .procname = "flow_limit_table_len",
318 .data = &netdev_flow_limit_table_len,
319 .maxlen = sizeof(int),
320 .mode = 0644,
321 .proc_handler = flow_limit_table_len_sysctl
322 },
323#endif /* CONFIG_NET_FLOW_LIMIT */
324#ifdef CONFIG_NET_RX_BUSY_POLL
325 {
326 .procname = "busy_poll",
327 .data = &sysctl_net_busy_poll,
328 .maxlen = sizeof(unsigned int),
329 .mode = 0644,
330 .proc_handler = proc_dointvec
331 },
332 {
333 .procname = "busy_read",
334 .data = &sysctl_net_busy_read,
335 .maxlen = sizeof(unsigned int),
336 .mode = 0644,
337 .proc_handler = proc_dointvec
338 },
339#endif
340#ifdef CONFIG_NET_SCHED
341 {
342 .procname = "default_qdisc",
343 .mode = 0644,
344 .maxlen = IFNAMSIZ,
345 .proc_handler = set_default_qdisc
346 },
347#endif
183#endif /* CONFIG_NET */ 348#endif /* CONFIG_NET */
184 { 349 {
185 .procname = "netdev_budget", 350 .procname = "netdev_budget",
@@ -204,7 +369,9 @@ static struct ctl_table netns_core_table[] = {
204 .data = &init_net.core.sysctl_somaxconn, 369 .data = &init_net.core.sysctl_somaxconn,
205 .maxlen = sizeof(int), 370 .maxlen = sizeof(int),
206 .mode = 0644, 371 .mode = 0644,
207 .proc_handler = proc_dointvec 372 .extra1 = &zero,
373 .extra2 = &ushort_max,
374 .proc_handler = proc_dointvec_minmax
208 }, 375 },
209 { } 376 { }
210}; 377};
diff --git a/net/core/utils.c b/net/core/utils.c
index 3c7f5b51b979..aa88e23fc87a 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -338,25 +338,3 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
338 csum_unfold(*sum))); 338 csum_unfold(*sum)));
339} 339}
340EXPORT_SYMBOL(inet_proto_csum_replace16); 340EXPORT_SYMBOL(inet_proto_csum_replace16);
341
342int mac_pton(const char *s, u8 *mac)
343{
344 int i;
345
346 /* XX:XX:XX:XX:XX:XX */
347 if (strlen(s) < 3 * ETH_ALEN - 1)
348 return 0;
349
350 /* Don't dirty result unless string is valid MAC. */
351 for (i = 0; i < ETH_ALEN; i++) {
352 if (!isxdigit(s[i * 3]) || !isxdigit(s[i * 3 + 1]))
353 return 0;
354 if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':')
355 return 0;
356 }
357 for (i = 0; i < ETH_ALEN; i++) {
358 mac[i] = (hex_to_bin(s[i * 3]) << 4) | hex_to_bin(s[i * 3 + 1]);
359 }
360 return 1;
361}
362EXPORT_SYMBOL(mac_pton);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9c61f9c02fdb..6cf9f7782ad4 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -135,6 +135,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
135 135
136 if (dst) 136 if (dst)
137 dst->ops->redirect(dst, sk, skb); 137 dst->ops->redirect(dst, sk, skb);
138 goto out;
138 } 139 }
139 140
140 if (type == ICMPV6_PKT_TOOBIG) { 141 if (type == ICMPV6_PKT_TOOBIG) {
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 6c7c78b83940..ba64750f0387 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -336,7 +336,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
336 mask |= POLLIN | POLLRDNORM; 336 mask |= POLLIN | POLLRDNORM;
337 337
338 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { 338 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
339 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { 339 if (sk_stream_is_writeable(sk)) {
340 mask |= POLLOUT | POLLWRNORM; 340 mask |= POLLOUT | POLLWRNORM;
341 } else { /* send SIGIO later */ 341 } else { /* send SIGIO later */
342 set_bit(SOCK_ASYNC_NOSPACE, 342 set_bit(SOCK_ASYNC_NOSPACE,
@@ -347,7 +347,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
347 * wspace test but before the flags are set, 347 * wspace test but before the flags are set,
348 * IO signal will be lost. 348 * IO signal will be lost.
349 */ 349 */
350 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) 350 if (sk_stream_is_writeable(sk))
351 mask |= POLLOUT | POLLWRNORM; 351 mask |= POLLOUT | POLLWRNORM;
352 } 352 }
353 } 353 }
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index c21f200eed93..dd4d506ef923 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -2078,9 +2078,9 @@ out_err:
2078} 2078}
2079 2079
2080static int dn_device_event(struct notifier_block *this, unsigned long event, 2080static int dn_device_event(struct notifier_block *this, unsigned long event,
2081 void *ptr) 2081 void *ptr)
2082{ 2082{
2083 struct net_device *dev = (struct net_device *)ptr; 2083 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2084 2084
2085 if (!net_eq(dev_net(dev), &init_net)) 2085 if (!net_eq(dev_net(dev), &init_net))
2086 return NOTIFY_DONE; 2086 return NOTIFY_DONE;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 7d9197063ebb..dd0dfb25f4b1 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -158,11 +158,11 @@ static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MU
158static int min_priority[1]; 158static int min_priority[1];
159static int max_priority[] = { 127 }; /* From DECnet spec */ 159static int max_priority[] = { 127 }; /* From DECnet spec */
160 160
161static int dn_forwarding_proc(ctl_table *, int, 161static int dn_forwarding_proc(struct ctl_table *, int,
162 void __user *, size_t *, loff_t *); 162 void __user *, size_t *, loff_t *);
163static struct dn_dev_sysctl_table { 163static struct dn_dev_sysctl_table {
164 struct ctl_table_header *sysctl_header; 164 struct ctl_table_header *sysctl_header;
165 ctl_table dn_dev_vars[5]; 165 struct ctl_table dn_dev_vars[5];
166} dn_dev_sysctl = { 166} dn_dev_sysctl = {
167 NULL, 167 NULL,
168 { 168 {
@@ -242,7 +242,7 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
242 } 242 }
243} 243}
244 244
245static int dn_forwarding_proc(ctl_table *table, int write, 245static int dn_forwarding_proc(struct ctl_table *table, int write,
246 void __user *buffer, 246 void __user *buffer,
247 size_t *lenp, loff_t *ppos) 247 size_t *lenp, loff_t *ppos)
248{ 248{
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index a55eeccaa72f..5325b541c526 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -132,7 +132,7 @@ static int parse_addr(__le16 *addr, char *str)
132 return 0; 132 return 0;
133} 133}
134 134
135static int dn_node_address_handler(ctl_table *table, int write, 135static int dn_node_address_handler(struct ctl_table *table, int write,
136 void __user *buffer, 136 void __user *buffer,
137 size_t *lenp, loff_t *ppos) 137 size_t *lenp, loff_t *ppos)
138{ 138{
@@ -183,7 +183,7 @@ static int dn_node_address_handler(ctl_table *table, int write,
183 return 0; 183 return 0;
184} 184}
185 185
186static int dn_def_dev_handler(ctl_table *table, int write, 186static int dn_def_dev_handler(struct ctl_table *table, int write,
187 void __user *buffer, 187 void __user *buffer,
188 size_t *lenp, loff_t *ppos) 188 size_t *lenp, loff_t *ppos)
189{ 189{
@@ -246,7 +246,7 @@ static int dn_def_dev_handler(ctl_table *table, int write,
246 return 0; 246 return 0;
247} 247}
248 248
249static ctl_table dn_table[] = { 249static struct ctl_table dn_table[] = {
250 { 250 {
251 .procname = "node_address", 251 .procname = "node_address",
252 .maxlen = 7, 252 .maxlen = 7,
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 0a69d0757795..f347a2ca7d7e 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -118,7 +118,7 @@ dns_resolver_instantiate(struct key *key, struct key_preparsed_payload *prep)
118 if (opt_vlen <= 0) 118 if (opt_vlen <= 0)
119 goto bad_option_value; 119 goto bad_option_value;
120 120
121 ret = strict_strtoul(eq, 10, &derrno); 121 ret = kstrtoul(eq, 10, &derrno);
122 if (ret < 0) 122 if (ret < 0)
123 goto bad_option_value; 123 goto bad_option_value;
124 124
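
strict_strtoul() was the deprecated alias; kstrtoul() keeps the same contract: base as the second argument, 0 on success, a negative errno (-EINVAL on malformed input, -ERANGE on overflow) otherwise, and the whole string must parse aside from an optional trailing newline. A short usage sketch:

	unsigned long derrno;
	int ret;

	ret = kstrtoul("111", 10, &derrno);	/* base 10 */
	if (ret < 0)
		return ret;			/* -EINVAL or -ERANGE */
	/* derrno == 111; trailing garbage would have failed the parse */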
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 6ebd8fbd9285..29d684ebca6a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -347,7 +347,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
347 347
348 slave_dev->features = master->vlan_features; 348 slave_dev->features = master->vlan_features;
349 SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops); 349 SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops);
350 memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN); 350 eth_hw_addr_inherit(slave_dev, master);
351 slave_dev->tx_queue_len = 0; 351 slave_dev->tx_queue_len = 0;
352 352
353 switch (ds->dst->tag_protocol) { 353 switch (ds->dst->tag_protocol) {
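
eth_hw_addr_inherit() replaces the raw memcpy() so that the address assignment type travels with the address. A sketch of what the helper is assumed to do (see include/linux/etherdevice.h for the authoritative definition):

static inline void eth_hw_addr_inherit(struct net_device *dst,
				       struct net_device *src)
{
	/* carry over both the address and how it was assigned */
	dst->addr_assign_type = src->addr_assign_type;
	memcpy(dst->dev_addr, src->dev_addr, ETH_ALEN);
}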
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 5359560926bc..be1f64d35358 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -401,27 +401,8 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
401} 401}
402EXPORT_SYMBOL(alloc_etherdev_mqs); 402EXPORT_SYMBOL(alloc_etherdev_mqs);
403 403
404static size_t _format_mac_addr(char *buf, int buflen,
405 const unsigned char *addr, int len)
406{
407 int i;
408 char *cp = buf;
409
410 for (i = 0; i < len; i++) {
411 cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]);
412 if (i == len - 1)
413 break;
414 cp += scnprintf(cp, buflen - (cp - buf), ":");
415 }
416 return cp - buf;
417}
418
419ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) 404ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
420{ 405{
421 size_t l; 406 return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr);
422
423 l = _format_mac_addr(buf, PAGE_SIZE, addr, len);
424 l += scnprintf(buf + l, PAGE_SIZE - l, "\n");
425 return (ssize_t)l;
426} 407}
427EXPORT_SYMBOL(sysfs_format_mac); 408EXPORT_SYMBOL(sysfs_format_mac);
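
The one-line sysfs_format_mac() relies on the kernel's %ph printk extension: %*ph dumps a buffer as hex bytes with the length taken from a preceding int argument, and the C suffix selects ':' as the separator. A minimal kernel-side sketch:

	u8 mac[ETH_ALEN] = { 0x00, 0x16, 0x3e, 0x12, 0x34, 0x56 };
	char buf[32];

	/* yields "00:16:3e:12:34:56\n" */
	scnprintf(buf, sizeof(buf), "%*phC\n", ETH_ALEN, mac);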
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 55e1fd5b3e56..c85e71e0c7ff 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -67,39 +67,6 @@ static const u8 lowpan_ttl_values[] = {0, 1, 64, 255};
67 67
68static LIST_HEAD(lowpan_devices); 68static LIST_HEAD(lowpan_devices);
69 69
70/*
71 * Uncompression of linklocal:
72 * 0 -> 16 bytes from packet
73 * 1 -> 2 bytes from prefix - bunch of zeroes and 8 from packet
74 * 2 -> 2 bytes from prefix - zeroes + 2 from packet
75 * 3 -> 2 bytes from prefix - infer 8 bytes from lladdr
76 *
77 * NOTE: => the uncompress function does change 0xf to 0x10
78 * NOTE: 0x00 => no-autoconfig => unspecified
79 */
80static const u8 lowpan_unc_llconf[] = {0x0f, 0x28, 0x22, 0x20};
81
82/*
83 * Uncompression of ctx-based:
84 * 0 -> 0 bits from packet [unspecified / reserved]
85 * 1 -> 8 bytes from prefix - bunch of zeroes and 8 from packet
86 * 2 -> 8 bytes from prefix - zeroes + 2 from packet
87 * 3 -> 8 bytes from prefix - infer 8 bytes from lladdr
88 */
89static const u8 lowpan_unc_ctxconf[] = {0x00, 0x88, 0x82, 0x80};
90
91/*
92 * Uncompression of ctx-base
93 * 0 -> 0 bits from packet
94 * 1 -> 2 bytes from prefix - bunch of zeroes 5 from packet
95 * 2 -> 2 bytes from prefix - zeroes + 3 from packet
96 * 3 -> 2 bytes from prefix - infer 1 bytes from lladdr
97 */
98static const u8 lowpan_unc_mxconf[] = {0x0f, 0x25, 0x23, 0x21};
99
100/* Link local prefix */
101static const u8 lowpan_llprefix[] = {0xfe, 0x80};
102
103/* private device info */ 70/* private device info */
104struct lowpan_dev_info { 71struct lowpan_dev_info {
105 struct net_device *real_dev; /* real WPAN device ptr */ 72 struct net_device *real_dev; /* real WPAN device ptr */
@@ -191,55 +158,177 @@ lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, const struct in6_addr *ipaddr,
191 return rol8(val, shift); 158 return rol8(val, shift);
192} 159}
193 160
194static void 161/*
195lowpan_uip_ds6_set_addr_iid(struct in6_addr *ipaddr, unsigned char *lladdr) 162 * Uncompress address function for source and
163 * destination addresses (non-multicast).
164 *
165 * address_mode is the SAM or DAM value.
166 */
167static int
168lowpan_uncompress_addr(struct sk_buff *skb,
169 struct in6_addr *ipaddr,
170 const u8 address_mode,
171 const struct ieee802154_addr *lladdr)
196{ 172{
197 memcpy(&ipaddr->s6_addr[8], lladdr, IEEE802154_ADDR_LEN); 173 bool fail;
198 /* second bit-flip (Universe/Local) is done according RFC2464 */ 174
199 ipaddr->s6_addr[8] ^= 0x02; 175 switch (address_mode) {
176 case LOWPAN_IPHC_ADDR_00:
177 /* for global link addresses */
178 fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16);
179 break;
180 case LOWPAN_IPHC_ADDR_01:
181 /* fe:80::XXXX:XXXX:XXXX:XXXX */
182 ipaddr->s6_addr[0] = 0xFE;
183 ipaddr->s6_addr[1] = 0x80;
184 fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[8], 8);
185 break;
186 case LOWPAN_IPHC_ADDR_02:
187 /* fe:80::ff:fe00:XXXX */
188 ipaddr->s6_addr[0] = 0xFE;
189 ipaddr->s6_addr[1] = 0x80;
190 ipaddr->s6_addr[11] = 0xFF;
191 ipaddr->s6_addr[12] = 0xFE;
192 fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[14], 2);
193 break;
194 case LOWPAN_IPHC_ADDR_03:
195 fail = false;
196 switch (lladdr->addr_type) {
197 case IEEE802154_ADDR_LONG:
198 /* fe:80::XXXX:XXXX:XXXX:XXXX
199 * \_________________/
200 * hwaddr
201 */
202 ipaddr->s6_addr[0] = 0xFE;
203 ipaddr->s6_addr[1] = 0x80;
204 memcpy(&ipaddr->s6_addr[8], lladdr->hwaddr,
205 IEEE802154_ADDR_LEN);
206 /* second bit-flip (Universe/Local)
207 * is done according to RFC 2464
208 */
209 ipaddr->s6_addr[8] ^= 0x02;
210 break;
211 case IEEE802154_ADDR_SHORT:
212 /* fe:80::ff:fe00:XXXX
213 * \__/
214 * short_addr
215 *
216 * Universe/Local bit is zero.
217 */
218 ipaddr->s6_addr[0] = 0xFE;
219 ipaddr->s6_addr[1] = 0x80;
220 ipaddr->s6_addr[11] = 0xFF;
221 ipaddr->s6_addr[12] = 0xFE;
222 ipaddr->s6_addr16[7] = htons(lladdr->short_addr);
223 break;
224 default:
225 pr_debug("Invalid addr_type set\n");
226 return -EINVAL;
227 }
228 break;
229 default:
230 pr_debug("Invalid address mode value: 0x%x\n", address_mode);
231 return -EINVAL;
232 }
233
234 if (fail) {
235 pr_debug("Failed to fetch skb data\n");
236 return -EIO;
237 }
238
239 lowpan_raw_dump_inline(NULL, "Reconstructed ipv6 addr is:\n",
240 ipaddr->s6_addr, 16);
241
242 return 0;
200} 243}
201 244
202/* 245/* Uncompress address function for source context
203 * Uncompress addresses based on a prefix and a postfix with zeroes in 246 * based addresses (non-multicast).
204 * between. If the postfix is zero in length it will use the link address
205 * to configure the IP address (autoconf style).
206 * pref_post_count takes a byte where the first nibble specify prefix count
207 * and the second postfix count (NOTE: 15/0xf => 16 bytes copy).
208 */ 247 */
209static int 248static int
210lowpan_uncompress_addr(struct sk_buff *skb, struct in6_addr *ipaddr, 249lowpan_uncompress_context_based_src_addr(struct sk_buff *skb,
211 u8 const *prefix, u8 pref_post_count, unsigned char *lladdr) 250 struct in6_addr *ipaddr,
251 const u8 sam)
212{ 252{
213 u8 prefcount = pref_post_count >> 4; 253 switch (sam) {
214 u8 postcount = pref_post_count & 0x0f; 254 case LOWPAN_IPHC_ADDR_00:
215 255 /* unspec address ::
216 /* full nibble 15 => 16 */ 256 * Do nothing, address is already ::
217 prefcount = (prefcount == 15 ? 16 : prefcount); 257 */
218 postcount = (postcount == 15 ? 16 : postcount); 258 break;
219 259 case LOWPAN_IPHC_ADDR_01:
220 if (lladdr) 260 /* TODO */
221 lowpan_raw_dump_inline(__func__, "linklocal address", 261 case LOWPAN_IPHC_ADDR_02:
222 lladdr, IEEE802154_ADDR_LEN); 262 /* TODO */
223 if (prefcount > 0) 263 case LOWPAN_IPHC_ADDR_03:
224 memcpy(ipaddr, prefix, prefcount); 264 /* TODO */
225 265 netdev_warn(skb->dev, "SAM value 0x%x not supported\n", sam);
226 if (prefcount + postcount < 16) 266 return -EINVAL;
227 memset(&ipaddr->s6_addr[prefcount], 0, 267 default:
228 16 - (prefcount + postcount)); 268 pr_debug("Invalid sam value: 0x%x\n", sam);
229 269 return -EINVAL;
230 if (postcount > 0) { 270 }
231 memcpy(&ipaddr->s6_addr[16 - postcount], skb->data, postcount); 271
232 skb_pull(skb, postcount); 272 lowpan_raw_dump_inline(NULL,
233 } else if (prefcount > 0) { 273 "Reconstructed context based ipv6 src addr is:\n",
234 if (lladdr == NULL) 274 ipaddr->s6_addr, 16);
235 return -EINVAL; 275
276 return 0;
277}
236 278
237 /* no IID based configuration if no prefix and no data */ 279/* Uncompress function for multicast destination address,
238 lowpan_uip_ds6_set_addr_iid(ipaddr, lladdr); 280 * when M bit is set.
281 */
282static int
283lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
284 struct in6_addr *ipaddr,
285 const u8 dam)
286{
287 bool fail;
288
289 switch (dam) {
290 case LOWPAN_IPHC_DAM_00:
291 /* 00: 128 bits. The full address
292 * is carried in-line.
293 */
294 fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16);
295 break;
296 case LOWPAN_IPHC_DAM_01:
297 /* 01: 48 bits. The address takes
298 * the form ffXX::00XX:XXXX:XXXX.
299 */
300 ipaddr->s6_addr[0] = 0xFF;
301 fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1);
302 fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[11], 5);
303 break;
304 case LOWPAN_IPHC_DAM_10:
305 /* 10: 32 bits. The address takes
306 * the form ffXX::00XX:XXXX.
307 */
308 ipaddr->s6_addr[0] = 0xFF;
309 fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1);
310 fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[13], 3);
311 break;
312 case LOWPAN_IPHC_DAM_11:
313 /* 11: 8 bits. The address takes
314 * the form ff02::00XX.
315 */
316 ipaddr->s6_addr[0] = 0xFF;
317 ipaddr->s6_addr[1] = 0x02;
318 fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[15], 1);
319 break;
320 default:
321 pr_debug("DAM value has a wrong value: 0x%x\n", dam);
322 return -EINVAL;
323 }
324
325 if (fail) {
326 pr_debug("Failed to fetch skb data\n");
327 return -EIO;
239 } 328 }
240 329
241 pr_debug("uncompressing %d + %d => ", prefcount, postcount); 330 lowpan_raw_dump_inline(NULL, "Reconstructed ipv6 multicast addr is:\n",
242 lowpan_raw_dump_inline(NULL, NULL, ipaddr->s6_addr, 16); 331 ipaddr->s6_addr, 16);
243 332
244 return 0; 333 return 0;
245} 334}
@@ -702,6 +791,12 @@ lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag)
702 skb_reserve(frame->skb, sizeof(struct ipv6hdr)); 791 skb_reserve(frame->skb, sizeof(struct ipv6hdr));
703 skb_put(frame->skb, frame->length); 792 skb_put(frame->skb, frame->length);
704 793
794 /* copy the first control block to keep a
795 * trace of the link-layer addresses in case
796 * of a link-local compressed address
797 */
798 memcpy(frame->skb->cb, skb->cb, sizeof(skb->cb));
799
705 init_timer(&frame->timer); 800 init_timer(&frame->timer);
706 /* time out is the same as for ipv6 - 60 sec */ 801 /* time out is the same as for ipv6 - 60 sec */
707 frame->timer.expires = jiffies + LOWPAN_FRAG_TIMEOUT; 802 frame->timer.expires = jiffies + LOWPAN_FRAG_TIMEOUT;
@@ -723,9 +818,9 @@ frame_err:
723static int 818static int
724lowpan_process_data(struct sk_buff *skb) 819lowpan_process_data(struct sk_buff *skb)
725{ 820{
726 struct ipv6hdr hdr; 821 struct ipv6hdr hdr = {};
727 u8 tmp, iphc0, iphc1, num_context = 0; 822 u8 tmp, iphc0, iphc1, num_context = 0;
728 u8 *_saddr, *_daddr; 823 const struct ieee802154_addr *_saddr, *_daddr;
729 int err; 824 int err;
730 825
731 lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, 826 lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data,
@@ -828,8 +923,8 @@ lowpan_process_data(struct sk_buff *skb)
828 if (lowpan_fetch_skb_u8(skb, &iphc1)) 923 if (lowpan_fetch_skb_u8(skb, &iphc1))
829 goto drop; 924 goto drop;
830 925
831 _saddr = mac_cb(skb)->sa.hwaddr; 926 _saddr = &mac_cb(skb)->sa;
832 _daddr = mac_cb(skb)->da.hwaddr; 927 _daddr = &mac_cb(skb)->da;
833 928
834 pr_debug("iphc0 = %02x, iphc1 = %02x\n", iphc0, iphc1); 929 pr_debug("iphc0 = %02x, iphc1 = %02x\n", iphc0, iphc1);
835 930
@@ -868,8 +963,6 @@ lowpan_process_data(struct sk_buff *skb)
868 963
869 hdr.priority = ((tmp >> 2) & 0x0f); 964 hdr.priority = ((tmp >> 2) & 0x0f);
870 hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30); 965 hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30);
871 hdr.flow_lbl[1] = 0;
872 hdr.flow_lbl[2] = 0;
873 break; 966 break;
874 /* 967 /*
875 * Flow Label carried in-line 968 * Flow Label carried in-line
@@ -885,10 +978,6 @@ lowpan_process_data(struct sk_buff *skb)
885 break; 978 break;
886 /* Traffic Class and Flow Label are elided */ 979 /* Traffic Class and Flow Label are elided */
887 case 3: /* 11b */ 980 case 3: /* 11b */
888 hdr.priority = 0;
889 hdr.flow_lbl[0] = 0;
890 hdr.flow_lbl[1] = 0;
891 hdr.flow_lbl[2] = 0;
892 break; 981 break;
893 default: 982 default:
894 break; 983 break;
@@ -915,10 +1004,18 @@ lowpan_process_data(struct sk_buff *skb)
915 /* Extract SAM to the tmp variable */ 1004 /* Extract SAM to the tmp variable */
916 tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03; 1005 tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03;
917 1006
918 /* Source address uncompression */ 1007 if (iphc1 & LOWPAN_IPHC_SAC) {
919 pr_debug("source address stateless compression\n"); 1008 /* Source address context based uncompression */
920 err = lowpan_uncompress_addr(skb, &hdr.saddr, lowpan_llprefix, 1009 pr_debug("SAC bit is set. Handle context based source address.\n");
921 lowpan_unc_llconf[tmp], skb->data); 1010 err = lowpan_uncompress_context_based_src_addr(
1011 skb, &hdr.saddr, tmp);
1012 } else {
1013 /* Source address uncompression */
1014 pr_debug("source address stateless compression\n");
1015 err = lowpan_uncompress_addr(skb, &hdr.saddr, tmp, _saddr);
1016 }
1017
1018 /* Check on error of previous branch */
922 if (err) 1019 if (err)
923 goto drop; 1020 goto drop;
924 1021
@@ -931,23 +1028,14 @@ lowpan_process_data(struct sk_buff *skb)
931 pr_debug("dest: context-based mcast compression\n"); 1028 pr_debug("dest: context-based mcast compression\n");
932 /* TODO: implement this */ 1029 /* TODO: implement this */
933 } else { 1030 } else {
934 u8 prefix[] = {0xff, 0x02}; 1031 err = lowpan_uncompress_multicast_daddr(
935 1032 skb, &hdr.daddr, tmp);
936 pr_debug("dest: non context-based mcast compression\n");
937 if (0 < tmp && tmp < 3) {
938 if (lowpan_fetch_skb_u8(skb, &prefix[1]))
939 goto drop;
940 }
941
942 err = lowpan_uncompress_addr(skb, &hdr.daddr, prefix,
943 lowpan_unc_mxconf[tmp], NULL);
944 if (err) 1033 if (err)
945 goto drop; 1034 goto drop;
946 } 1035 }
947 } else { 1036 } else {
948 pr_debug("dest: stateless compression\n"); 1037 pr_debug("dest: stateless compression\n");
949 err = lowpan_uncompress_addr(skb, &hdr.daddr, lowpan_llprefix, 1038 err = lowpan_uncompress_addr(skb, &hdr.daddr, tmp, _daddr);
950 lowpan_unc_llconf[tmp], skb->data);
951 if (err) 1039 if (err)
952 goto drop; 1040 goto drop;
953 } 1041 }
@@ -1352,10 +1440,9 @@ static inline void lowpan_netlink_fini(void)
1352} 1440}
1353 1441
1354static int lowpan_device_event(struct notifier_block *unused, 1442static int lowpan_device_event(struct notifier_block *unused,
1355 unsigned long event, 1443 unsigned long event, void *ptr)
1356 void *ptr)
1357{ 1444{
1358 struct net_device *dev = ptr; 1445 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1359 LIST_HEAD(del_list); 1446 LIST_HEAD(del_list);
1360 struct lowpan_dev_record *entry, *tmp; 1447 struct lowpan_dev_record *entry, *tmp;
1361 1448
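
The rewritten lowpan_uncompress_addr() above dispatches on the four IPHC address modes: the full 128-bit address carried in-line, 64 or 16 bits appended to the fe80::/64 link-local prefix, or everything derived from the link-layer address. As a worked example, a self-contained userspace sketch of the 16-bit case (fe80::ff:fe00:XXXX); the names are illustrative, not kernel API:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* IPHC mode 2: 16 address bits carried in-line, rest reconstructed */
static void uncompress_mode2(uint8_t ip6[16], const uint8_t inline_bits[2])
{
	memset(ip6, 0, 16);
	ip6[0] = 0xFE;		/* fe80::/64 link-local prefix */
	ip6[1] = 0x80;
	ip6[11] = 0xFF;		/* mapped short-address IID marker */
	ip6[12] = 0xFE;
	memcpy(&ip6[14], inline_bits, 2);
}

int main(void)
{
	uint8_t addr[16];
	const uint8_t in_line[2] = { 0x12, 0x34 };

	uncompress_mode2(addr, in_line);
	/* prints the tail of fe80::ff:fe00:1234 in raw byte form */
	printf("...%02x%02x:%02x%02x\n",
	       addr[12], addr[13], addr[14], addr[15]);
	return 0;
}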
diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
index 4b8f917658b5..2869c0526dad 100644
--- a/net/ieee802154/6lowpan.h
+++ b/net/ieee802154/6lowpan.h
@@ -193,10 +193,12 @@
193/* Values of fields within the IPHC encoding second byte */ 193/* Values of fields within the IPHC encoding second byte */
194#define LOWPAN_IPHC_CID 0x80 194#define LOWPAN_IPHC_CID 0x80
195 195
196#define LOWPAN_IPHC_ADDR_00 0x00
197#define LOWPAN_IPHC_ADDR_01 0x01
198#define LOWPAN_IPHC_ADDR_02 0x02
199#define LOWPAN_IPHC_ADDR_03 0x03
200
196#define LOWPAN_IPHC_SAC 0x40 201#define LOWPAN_IPHC_SAC 0x40
197#define LOWPAN_IPHC_SAM_00 0x00
198#define LOWPAN_IPHC_SAM_01 0x10
199#define LOWPAN_IPHC_SAM_10 0x20
200#define LOWPAN_IPHC_SAM 0x30 202#define LOWPAN_IPHC_SAM 0x30
201 203
202#define LOWPAN_IPHC_SAM_BIT 4 204#define LOWPAN_IPHC_SAM_BIT 4
@@ -230,4 +232,16 @@
230 dest = 16 bit inline */ 232 dest = 16 bit inline */
231#define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */ 233#define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */
232 234
235static inline bool lowpan_fetch_skb(struct sk_buff *skb,
236 void *data, const unsigned int len)
237{
238 if (unlikely(!pskb_may_pull(skb, len)))
239 return true;
240
241 skb_copy_from_linear_data(skb, data, len);
242 skb_pull(skb, len);
243
244 return false;
245}
246
233#endif /* __6LOWPAN_H__ */ 247#endif /* __6LOWPAN_H__ */
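
Note the inverted convention of the new lowpan_fetch_skb() helper: it returns true on failure (the requested bytes could not be pulled), which is what lets call sites such as lowpan_uncompress_multicast_daddr() accumulate errors with |=. Typical usage, mirroring the call sites in this patch:

	bool fail;

	fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1);	/* true == failure */
	fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[11], 5);
	if (fail)
		return -EIO;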
diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c
index 13571eae6bae..ef56ab5b35fe 100644
--- a/net/ieee802154/wpan-class.c
+++ b/net/ieee802154/wpan-class.c
@@ -36,7 +36,8 @@ static ssize_t name ## _show(struct device *dev, \
36 ret = snprintf(buf, PAGE_SIZE, format_string "\n", args); \ 36 ret = snprintf(buf, PAGE_SIZE, format_string "\n", args); \
37 mutex_unlock(&phy->pib_lock); \ 37 mutex_unlock(&phy->pib_lock); \
38 return ret; \ 38 return ret; \
39} 39} \
40static DEVICE_ATTR_RO(name);
40 41
41#define MASTER_SHOW(field, format_string) \ 42#define MASTER_SHOW(field, format_string) \
42 MASTER_SHOW_COMPLEX(field, format_string, phy->field) 43 MASTER_SHOW_COMPLEX(field, format_string, phy->field)
@@ -66,15 +67,17 @@ static ssize_t channels_supported_show(struct device *dev,
66 mutex_unlock(&phy->pib_lock); 67 mutex_unlock(&phy->pib_lock);
67 return len; 68 return len;
68} 69}
69 70static DEVICE_ATTR_RO(channels_supported);
70static struct device_attribute pmib_attrs[] = { 71
71 __ATTR_RO(current_channel), 72static struct attribute *pmib_attrs[] = {
72 __ATTR_RO(current_page), 73 &dev_attr_current_channel.attr,
73 __ATTR_RO(channels_supported), 74 &dev_attr_current_page.attr,
74 __ATTR_RO(transmit_power), 75 &dev_attr_channels_supported.attr,
75 __ATTR_RO(cca_mode), 76 &dev_attr_transmit_power.attr,
76 {}, 77 &dev_attr_cca_mode.attr,
78 NULL,
77}; 79};
80ATTRIBUTE_GROUPS(pmib);
78 81
79static void wpan_phy_release(struct device *d) 82static void wpan_phy_release(struct device *d)
80{ 83{
@@ -85,7 +88,7 @@ static void wpan_phy_release(struct device *d)
85static struct class wpan_phy_class = { 88static struct class wpan_phy_class = {
86 .name = "ieee802154", 89 .name = "ieee802154",
87 .dev_release = wpan_phy_release, 90 .dev_release = wpan_phy_release,
88 .dev_attrs = pmib_attrs, 91 .dev_groups = pmib_groups,
89}; 92};
90 93
91static DEFINE_MUTEX(wpan_phy_mutex); 94static DEFINE_MUTEX(wpan_phy_mutex);
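
The wpan-class.c hunks follow the tree-wide move from struct class .dev_attrs to .dev_groups: every show routine gains a DEVICE_ATTR_RO() declaration, the attributes are collected in a NULL-terminated <prefix>_attrs array, and ATTRIBUTE_GROUPS(<prefix>) generates the matching <prefix>_groups table. A minimal sketch of the pattern with a hypothetical attribute:

static ssize_t foo_show(struct device *dev,
			struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", 42);
}
static DEVICE_ATTR_RO(foo);		/* emits dev_attr_foo */

static struct attribute *example_attrs[] = {
	&dev_attr_foo.attr,
	NULL,
};
ATTRIBUTE_GROUPS(example);		/* emits example_groups */

static struct class example_class = {
	.name       = "example",
	.dev_groups = example_groups,
};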
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 8603ca827104..05c57f0fcabe 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -9,10 +9,7 @@ config IP_MULTICAST
9 intend to participate in the MBONE, a high bandwidth network on top 9 intend to participate in the MBONE, a high bandwidth network on top
10 of the Internet which carries audio and video broadcasts. More 10 of the Internet which carries audio and video broadcasts. More
11 information about the MBONE is on the WWW at 11 information about the MBONE is on the WWW at
12 <http://www.savetz.com/mbone/>. Information about the multicast 12 <http://www.savetz.com/mbone/>. For most people, it's safe to say N.
13 capabilities of the various network cards is contained in
14 <file:Documentation/networking/multicast.txt>. For most people, it's
15 safe to say N.
16 13
17config IP_ADVANCED_ROUTER 14config IP_ADVANCED_ROUTER
18 bool "IP: advanced router" 15 bool "IP: advanced router"
@@ -223,10 +220,8 @@ config IP_MROUTE
223 packets that have several destination addresses. It is needed on the 220 packets that have several destination addresses. It is needed on the
224 MBONE, a high bandwidth network on top of the Internet which carries 221 MBONE, a high bandwidth network on top of the Internet which carries
225 audio and video broadcasts. In order to do that, you would most 222 audio and video broadcasts. In order to do that, you would most
226 likely run the program mrouted. Information about the multicast 223 likely run the program mrouted. If you haven't heard about it, you
227 capabilities of the various network cards is contained in 224 don't need it.
228 <file:Documentation/networking/multicast.txt>. If you haven't heard
229 about it, you don't need it.
230 225
231config IP_MROUTE_MULTIPLE_TABLES 226config IP_MROUTE_MULTIPLE_TABLES
232 bool "IP: multicast policy routing" 227 bool "IP: multicast policy routing"
@@ -264,22 +259,6 @@ config IP_PIMSM_V2
264 gated-5). This routing protocol is not used widely, so say N unless 259 gated-5). This routing protocol is not used widely, so say N unless
265 you want to play with it. 260 you want to play with it.
266 261
267config ARPD
268 bool "IP: ARP daemon support"
269 ---help---
270 The kernel maintains an internal cache which maps IP addresses to
271 hardware addresses on the local network, so that Ethernet
272 frames are sent to the proper address on the physical networking
273 layer. Normally, kernel uses the ARP protocol to resolve these
274 mappings.
275
276 Saying Y here adds support to have an user space daemon to do this
277 resolution instead. This is useful for implementing an alternate
278 address resolution protocol (e.g. NHRP on mGRE tunnels) and also for
279 testing purposes.
280
281 If unsure, say N.
282
283config SYN_COOKIES 262config SYN_COOKIES
284 bool "IP: TCP syncookie support" 263 bool "IP: TCP syncookie support"
285 ---help--- 264 ---help---
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 089cb9f36387..4b81e91c80fe 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -8,10 +8,10 @@ obj-y := route.o inetpeer.o protocol.o \
8 inet_timewait_sock.o inet_connection_sock.o \ 8 inet_timewait_sock.o inet_connection_sock.o \
9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ 9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
10 tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ 10 tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \
11 datagram.o raw.o udp.o udplite.o \ 11 tcp_offload.o datagram.o raw.o udp.o udplite.o \
12 arp.o icmp.o devinet.o af_inet.o igmp.o \ 12 udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
13 fib_frontend.o fib_semantics.o fib_trie.o \ 13 fib_frontend.o fib_semantics.o fib_trie.o \
14 inet_fragment.o ping.o 14 inet_fragment.o ping.o ip_tunnel_core.o
15 15
16obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o 16obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
17obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o 17obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
@@ -19,6 +19,7 @@ obj-$(CONFIG_PROC_FS) += proc.o
19obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o 19obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
20obj-$(CONFIG_IP_MROUTE) += ipmr.o 20obj-$(CONFIG_IP_MROUTE) += ipmr.o
21obj-$(CONFIG_NET_IPIP) += ipip.o 21obj-$(CONFIG_NET_IPIP) += ipip.o
22gre-y := gre_demux.o gre_offload.o
22obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o 23obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
23obj-$(CONFIG_NET_IPGRE) += ip_gre.o 24obj-$(CONFIG_NET_IPGRE) += ip_gre.o
24obj-$(CONFIG_NET_IPVTI) += ip_vti.o 25obj-$(CONFIG_NET_IPVTI) += ip_vti.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d01be2a3ae53..cfeb85cff4f0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -263,10 +263,8 @@ void build_ehash_secret(void)
263 get_random_bytes(&rnd, sizeof(rnd)); 263 get_random_bytes(&rnd, sizeof(rnd));
264 } while (rnd == 0); 264 } while (rnd == 0);
265 265
266 if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) { 266 if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0)
267 get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); 267 get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
268 net_secret_init();
269 }
270} 268}
271EXPORT_SYMBOL(build_ehash_secret); 269EXPORT_SYMBOL(build_ehash_secret);
272 270
@@ -1295,6 +1293,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1295 SKB_GSO_GRE | 1293 SKB_GSO_GRE |
1296 SKB_GSO_TCPV6 | 1294 SKB_GSO_TCPV6 |
1297 SKB_GSO_UDP_TUNNEL | 1295 SKB_GSO_UDP_TUNNEL |
1296 SKB_GSO_MPLS |
1298 0))) 1297 0)))
1299 goto out; 1298 goto out;
1300 1299
@@ -1384,7 +1383,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1384 goto out_unlock; 1383 goto out_unlock;
1385 1384
1386 id = ntohl(*(__be32 *)&iph->id); 1385 id = ntohl(*(__be32 *)&iph->id);
1387 flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); 1386 flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
1388 id >>= 16; 1387 id >>= 16;
1389 1388
1390 for (p = *head; p; p = p->next) { 1389 for (p = *head; p; p = p->next) {
@@ -1406,6 +1405,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1406 NAPI_GRO_CB(p)->flush |= 1405 NAPI_GRO_CB(p)->flush |=
1407 (iph->ttl ^ iph2->ttl) | 1406 (iph->ttl ^ iph2->ttl) |
1408 (iph->tos ^ iph2->tos) | 1407 (iph->tos ^ iph2->tos) |
1408 (__force int)((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) |
1409 ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); 1409 ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
1410 1410
1411 NAPI_GRO_CB(p)->flush |= flush; 1411 NAPI_GRO_CB(p)->flush |= flush;
@@ -1530,18 +1530,6 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
1530} 1530}
1531EXPORT_SYMBOL_GPL(snmp_mib_init); 1531EXPORT_SYMBOL_GPL(snmp_mib_init);
1532 1532
1533void snmp_mib_free(void __percpu *ptr[SNMP_ARRAY_SZ])
1534{
1535 int i;
1536
1537 BUG_ON(ptr == NULL);
1538 for (i = 0; i < SNMP_ARRAY_SZ; i++) {
1539 free_percpu(ptr[i]);
1540 ptr[i] = NULL;
1541 }
1542}
1543EXPORT_SYMBOL_GPL(snmp_mib_free);
1544
1545#ifdef CONFIG_IP_MULTICAST 1533#ifdef CONFIG_IP_MULTICAST
1546static const struct net_protocol igmp_protocol = { 1534static const struct net_protocol igmp_protocol = {
1547 .handler = igmp_rcv, 1535 .handler = igmp_rcv,
@@ -1557,15 +1545,6 @@ static const struct net_protocol tcp_protocol = {
1557 .netns_ok = 1, 1545 .netns_ok = 1,
1558}; 1546};
1559 1547
1560static const struct net_offload tcp_offload = {
1561 .callbacks = {
1562 .gso_send_check = tcp_v4_gso_send_check,
1563 .gso_segment = tcp_tso_segment,
1564 .gro_receive = tcp4_gro_receive,
1565 .gro_complete = tcp4_gro_complete,
1566 },
1567};
1568
1569static const struct net_protocol udp_protocol = { 1548static const struct net_protocol udp_protocol = {
1570 .handler = udp_rcv, 1549 .handler = udp_rcv,
1571 .err_handler = udp_err, 1550 .err_handler = udp_err,
@@ -1573,13 +1552,6 @@ static const struct net_protocol udp_protocol = {
1573 .netns_ok = 1, 1552 .netns_ok = 1,
1574}; 1553};
1575 1554
1576static const struct net_offload udp_offload = {
1577 .callbacks = {
1578 .gso_send_check = udp4_ufo_send_check,
1579 .gso_segment = udp4_ufo_fragment,
1580 },
1581};
1582
1583static const struct net_protocol icmp_protocol = { 1555static const struct net_protocol icmp_protocol = {
1584 .handler = icmp_rcv, 1556 .handler = icmp_rcv,
1585 .err_handler = icmp_err, 1557 .err_handler = icmp_err,
@@ -1679,10 +1651,10 @@ static int __init ipv4_offload_init(void)
1679 /* 1651 /*
1680 * Add offloads 1652 * Add offloads
1681 */ 1653 */
1682 if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0) 1654 if (udpv4_offload_init() < 0)
1683 pr_crit("%s: Cannot add UDP protocol offload\n", __func__); 1655 pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
1684 if (inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0) 1656 if (tcpv4_offload_init() < 0)
1685 pr_crit("%s: Cannot add TCP protocol offlaod\n", __func__); 1657 pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
1686 1658
1687 dev_add_offload(&ip_packet_offload); 1659 dev_add_offload(&ip_packet_offload);
1688 return 0; 1660 return 0;
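
build_ehash_secret() above uses cmpxchg() as a lock-free initialize-once gate: every caller computes a candidate secret, but only the one that observes the 0 -> rnd transition succeed performs the follow-up work. A compact sketch of the idiom, with a hypothetical secret:

static u32 example_secret __read_mostly;

static void example_secret_init(void)
{
	u32 rnd;

	if (likely(example_secret))
		return;
	do {
		get_random_bytes(&rnd, sizeof(rnd));
	} while (rnd == 0);		/* 0 doubles as "uninitialized" */

	/* exactly one caller wins the 0 -> rnd race */
	if (cmpxchg(&example_secret, 0, rnd) == 0)
		pr_debug("secret initialized\n");
}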
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 2e7f1948216f..717902669d2f 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -419,12 +419,9 @@ static void ah4_err(struct sk_buff *skb, u32 info)
419 if (!x) 419 if (!x)
420 return; 420 return;
421 421
422 if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { 422 if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
423 atomic_inc(&flow_cache_genid);
424 rt_genid_bump(net);
425
426 ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); 423 ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);
427 } else 424 else
428 ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0); 425 ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0);
429 xfrm_state_put(x); 426 xfrm_state_put(x);
430} 427}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 247ec1951c35..7808093cede6 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -368,9 +368,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
368 } else { 368 } else {
369 probes -= neigh->parms->app_probes; 369 probes -= neigh->parms->app_probes;
370 if (probes < 0) { 370 if (probes < 0) {
371#ifdef CONFIG_ARPD
372 neigh_app_ns(neigh); 371 neigh_app_ns(neigh);
373#endif
374 return; 372 return;
375 } 373 }
376 } 374 }
@@ -1234,13 +1232,19 @@ out:
1234static int arp_netdev_event(struct notifier_block *this, unsigned long event, 1232static int arp_netdev_event(struct notifier_block *this, unsigned long event,
1235 void *ptr) 1233 void *ptr)
1236{ 1234{
1237 struct net_device *dev = ptr; 1235 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1236 struct netdev_notifier_change_info *change_info;
1238 1237
1239 switch (event) { 1238 switch (event) {
1240 case NETDEV_CHANGEADDR: 1239 case NETDEV_CHANGEADDR:
1241 neigh_changeaddr(&arp_tbl, dev); 1240 neigh_changeaddr(&arp_tbl, dev);
1242 rt_cache_flush(dev_net(dev)); 1241 rt_cache_flush(dev_net(dev));
1243 break; 1242 break;
1243 case NETDEV_CHANGE:
1244 change_info = ptr;
1245 if (change_info->flags_changed & IFF_NOARP)
1246 neigh_changeaddr(&arp_tbl, dev);
1247 break;
1244 default: 1248 default:
1245 break; 1249 break;
1246 } 1250 }
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index dfc39d4d48b7..a1b5bcbd04ae 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -73,6 +73,8 @@ static struct ipv4_devconf ipv4_devconf = {
73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, 73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, 74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, 75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
76 }, 78 },
77}; 79};
78 80
@@ -83,6 +85,8 @@ static struct ipv4_devconf ipv4_devconf_dflt = {
83 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, 85 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, 86 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1, 87 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
86 }, 90 },
87}; 91};
88 92
@@ -215,6 +219,7 @@ void in_dev_finish_destroy(struct in_device *idev)
215 219
216 WARN_ON(idev->ifa_list); 220 WARN_ON(idev->ifa_list);
217 WARN_ON(idev->mc_list); 221 WARN_ON(idev->mc_list);
222 kfree(rcu_dereference_protected(idev->mc_hash, 1));
218#ifdef NET_REFCNT_DEBUG 223#ifdef NET_REFCNT_DEBUG
219 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); 224 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
220#endif 225#endif
@@ -771,7 +776,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
771 ci = nla_data(tb[IFA_CACHEINFO]); 776 ci = nla_data(tb[IFA_CACHEINFO]);
772 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { 777 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
773 err = -EINVAL; 778 err = -EINVAL;
774 goto errout; 779 goto errout_free;
775 } 780 }
776 *pvalid_lft = ci->ifa_valid; 781 *pvalid_lft = ci->ifa_valid;
777 *pprefered_lft = ci->ifa_prefered; 782 *pprefered_lft = ci->ifa_prefered;
@@ -779,6 +784,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
779 784
780 return ifa; 785 return ifa;
781 786
787errout_free:
788 inet_free_ifa(ifa);
782errout: 789errout:
783 return ERR_PTR(err); 790 return ERR_PTR(err);
784} 791}
@@ -1123,10 +1130,7 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1123 if (len < (int) sizeof(ifr)) 1130 if (len < (int) sizeof(ifr))
1124 break; 1131 break;
1125 memset(&ifr, 0, sizeof(struct ifreq)); 1132 memset(&ifr, 0, sizeof(struct ifreq));
1126 if (ifa->ifa_label) 1133 strcpy(ifr.ifr_name, ifa->ifa_label);
1127 strcpy(ifr.ifr_name, ifa->ifa_label);
1128 else
1129 strcpy(ifr.ifr_name, dev->name);
1130 1134
1131 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET; 1135 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1132 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = 1136 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
@@ -1333,7 +1337,7 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev,
1333static int inetdev_event(struct notifier_block *this, unsigned long event, 1337static int inetdev_event(struct notifier_block *this, unsigned long event,
1334 void *ptr) 1338 void *ptr)
1335{ 1339{
1336 struct net_device *dev = ptr; 1340 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1337 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1341 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1338 1342
1339 ASSERT_RTNL(); 1343 ASSERT_RTNL();
@@ -1941,7 +1945,7 @@ static void inet_forward_change(struct net *net)
1941 } 1945 }
1942} 1946}
1943 1947
1944static int devinet_conf_proc(ctl_table *ctl, int write, 1948static int devinet_conf_proc(struct ctl_table *ctl, int write,
1945 void __user *buffer, 1949 void __user *buffer,
1946 size_t *lenp, loff_t *ppos) 1950 size_t *lenp, loff_t *ppos)
1947{ 1951{
@@ -1984,7 +1988,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
1984 return ret; 1988 return ret;
1985} 1989}
1986 1990
1987static int devinet_sysctl_forward(ctl_table *ctl, int write, 1991static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
1988 void __user *buffer, 1992 void __user *buffer,
1989 size_t *lenp, loff_t *ppos) 1993 size_t *lenp, loff_t *ppos)
1990{ 1994{
@@ -2027,7 +2031,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
2027 return ret; 2031 return ret;
2028} 2032}
2029 2033
2030static int ipv4_doint_and_flush(ctl_table *ctl, int write, 2034static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2031 void __user *buffer, 2035 void __user *buffer,
2032 size_t *lenp, loff_t *ppos) 2036 size_t *lenp, loff_t *ppos)
2033{ 2037{
@@ -2094,11 +2098,15 @@ static struct devinet_sysctl_table {
2094 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), 2098 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2095 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), 2099 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2096 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), 2100 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2101 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2102 "force_igmp_version"),
2103 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2104 "igmpv2_unsolicited_report_interval"),
2105 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2106 "igmpv3_unsolicited_report_interval"),
2097 2107
2098 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), 2108 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2099 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), 2109 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2100 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2101 "force_igmp_version"),
2102 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, 2110 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2103 "promote_secondaries"), 2111 "promote_secondaries"),
2104 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, 2112 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
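
The rtm_to_ifaddr() hunk in this file fixes a leak: when the IFA_CACHEINFO validation fails, the already-allocated ifa used to be abandoned on the plain errout path. The fix is the standard layered-goto unwind, where each label undoes exactly the steps taken so far. A generic sketch of the idiom (names hypothetical):

static struct example *example_create(bool valid)
{
	struct example *obj;
	int err;

	obj = example_alloc();
	if (!obj) {
		err = -ENOBUFS;
		goto errout;		/* nothing to undo yet */
	}
	if (!valid) {
		err = -EINVAL;
		goto errout_free;	/* obj exists: free it first */
	}
	return obj;

errout_free:
	example_free(obj);
errout:
	return ERR_PTR(err);
}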
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 4cfe34d4cc96..109ee89f123e 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -477,7 +477,7 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
477 } 477 }
478 478
479 return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - 479 return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) -
480 net_adj) & ~(align - 1)) + (net_adj - 2); 480 net_adj) & ~(align - 1)) + net_adj - 2;
481} 481}
482 482
483static void esp4_err(struct sk_buff *skb, u32 info) 483static void esp4_err(struct sk_buff *skb, u32 info)
@@ -502,12 +502,9 @@ static void esp4_err(struct sk_buff *skb, u32 info)
502 if (!x) 502 if (!x)
503 return; 503 return;
504 504
505 if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { 505 if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
506 atomic_inc(&flow_cache_genid);
507 rt_genid_bump(net);
508
509 ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); 506 ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
510 } else 507 else
511 ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0); 508 ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0);
512 xfrm_state_put(x); 509 xfrm_state_put(x);
513} 510}
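
The esp4_get_mtu() tweak only drops a redundant pair of parentheses, since a + (b - 2) equals a + b - 2; the arithmetic is unchanged. As a worked example with hypothetical numbers (16-byte cipher blocks, a 12-byte ICV, 32 bytes of ESP overhead, net_adj of 20 for the outer IPv4 header; the final "- 2" presumably accounts for the pad-length and next-header trailer bytes):

#include <stdio.h>

static unsigned int esp_payload_mtu(unsigned int mtu, unsigned int header_len,
				    unsigned int authsize, unsigned int net_adj,
				    unsigned int align)
{
	/* round the ESP payload down to the cipher block size, then add
	 * back the network adjustment minus the 2 trailer bytes
	 */
	return ((mtu - header_len - authsize - net_adj) & ~(align - 1))
		+ net_adj - 2;
}

int main(void)
{
	/* 1500 - 32 - 12 - 20 = 1436; & ~15 -> 1424; + 20 - 2 = 1442 */
	printf("%u\n", esp_payload_mtu(1500, 32, 12, 20, 16));
	return 0;
}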
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c7629a209f9d..b3f627ac4ed8 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -961,7 +961,7 @@ static void nl_fib_input(struct sk_buff *skb)
961 nlmsg_len(nlh) < sizeof(*frn)) 961 nlmsg_len(nlh) < sizeof(*frn))
962 return; 962 return;
963 963
964 skb = skb_clone(skb, GFP_KERNEL); 964 skb = netlink_skb_clone(skb, GFP_KERNEL);
965 if (skb == NULL) 965 if (skb == NULL)
966 return; 966 return;
967 nlh = nlmsg_hdr(skb); 967 nlh = nlmsg_hdr(skb);
@@ -1038,7 +1038,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
1038 1038
1039static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 1039static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1040{ 1040{
1041 struct net_device *dev = ptr; 1041 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1042 struct in_device *in_dev; 1042 struct in_device *in_dev;
1043 struct net *net = dev_net(dev); 1043 struct net *net = dev_net(dev);
1044 1044
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 26aa65d1fce4..523be38e37de 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -101,6 +101,30 @@ errout:
101 return err; 101 return err;
102} 102}
103 103
104static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
105{
106 struct fib_result *result = (struct fib_result *) arg->result;
107 struct net_device *dev = result->fi->fib_dev;
108
109 /* do not accept result if the route does
110 * not meet the required prefix length
111 */
112 if (result->prefixlen <= rule->suppress_prefixlen)
113 goto suppress_route;
114
115 /* do not accept result if the route uses a device
116 * belonging to a forbidden interface group
117 */
118 if (rule->suppress_ifgroup != -1 && dev && dev->group == rule->suppress_ifgroup)
119 goto suppress_route;
120
121 return false;
122
123suppress_route:
124 if (!(arg->flags & FIB_LOOKUP_NOREF))
125 fib_info_put(result->fi);
126 return true;
127}
104 128
105static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 129static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
106{ 130{
@@ -267,6 +291,7 @@ static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = {
267 .rule_size = sizeof(struct fib4_rule), 291 .rule_size = sizeof(struct fib4_rule),
268 .addr_size = sizeof(u32), 292 .addr_size = sizeof(u32),
269 .action = fib4_rule_action, 293 .action = fib4_rule_action,
294 .suppress = fib4_rule_suppress,
270 .match = fib4_rule_match, 295 .match = fib4_rule_match,
271 .configure = fib4_rule_configure, 296 .configure = fib4_rule_configure,
272 .delete = fib4_rule_delete, 297 .delete = fib4_rule_delete,
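
The new fib4_rule_suppress() callback makes a rule's match conditional after the fact: returning true rejects the looked-up result and lets processing continue with the next rule. With suppress_prefixlen set to 0, for example, a default route (prefixlen 0) fails the result->prefixlen <= rule->suppress_prefixlen test and is suppressed, while any more specific route is accepted. A self-contained reduction of just that comparison:

#include <stdbool.h>
#include <stdio.h>

static bool suppressed(int result_prefixlen, int suppress_prefixlen)
{
	return result_prefixlen <= suppress_prefixlen;
}

int main(void)
{
	printf("%d\n", suppressed(0, 0));	/* 1: default route suppressed */
	printf("%d\n", suppressed(24, 0));	/* 0: a /24 is kept */
	return 0;
}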
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 8f6cb7a87cd6..d5dbca5ecf62 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -169,7 +169,8 @@ static void free_nh_exceptions(struct fib_nh *nh)
169 169
170 next = rcu_dereference_protected(fnhe->fnhe_next, 1); 170 next = rcu_dereference_protected(fnhe->fnhe_next, 1);
171 171
172 rt_fibinfo_free(&fnhe->fnhe_rth); 172 rt_fibinfo_free(&fnhe->fnhe_rth_input);
173 rt_fibinfo_free(&fnhe->fnhe_rth_output);
173 174
174 kfree(fnhe); 175 kfree(fnhe);
175 176
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 49616fed9340..3df6d3edb2a1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -71,7 +71,6 @@
71#include <linux/init.h> 71#include <linux/init.h>
72#include <linux/list.h> 72#include <linux/list.h>
73#include <linux/slab.h> 73#include <linux/slab.h>
74#include <linux/prefetch.h>
75#include <linux/export.h> 74#include <linux/export.h>
76#include <net/net_namespace.h> 75#include <net/net_namespace.h>
77#include <net/ip.h> 76#include <net/ip.h>
@@ -1761,10 +1760,8 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c)
1761 if (!c) 1760 if (!c)
1762 continue; 1761 continue;
1763 1762
1764 if (IS_LEAF(c)) { 1763 if (IS_LEAF(c))
1765 prefetch(rcu_dereference_rtnl(p->child[idx]));
1766 return (struct leaf *) c; 1764 return (struct leaf *) c;
1767 }
1768 1765
1769 /* Rescan start scanning in new node */ 1766 /* Rescan start scanning in new node */
1770 p = (struct tnode *) c; 1767 p = (struct tnode *) c;
@@ -2133,7 +2130,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2133 max--; 2130 max--;
2134 2131
2135 pointers = 0; 2132 pointers = 0;
2136 for (i = 1; i <= max; i++) 2133 for (i = 1; i < max; i++)
2137 if (stat->nodesizes[i] != 0) { 2134 if (stat->nodesizes[i] != 0) {
2138 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); 2135 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
2139 pointers += (1<<i) * stat->nodesizes[i]; 2136 pointers += (1<<i) * stat->nodesizes[i];
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
deleted file mode 100644
index b2e805af9b87..000000000000
--- a/net/ipv4/gre.c
+++ /dev/null
@@ -1,253 +0,0 @@
1/*
2 * GRE over IPv4 demultiplexer driver
3 *
4 * Authors: Dmitry Kozlov (xeb@mail.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15#include <linux/module.h>
16#include <linux/kernel.h>
17#include <linux/kmod.h>
18#include <linux/skbuff.h>
19#include <linux/in.h>
20#include <linux/ip.h>
21#include <linux/netdevice.h>
22#include <linux/if_tunnel.h>
23#include <linux/spinlock.h>
24#include <net/protocol.h>
25#include <net/gre.h>
26
27
28static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
29static DEFINE_SPINLOCK(gre_proto_lock);
30
31int gre_add_protocol(const struct gre_protocol *proto, u8 version)
32{
33 if (version >= GREPROTO_MAX)
34 goto err_out;
35
36 spin_lock(&gre_proto_lock);
37 if (gre_proto[version])
38 goto err_out_unlock;
39
40 RCU_INIT_POINTER(gre_proto[version], proto);
41 spin_unlock(&gre_proto_lock);
42 return 0;
43
44err_out_unlock:
45 spin_unlock(&gre_proto_lock);
46err_out:
47 return -1;
48}
49EXPORT_SYMBOL_GPL(gre_add_protocol);
50
51int gre_del_protocol(const struct gre_protocol *proto, u8 version)
52{
53 if (version >= GREPROTO_MAX)
54 goto err_out;
55
56 spin_lock(&gre_proto_lock);
57 if (rcu_dereference_protected(gre_proto[version],
58 lockdep_is_held(&gre_proto_lock)) != proto)
59 goto err_out_unlock;
60 RCU_INIT_POINTER(gre_proto[version], NULL);
61 spin_unlock(&gre_proto_lock);
62 synchronize_rcu();
63 return 0;
64
65err_out_unlock:
66 spin_unlock(&gre_proto_lock);
67err_out:
68 return -1;
69}
70EXPORT_SYMBOL_GPL(gre_del_protocol);
71
72static int gre_rcv(struct sk_buff *skb)
73{
74 const struct gre_protocol *proto;
75 u8 ver;
76 int ret;
77
78 if (!pskb_may_pull(skb, 12))
79 goto drop;
80
81 ver = skb->data[1]&0x7f;
82 if (ver >= GREPROTO_MAX)
83 goto drop;
84
85 rcu_read_lock();
86 proto = rcu_dereference(gre_proto[ver]);
87 if (!proto || !proto->handler)
88 goto drop_unlock;
89 ret = proto->handler(skb);
90 rcu_read_unlock();
91 return ret;
92
93drop_unlock:
94 rcu_read_unlock();
95drop:
96 kfree_skb(skb);
97 return NET_RX_DROP;
98}
99
100static void gre_err(struct sk_buff *skb, u32 info)
101{
102 const struct gre_protocol *proto;
103 const struct iphdr *iph = (const struct iphdr *)skb->data;
104 u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f;
105
106 if (ver >= GREPROTO_MAX)
107 return;
108
109 rcu_read_lock();
110 proto = rcu_dereference(gre_proto[ver]);
111 if (proto && proto->err_handler)
112 proto->err_handler(skb, info);
113 rcu_read_unlock();
114}
115
116static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
117 netdev_features_t features)
118{
119 struct sk_buff *segs = ERR_PTR(-EINVAL);
120 netdev_features_t enc_features;
121 int ghl = GRE_HEADER_SECTION;
122 struct gre_base_hdr *greh;
123 int mac_len = skb->mac_len;
124 __be16 protocol = skb->protocol;
125 int tnl_hlen;
126 bool csum;
127
128 if (unlikely(skb_shinfo(skb)->gso_type &
129 ~(SKB_GSO_TCPV4 |
130 SKB_GSO_TCPV6 |
131 SKB_GSO_UDP |
132 SKB_GSO_DODGY |
133 SKB_GSO_TCP_ECN |
134 SKB_GSO_GRE)))
135 goto out;
136
137 if (unlikely(!pskb_may_pull(skb, sizeof(*greh))))
138 goto out;
139
140 greh = (struct gre_base_hdr *)skb_transport_header(skb);
141
142 if (greh->flags & GRE_KEY)
143 ghl += GRE_HEADER_SECTION;
144 if (greh->flags & GRE_SEQ)
145 ghl += GRE_HEADER_SECTION;
146 if (greh->flags & GRE_CSUM) {
147 ghl += GRE_HEADER_SECTION;
148 csum = true;
149 } else
150 csum = false;
151
152 /* setup inner skb. */
153 skb->protocol = greh->protocol;
154 skb->encapsulation = 0;
155
156 if (unlikely(!pskb_may_pull(skb, ghl)))
157 goto out;
158 __skb_pull(skb, ghl);
159 skb_reset_mac_header(skb);
160 skb_set_network_header(skb, skb_inner_network_offset(skb));
161 skb->mac_len = skb_inner_network_offset(skb);
162
163 /* segment inner packet. */
164 enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
165 segs = skb_mac_gso_segment(skb, enc_features);
166 if (!segs || IS_ERR(segs))
167 goto out;
168
169 skb = segs;
170 tnl_hlen = skb_tnl_header_len(skb);
171 do {
172 __skb_push(skb, ghl);
173 if (csum) {
174 __be32 *pcsum;
175
176 if (skb_has_shared_frag(skb)) {
177 int err;
178
179 err = __skb_linearize(skb);
180 if (err) {
181 kfree_skb(segs);
182 segs = ERR_PTR(err);
183 goto out;
184 }
185 }
186
187 greh = (struct gre_base_hdr *)(skb->data);
188 pcsum = (__be32 *)(greh + 1);
189 *pcsum = 0;
190 *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0));
191 }
192 __skb_push(skb, tnl_hlen - ghl);
193
194 skb_reset_mac_header(skb);
195 skb_set_network_header(skb, mac_len);
196 skb->mac_len = mac_len;
197 skb->protocol = protocol;
198 } while ((skb = skb->next));
199out:
200 return segs;
201}
202
203static int gre_gso_send_check(struct sk_buff *skb)
204{
205 if (!skb->encapsulation)
206 return -EINVAL;
207 return 0;
208}
209
210static const struct net_protocol net_gre_protocol = {
211 .handler = gre_rcv,
212 .err_handler = gre_err,
213 .netns_ok = 1,
214};
215
216static const struct net_offload gre_offload = {
217 .callbacks = {
218 .gso_send_check = gre_gso_send_check,
219 .gso_segment = gre_gso_segment,
220 },
221};
222
223static int __init gre_init(void)
224{
225 pr_info("GRE over IPv4 demultiplexor driver\n");
226
227 if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
228 pr_err("can't add protocol\n");
229 return -EAGAIN;
230 }
231
232 if (inet_add_offload(&gre_offload, IPPROTO_GRE)) {
233 pr_err("can't add protocol offload\n");
234 inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
235 return -EAGAIN;
236 }
237
238 return 0;
239}
240
241static void __exit gre_exit(void)
242{
243 inet_del_offload(&gre_offload, IPPROTO_GRE);
244 inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
245}
246
247module_init(gre_init);
248module_exit(gre_exit);
249
250MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver");
251MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
252MODULE_LICENSE("GPL");
253
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
new file mode 100644
index 000000000000..736c9fc3ef93
--- /dev/null
+++ b/net/ipv4/gre_demux.c
@@ -0,0 +1,414 @@
1/*
2 * GRE over IPv4 demultiplexer driver
3 *
4 * Authors: Dmitry Kozlov (xeb@mail.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15#include <linux/module.h>
16#include <linux/if.h>
17#include <linux/icmp.h>
18#include <linux/kernel.h>
19#include <linux/kmod.h>
20#include <linux/skbuff.h>
21#include <linux/in.h>
22#include <linux/ip.h>
23#include <linux/netdevice.h>
24#include <linux/if_tunnel.h>
25#include <linux/spinlock.h>
26#include <net/protocol.h>
27#include <net/gre.h>
28
29#include <net/icmp.h>
30#include <net/route.h>
31#include <net/xfrm.h>
32
33static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
34static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX];
35
36int gre_add_protocol(const struct gre_protocol *proto, u8 version)
37{
38 if (version >= GREPROTO_MAX)
39 return -EINVAL;
40
41 return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ?
42 0 : -EBUSY;
43}
44EXPORT_SYMBOL_GPL(gre_add_protocol);
45
46int gre_del_protocol(const struct gre_protocol *proto, u8 version)
47{
48 int ret;
49
50 if (version >= GREPROTO_MAX)
51 return -EINVAL;
52
53 ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ?
54 0 : -EBUSY;
55
56 if (ret)
57 return ret;
58
59 synchronize_rcu();
60 return 0;
61}
62EXPORT_SYMBOL_GPL(gre_del_protocol);
63
64void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
65 int hdr_len)
66{
67 struct gre_base_hdr *greh;
68
69 skb_push(skb, hdr_len);
70
71 greh = (struct gre_base_hdr *)skb->data;
72 greh->flags = tnl_flags_to_gre_flags(tpi->flags);
73 greh->protocol = tpi->proto;
74
75 if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
76 __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
77
78 if (tpi->flags&TUNNEL_SEQ) {
79 *ptr = tpi->seq;
80 ptr--;
81 }
82 if (tpi->flags&TUNNEL_KEY) {
83 *ptr = tpi->key;
84 ptr--;
85 }
86 if (tpi->flags&TUNNEL_CSUM &&
87 !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
88 *ptr = 0;
89 *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
90 skb->len, 0));
91 }
92 }
93}
94EXPORT_SYMBOL_GPL(gre_build_header);
95
96struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
97{
98 int err;
99
100 if (likely(!skb->encapsulation)) {
101 skb_reset_inner_headers(skb);
102 skb->encapsulation = 1;
103 }
104
105 if (skb_is_gso(skb)) {
106 err = skb_unclone(skb, GFP_ATOMIC);
107 if (unlikely(err))
108 goto error;
109 skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
110 return skb;
111 } else if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) {
112 err = skb_checksum_help(skb);
113 if (unlikely(err))
114 goto error;
115 } else if (skb->ip_summed != CHECKSUM_PARTIAL)
116 skb->ip_summed = CHECKSUM_NONE;
117
118 return skb;
119error:
120 kfree_skb(skb);
121 return ERR_PTR(err);
122}
123EXPORT_SYMBOL_GPL(gre_handle_offloads);
124
125static __sum16 check_checksum(struct sk_buff *skb)
126{
127 __sum16 csum = 0;
128
129 switch (skb->ip_summed) {
130 case CHECKSUM_COMPLETE:
131 csum = csum_fold(skb->csum);
132
133 if (!csum)
134 break;
135 /* Fall through. */
136
137 case CHECKSUM_NONE:
138 skb->csum = 0;
139 csum = __skb_checksum_complete(skb);
140 skb->ip_summed = CHECKSUM_COMPLETE;
141 break;
142 }
143
144 return csum;
145}
146
147static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
148 bool *csum_err)
149{
150 unsigned int ip_hlen = ip_hdrlen(skb);
151 const struct gre_base_hdr *greh;
152 __be32 *options;
153 int hdr_len;
154
155 if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
156 return -EINVAL;
157
158 greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
159 if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
160 return -EINVAL;
161
162 tpi->flags = gre_flags_to_tnl_flags(greh->flags);
163 hdr_len = ip_gre_calc_hlen(tpi->flags);
164
165 if (!pskb_may_pull(skb, hdr_len))
166 return -EINVAL;
167
168 greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
169 tpi->proto = greh->protocol;
170
171 options = (__be32 *)(greh + 1);
172 if (greh->flags & GRE_CSUM) {
173 if (check_checksum(skb)) {
174 *csum_err = true;
175 return -EINVAL;
176 }
177 options++;
178 }
179
180 if (greh->flags & GRE_KEY) {
181 tpi->key = *options;
182 options++;
183 } else
184 tpi->key = 0;
185
186 if (unlikely(greh->flags & GRE_SEQ)) {
187 tpi->seq = *options;
188 options++;
189 } else
190 tpi->seq = 0;
191
192 /* WCCP version 1 and 2 protocol decoding.
193 * - Change protocol to IP
194 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
195 */
196 if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
197 tpi->proto = htons(ETH_P_IP);
198 if ((*(u8 *)options & 0xF0) != 0x40) {
199 hdr_len += 4;
200 if (!pskb_may_pull(skb, hdr_len))
201 return -EINVAL;
202 }
203 }
204
205 return iptunnel_pull_header(skb, hdr_len, tpi->proto);
206}
207
208static int gre_cisco_rcv(struct sk_buff *skb)
209{
210 struct tnl_ptk_info tpi;
211 int i;
212 bool csum_err = false;
213
214 if (parse_gre_header(skb, &tpi, &csum_err) < 0)
215 goto drop;
216
217 rcu_read_lock();
218 for (i = 0; i < GRE_IP_PROTO_MAX; i++) {
219 struct gre_cisco_protocol *proto;
220 int ret;
221
222 proto = rcu_dereference(gre_cisco_proto_list[i]);
223 if (!proto)
224 continue;
225 ret = proto->handler(skb, &tpi);
226 if (ret == PACKET_RCVD) {
227 rcu_read_unlock();
228 return 0;
229 }
230 }
231 rcu_read_unlock();
232
233 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
234drop:
235 kfree_skb(skb);
236 return 0;
237}
238
239static void gre_cisco_err(struct sk_buff *skb, u32 info)
240{
241 /* All the routers (except for Linux) return only
242 * 8 bytes of packet payload. It means, that precise relaying of
243 * ICMP in the real Internet is absolutely infeasible.
244 *
245 * Moreover, Cisco "wise men" put GRE key to the third word
246 * in GRE header. It makes impossible maintaining even soft
247 * state for keyed
248 * GRE tunnels with enabled checksum. Tell them "thank you".
249 *
250 * Well, I wonder, rfc1812 was written by Cisco employee,
251 * what the hell these idiots break standards established
252 * by themselves???
253 */
254
255 const int type = icmp_hdr(skb)->type;
256 const int code = icmp_hdr(skb)->code;
257 struct tnl_ptk_info tpi;
258 bool csum_err = false;
259 int i;
260
261 if (parse_gre_header(skb, &tpi, &csum_err)) {
262 if (!csum_err) /* ignore csum errors. */
263 return;
264 }
265
266 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
267 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
268 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
269 return;
270 }
271 if (type == ICMP_REDIRECT) {
272 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
273 IPPROTO_GRE, 0);
274 return;
275 }
276
277 rcu_read_lock();
278 for (i = 0; i < GRE_IP_PROTO_MAX; i++) {
279 struct gre_cisco_protocol *proto;
280
281 proto = rcu_dereference(gre_cisco_proto_list[i]);
282 if (!proto)
283 continue;
284
285 if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD)
286 goto out;
287
288 }
289out:
290 rcu_read_unlock();
291}
292
293static int gre_rcv(struct sk_buff *skb)
294{
295 const struct gre_protocol *proto;
296 u8 ver;
297 int ret;
298
299 if (!pskb_may_pull(skb, 12))
300 goto drop;
301
302 ver = skb->data[1]&0x7f;
303 if (ver >= GREPROTO_MAX)
304 goto drop;
305
306 rcu_read_lock();
307 proto = rcu_dereference(gre_proto[ver]);
308 if (!proto || !proto->handler)
309 goto drop_unlock;
310 ret = proto->handler(skb);
311 rcu_read_unlock();
312 return ret;
313
314drop_unlock:
315 rcu_read_unlock();
316drop:
317 kfree_skb(skb);
318 return NET_RX_DROP;
319}
320
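The demux above keys off the GRE version field in the low bits of the second
flag byte; the 0x7f mask also clears the bit RFC 2637 uses as the PPTP
acknowledgment flag. A sketch of the slot mapping, with the values as defined
in include/net/gre.h:

	/* version 0 -> plain RFC 2784/2890 GRE (gre_cisco_rcv() above),
	 * version 1 -> PPTP's enhanced GRE, claimed by the pptp driver. */
	enum {
		GREPROTO_CISCO	= 0,
		GREPROTO_PPTP	= 1,
		GREPROTO_MAX	= 2,
	};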
321static void gre_err(struct sk_buff *skb, u32 info)
322{
323 const struct gre_protocol *proto;
324 const struct iphdr *iph = (const struct iphdr *)skb->data;
325 u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f;
326
327 if (ver >= GREPROTO_MAX)
328 return;
329
330 rcu_read_lock();
331 proto = rcu_dereference(gre_proto[ver]);
332 if (proto && proto->err_handler)
333 proto->err_handler(skb, info);
334 rcu_read_unlock();
335}
336
337static const struct net_protocol net_gre_protocol = {
338 .handler = gre_rcv,
339 .err_handler = gre_err,
340 .netns_ok = 1,
341};
342
343static const struct gre_protocol ipgre_protocol = {
344 .handler = gre_cisco_rcv,
345 .err_handler = gre_cisco_err,
346};
347
348int gre_cisco_register(struct gre_cisco_protocol *newp)
349{
350 struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **)
351 &gre_cisco_proto_list[newp->priority];
352
353 return (cmpxchg(proto, NULL, newp) == NULL) ? 0 : -EBUSY;
354}
355EXPORT_SYMBOL_GPL(gre_cisco_register);
356
357int gre_cisco_unregister(struct gre_cisco_protocol *del_proto)
358{
359 struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **)
360 &gre_cisco_proto_list[del_proto->priority];
361 int ret;
362
363 ret = (cmpxchg(proto, del_proto, NULL) == del_proto) ? 0 : -EINVAL;
364
365 if (ret)
366 return ret;
367
368 synchronize_net();
369 return 0;
370}
371EXPORT_SYMBOL_GPL(gre_cisco_unregister);
372
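A hypothetical third-party user of this API could look like the sketch below
(my_rcv, my_err and the chosen priority are invented for illustration, not
part of the patch). The cmpxchg() makes the slot claim atomic without a lock,
and the synchronize_net() in the unregister path guarantees no CPU is still
inside the rcu_read_lock() sections of gre_cisco_rcv()/gre_cisco_err() when
the caller tears down its state.

	/* Hypothetical module using the cisco-GRE hooks; names invented. */
	static struct gre_cisco_protocol my_gre_proto = {
		.handler	= my_rcv,	/* return PACKET_RCVD to consume skb */
		.err_handler	= my_err,
		.priority	= 1,		/* slot index, < GRE_IP_PROTO_MAX */
	};

	static int __init my_init(void)
	{
		return gre_cisco_register(&my_gre_proto); /* -EBUSY if slot taken */
	}

	static void __exit my_exit(void)
	{
		gre_cisco_unregister(&my_gre_proto); /* waits out RCU readers */
	}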
373static int __init gre_init(void)
374{
375 pr_info("GRE over IPv4 demultiplexor driver\n");
376
377 if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
378 pr_err("can't add protocol\n");
379 goto err;
380 }
381
382 if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) {
383 pr_info("%s: can't add ipgre handler\n", __func__);
384 goto err_gre;
385 }
386
387 if (gre_offload_init()) {
388 pr_err("can't add protocol offload\n");
389 goto err_gso;
390 }
391
392 return 0;
393err_gso:
394 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
395err_gre:
396 inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
397err:
398 return -EAGAIN;
399}
400
401static void __exit gre_exit(void)
402{
403 gre_offload_exit();
404
405 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
406 inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
407}
408
409module_init(gre_init);
410module_exit(gre_exit);
411
412MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver");
413MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
414MODULE_LICENSE("GPL");
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
new file mode 100644
index 000000000000..55e6bfb3a289
--- /dev/null
+++ b/net/ipv4/gre_offload.c
@@ -0,0 +1,130 @@
1/*
2 * IPV4 GSO/GRO offload support
3 * Linux INET implementation
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 *
10 * GRE GSO support
11 */
12
13#include <linux/skbuff.h>
14#include <net/protocol.h>
15#include <net/gre.h>
16
17static int gre_gso_send_check(struct sk_buff *skb)
18{
19 if (!skb->encapsulation)
20 return -EINVAL;
21 return 0;
22}
23
24static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
25 netdev_features_t features)
26{
27 struct sk_buff *segs = ERR_PTR(-EINVAL);
28 netdev_features_t enc_features;
29 int ghl = GRE_HEADER_SECTION;
30 struct gre_base_hdr *greh;
31 int mac_len = skb->mac_len;
32 __be16 protocol = skb->protocol;
33 int tnl_hlen;
34 bool csum;
35
36 if (unlikely(skb_shinfo(skb)->gso_type &
37 ~(SKB_GSO_TCPV4 |
38 SKB_GSO_TCPV6 |
39 SKB_GSO_UDP |
40 SKB_GSO_DODGY |
41 SKB_GSO_TCP_ECN |
42 SKB_GSO_GRE)))
43 goto out;
44
45 if (unlikely(!pskb_may_pull(skb, sizeof(*greh))))
46 goto out;
47
48 greh = (struct gre_base_hdr *)skb_transport_header(skb);
49
50 if (greh->flags & GRE_KEY)
51 ghl += GRE_HEADER_SECTION;
52 if (greh->flags & GRE_SEQ)
53 ghl += GRE_HEADER_SECTION;
54 if (greh->flags & GRE_CSUM) {
55 ghl += GRE_HEADER_SECTION;
56 csum = true;
57 } else
58 csum = false;
59
60 /* setup inner skb. */
61 skb->protocol = greh->protocol;
62 skb->encapsulation = 0;
63
64 if (unlikely(!pskb_may_pull(skb, ghl)))
65 goto out;
66
67 __skb_pull(skb, ghl);
68 skb_reset_mac_header(skb);
69 skb_set_network_header(skb, skb_inner_network_offset(skb));
70 skb->mac_len = skb_inner_network_offset(skb);
71
72 /* segment inner packet. */
73 enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
74 segs = skb_mac_gso_segment(skb, enc_features);
75 if (!segs || IS_ERR(segs))
76 goto out;
77
78 skb = segs;
79 tnl_hlen = skb_tnl_header_len(skb);
80 do {
81 __skb_push(skb, ghl);
82 if (csum) {
83 __be32 *pcsum;
84
85 if (skb_has_shared_frag(skb)) {
86 int err;
87
88 err = __skb_linearize(skb);
89 if (err) {
90 kfree_skb_list(segs);
91 segs = ERR_PTR(err);
92 goto out;
93 }
94 }
95
96 greh = (struct gre_base_hdr *)(skb->data);
97 pcsum = (__be32 *)(greh + 1);
98 *pcsum = 0;
99 *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0));
100 }
101 __skb_push(skb, tnl_hlen - ghl);
102
103 skb_reset_inner_headers(skb);
104 skb->encapsulation = 1;
105
106 skb_reset_mac_header(skb);
107 skb_set_network_header(skb, mac_len);
108 skb->mac_len = mac_len;
109 skb->protocol = protocol;
110 } while ((skb = skb->next));
111out:
112 return segs;
113}
114
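The per-segment csum_fold(skb_checksum(skb, 0, skb->len, 0)) above is the
standard RFC 1071 one's-complement sum over the segment, starting at the GRE
header. A self-contained userspace sketch of the same arithmetic (assuming
big-endian byte pairing and zero-padding of a trailing odd byte):

	#include <stddef.h>
	#include <stdint.h>

	static uint16_t inet_checksum(const uint8_t *data, size_t len)
	{
		uint32_t sum = 0;

		while (len > 1) {		/* sum 16-bit big-endian words */
			sum += ((uint32_t)data[0] << 8) | data[1];
			data += 2;
			len -= 2;
		}
		if (len)			/* pad a trailing odd byte */
			sum += (uint32_t)data[0] << 8;
		while (sum >> 16)		/* fold carries back into the low word */
			sum = (sum & 0xffff) + (sum >> 16);
		return (uint16_t)~sum;		/* one's complement of the sum */
	}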
115static const struct net_offload gre_offload = {
116 .callbacks = {
117 .gso_send_check = gre_gso_send_check,
118 .gso_segment = gre_gso_segment,
119 },
120};
121
122int __init gre_offload_init(void)
123{
124 return inet_add_offload(&gre_offload, IPPROTO_GRE);
125}
126
127void __exit gre_offload_exit(void)
128{
129 inet_del_offload(&gre_offload, IPPROTO_GRE);
130}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 76e10b47e053..5f7d11a45871 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -482,7 +482,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
482{ 482{
483 struct iphdr *iph; 483 struct iphdr *iph;
484 int room; 484 int room;
485 struct icmp_bxm icmp_param; 485 struct icmp_bxm *icmp_param;
486 struct rtable *rt = skb_rtable(skb_in); 486 struct rtable *rt = skb_rtable(skb_in);
487 struct ipcm_cookie ipc; 487 struct ipcm_cookie ipc;
488 struct flowi4 fl4; 488 struct flowi4 fl4;
@@ -503,7 +503,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
503 iph = ip_hdr(skb_in); 503 iph = ip_hdr(skb_in);
504 504
505 if ((u8 *)iph < skb_in->head || 505 if ((u8 *)iph < skb_in->head ||
506 (skb_in->network_header + sizeof(*iph)) > skb_in->tail) 506 (skb_network_header(skb_in) + sizeof(*iph)) >
507 skb_tail_pointer(skb_in))
507 goto out; 508 goto out;
508 509
509 /* 510 /*
@@ -557,9 +558,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
557 } 558 }
558 } 559 }
559 560
561 icmp_param = kmalloc(sizeof(*icmp_param), GFP_ATOMIC);
562 if (!icmp_param)
563 return;
564
560 sk = icmp_xmit_lock(net); 565 sk = icmp_xmit_lock(net);
561 if (sk == NULL) 566 if (sk == NULL)
562 return; 567 goto out_free;
563 568
564 /* 569 /*
565 * Construct source address and options. 570 * Construct source address and options.
@@ -585,7 +590,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
585 IPTOS_PREC_INTERNETCONTROL) : 590 IPTOS_PREC_INTERNETCONTROL) :
586 iph->tos; 591 iph->tos;
587 592
588 if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) 593 if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in))
589 goto out_unlock; 594 goto out_unlock;
590 595
591 596
@@ -593,19 +598,19 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
593 * Prepare data for ICMP header. 598 * Prepare data for ICMP header.
594 */ 599 */
595 600
596 icmp_param.data.icmph.type = type; 601 icmp_param->data.icmph.type = type;
597 icmp_param.data.icmph.code = code; 602 icmp_param->data.icmph.code = code;
598 icmp_param.data.icmph.un.gateway = info; 603 icmp_param->data.icmph.un.gateway = info;
599 icmp_param.data.icmph.checksum = 0; 604 icmp_param->data.icmph.checksum = 0;
600 icmp_param.skb = skb_in; 605 icmp_param->skb = skb_in;
601 icmp_param.offset = skb_network_offset(skb_in); 606 icmp_param->offset = skb_network_offset(skb_in);
602 inet_sk(sk)->tos = tos; 607 inet_sk(sk)->tos = tos;
603 ipc.addr = iph->saddr; 608 ipc.addr = iph->saddr;
604 ipc.opt = &icmp_param.replyopts.opt; 609 ipc.opt = &icmp_param->replyopts.opt;
605 ipc.tx_flags = 0; 610 ipc.tx_flags = 0;
606 611
607 rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, 612 rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
608 type, code, &icmp_param); 613 type, code, icmp_param);
609 if (IS_ERR(rt)) 614 if (IS_ERR(rt))
610 goto out_unlock; 615 goto out_unlock;
611 616
@@ -617,19 +622,21 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
617 room = dst_mtu(&rt->dst); 622 room = dst_mtu(&rt->dst);
618 if (room > 576) 623 if (room > 576)
619 room = 576; 624 room = 576;
620 room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen; 625 room -= sizeof(struct iphdr) + icmp_param->replyopts.opt.opt.optlen;
621 room -= sizeof(struct icmphdr); 626 room -= sizeof(struct icmphdr);
622 627
623 icmp_param.data_len = skb_in->len - icmp_param.offset; 628 icmp_param->data_len = skb_in->len - icmp_param->offset;
624 if (icmp_param.data_len > room) 629 if (icmp_param->data_len > room)
625 icmp_param.data_len = room; 630 icmp_param->data_len = room;
626 icmp_param.head_len = sizeof(struct icmphdr); 631 icmp_param->head_len = sizeof(struct icmphdr);
627 632
628 icmp_push_reply(&icmp_param, &fl4, &ipc, &rt); 633 icmp_push_reply(icmp_param, &fl4, &ipc, &rt);
629ende: 634ende:
630 ip_rt_put(rt); 635 ip_rt_put(rt);
631out_unlock: 636out_unlock:
632 icmp_xmit_unlock(sk); 637 icmp_xmit_unlock(sk);
638out_free:
639 kfree(icmp_param);
633out:; 640out:;
634} 641}
635EXPORT_SYMBOL(icmp_send); 642EXPORT_SYMBOL(icmp_send);
@@ -657,7 +664,8 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
657} 664}
658 665
659/* 666/*
660 * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH. 667 * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and
668 * ICMP_PARAMETERPROB.
661 */ 669 */
662 670
663static void icmp_unreach(struct sk_buff *skb) 671static void icmp_unreach(struct sk_buff *skb)
@@ -939,7 +947,8 @@ error:
939void icmp_err(struct sk_buff *skb, u32 info) 947void icmp_err(struct sk_buff *skb, u32 info)
940{ 948{
941 struct iphdr *iph = (struct iphdr *)skb->data; 949 struct iphdr *iph = (struct iphdr *)skb->data;
942 struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); 950 int offset = iph->ihl<<2;
951 struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset);
943 int type = icmp_hdr(skb)->type; 952 int type = icmp_hdr(skb)->type;
944 int code = icmp_hdr(skb)->code; 953 int code = icmp_hdr(skb)->code;
945 struct net *net = dev_net(skb->dev); 954 struct net *net = dev_net(skb->dev);
@@ -949,7 +958,7 @@ void icmp_err(struct sk_buff *skb, u32 info)
949 * triggered by ICMP_ECHOREPLY which sent from kernel. 958 * triggered by ICMP_ECHOREPLY which sent from kernel.
950 */ 959 */
951 if (icmph->type != ICMP_ECHOREPLY) { 960 if (icmph->type != ICMP_ECHOREPLY) {
952 ping_err(skb, info); 961 ping_err(skb, offset, info);
953 return; 962 return;
954 } 963 }
955 964
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d8c232794bcb..7defdc9ba167 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -88,6 +88,7 @@
88#include <linux/if_arp.h> 88#include <linux/if_arp.h>
89#include <linux/rtnetlink.h> 89#include <linux/rtnetlink.h>
90#include <linux/times.h> 90#include <linux/times.h>
91#include <linux/pkt_sched.h>
91 92
92#include <net/net_namespace.h> 93#include <net/net_namespace.h>
93#include <net/arp.h> 94#include <net/arp.h>
@@ -113,7 +114,8 @@
113 114
114#define IGMP_V1_Router_Present_Timeout (400*HZ) 115#define IGMP_V1_Router_Present_Timeout (400*HZ)
115#define IGMP_V2_Router_Present_Timeout (400*HZ) 116#define IGMP_V2_Router_Present_Timeout (400*HZ)
116#define IGMP_Unsolicited_Report_Interval (10*HZ) 117#define IGMP_V2_Unsolicited_Report_Interval (10*HZ)
118#define IGMP_V3_Unsolicited_Report_Interval (1*HZ)
117#define IGMP_Query_Response_Interval (10*HZ) 119#define IGMP_Query_Response_Interval (10*HZ)
118#define IGMP_Unsolicited_Report_Count 2 120#define IGMP_Unsolicited_Report_Count 2
119 121
@@ -138,6 +140,29 @@
138 ((in_dev)->mr_v2_seen && \ 140 ((in_dev)->mr_v2_seen && \
139 time_before(jiffies, (in_dev)->mr_v2_seen))) 141 time_before(jiffies, (in_dev)->mr_v2_seen)))
140 142
143static int unsolicited_report_interval(struct in_device *in_dev)
144{
145 int interval_ms, interval_jiffies;
146
147 if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
148 interval_ms = IN_DEV_CONF_GET(
149 in_dev,
150 IGMPV2_UNSOLICITED_REPORT_INTERVAL);
151 else /* v3 */
152 interval_ms = IN_DEV_CONF_GET(
153 in_dev,
154 IGMPV3_UNSOLICITED_REPORT_INTERVAL);
155
156 interval_jiffies = msecs_to_jiffies(interval_ms);
157
158 /* _timer functions can't handle a delay of 0 jiffies so ensure
159 * we always return a positive value.
160 */
161 if (interval_jiffies <= 0)
162 interval_jiffies = 1;
163 return interval_jiffies;
164}
165
141static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im); 166static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im);
142static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr); 167static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr);
143static void igmpv3_clear_delrec(struct in_device *in_dev); 168static void igmpv3_clear_delrec(struct in_device *in_dev);
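A worked instance of the conversion above, with HZ assumed to be 1000: the
default 10000 ms interval becomes msecs_to_jiffies(10000) = 10000 jiffies,
while a sysctl value of 0 ms yields 0 jiffies, which the clamp raises to 1 so
the retransmit timer still gets a future expiry it can represent.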
@@ -315,6 +340,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
315 if (size < 256) 340 if (size < 256)
316 return NULL; 341 return NULL;
317 } 342 }
343 skb->priority = TC_PRIO_CONTROL;
318 igmp_skb_size(skb) = size; 344 igmp_skb_size(skb) = size;
319 345
320 rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0, 346 rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0,
@@ -343,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
343 pip->saddr = fl4.saddr; 369 pip->saddr = fl4.saddr;
344 pip->protocol = IPPROTO_IGMP; 370 pip->protocol = IPPROTO_IGMP;
345 pip->tot_len = 0; /* filled in later */ 371 pip->tot_len = 0; /* filled in later */
346 ip_select_ident(pip, &rt->dst, NULL); 372 ip_select_ident(skb, &rt->dst, NULL);
347 ((u8 *)&pip[1])[0] = IPOPT_RA; 373 ((u8 *)&pip[1])[0] = IPOPT_RA;
348 ((u8 *)&pip[1])[1] = 4; 374 ((u8 *)&pip[1])[1] = 4;
349 ((u8 *)&pip[1])[2] = 0; 375 ((u8 *)&pip[1])[2] = 0;
@@ -363,7 +389,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
363static int igmpv3_sendpack(struct sk_buff *skb) 389static int igmpv3_sendpack(struct sk_buff *skb)
364{ 390{
365 struct igmphdr *pig = igmp_hdr(skb); 391 struct igmphdr *pig = igmp_hdr(skb);
366 const int igmplen = skb->tail - skb->transport_header; 392 const int igmplen = skb_tail_pointer(skb) - skb_transport_header(skb);
367 393
368 pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen); 394 pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen);
369 395
@@ -670,6 +696,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
670 ip_rt_put(rt); 696 ip_rt_put(rt);
671 return -1; 697 return -1;
672 } 698 }
699 skb->priority = TC_PRIO_CONTROL;
673 700
674 skb_dst_set(skb, &rt->dst); 701 skb_dst_set(skb, &rt->dst);
675 702
@@ -687,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
687 iph->daddr = dst; 714 iph->daddr = dst;
688 iph->saddr = fl4.saddr; 715 iph->saddr = fl4.saddr;
689 iph->protocol = IPPROTO_IGMP; 716 iph->protocol = IPPROTO_IGMP;
690 ip_select_ident(iph, &rt->dst, NULL); 717 ip_select_ident(skb, &rt->dst, NULL);
691 ((u8 *)&iph[1])[0] = IPOPT_RA; 718 ((u8 *)&iph[1])[0] = IPOPT_RA;
692 ((u8 *)&iph[1])[1] = 4; 719 ((u8 *)&iph[1])[1] = 4;
693 ((u8 *)&iph[1])[2] = 0; 720 ((u8 *)&iph[1])[2] = 0;
@@ -709,7 +736,7 @@ static void igmp_gq_timer_expire(unsigned long data)
709 736
710 in_dev->mr_gq_running = 0; 737 in_dev->mr_gq_running = 0;
711 igmpv3_send_report(in_dev, NULL); 738 igmpv3_send_report(in_dev, NULL);
712 __in_dev_put(in_dev); 739 in_dev_put(in_dev);
713} 740}
714 741
715static void igmp_ifc_timer_expire(unsigned long data) 742static void igmp_ifc_timer_expire(unsigned long data)
@@ -719,9 +746,10 @@ static void igmp_ifc_timer_expire(unsigned long data)
719 igmpv3_send_cr(in_dev); 746 igmpv3_send_cr(in_dev);
720 if (in_dev->mr_ifc_count) { 747 if (in_dev->mr_ifc_count) {
721 in_dev->mr_ifc_count--; 748 in_dev->mr_ifc_count--;
722 igmp_ifc_start_timer(in_dev, IGMP_Unsolicited_Report_Interval); 749 igmp_ifc_start_timer(in_dev,
750 unsolicited_report_interval(in_dev));
723 } 751 }
724 __in_dev_put(in_dev); 752 in_dev_put(in_dev);
725} 753}
726 754
727static void igmp_ifc_event(struct in_device *in_dev) 755static void igmp_ifc_event(struct in_device *in_dev)
@@ -744,7 +772,7 @@ static void igmp_timer_expire(unsigned long data)
744 772
745 if (im->unsolicit_count) { 773 if (im->unsolicit_count) {
746 im->unsolicit_count--; 774 im->unsolicit_count--;
747 igmp_start_timer(im, IGMP_Unsolicited_Report_Interval); 775 igmp_start_timer(im, unsolicited_report_interval(in_dev));
748 } 776 }
749 im->reporter = 1; 777 im->reporter = 1;
750 spin_unlock(&im->lock); 778 spin_unlock(&im->lock);
@@ -1217,6 +1245,57 @@ static void igmp_group_added(struct ip_mc_list *im)
1217 * Multicast list managers 1245 * Multicast list managers
1218 */ 1246 */
1219 1247
1248static u32 ip_mc_hash(const struct ip_mc_list *im)
1249{
1250 return hash_32((__force u32)im->multiaddr, MC_HASH_SZ_LOG);
1251}
1252
1253static void ip_mc_hash_add(struct in_device *in_dev,
1254 struct ip_mc_list *im)
1255{
1256 struct ip_mc_list __rcu **mc_hash;
1257 u32 hash;
1258
1259 mc_hash = rtnl_dereference(in_dev->mc_hash);
1260 if (mc_hash) {
1261 hash = ip_mc_hash(im);
1262 im->next_hash = mc_hash[hash];
1263 rcu_assign_pointer(mc_hash[hash], im);
1264 return;
1265 }
1266
1267 /* do not use a hash table for a small number of items */
1268 if (in_dev->mc_count < 4)
1269 return;
1270
1271 mc_hash = kzalloc(sizeof(struct ip_mc_list *) << MC_HASH_SZ_LOG,
1272 GFP_KERNEL);
1273 if (!mc_hash)
1274 return;
1275
1276 for_each_pmc_rtnl(in_dev, im) {
1277 hash = ip_mc_hash(im);
1278 im->next_hash = mc_hash[hash];
1279 RCU_INIT_POINTER(mc_hash[hash], im);
1280 }
1281
1282 rcu_assign_pointer(in_dev->mc_hash, mc_hash);
1283}
1284
1285static void ip_mc_hash_remove(struct in_device *in_dev,
1286 struct ip_mc_list *im)
1287{
1288 struct ip_mc_list __rcu **mc_hash = rtnl_dereference(in_dev->mc_hash);
1289 struct ip_mc_list *aux;
1290
1291 if (!mc_hash)
1292 return;
1293 mc_hash += ip_mc_hash(im);
1294 while ((aux = rtnl_dereference(*mc_hash)) != im)
1295 mc_hash = &aux->next_hash;
1296 *mc_hash = im->next_hash;
1297}
1298
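The two helpers above build the hash lazily: lists of fewer than four groups
stay purely linear, and the first addition beyond that allocates the table and
back-fills every existing entry before publishing it with rcu_assign_pointer().
The bucket choice is hash_32(), a multiplicative hash keeping the top
MC_HASH_SZ_LOG bits of the product; a sketch, with a multiplier that is only
illustrative of the kernel's golden-ratio constant:

	/* Sketch of the bucket selection hash_32() performs. */
	static inline u32 mc_bucket(__be32 multiaddr, unsigned int log_sz)
	{
		u32 h = (__force u32)multiaddr * 0x9e370001U;

		return h >> (32 - log_sz);
	}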
1220 1299
1221/* 1300/*
1222 * A socket has joined a multicast group on device dev. 1301 * A socket has joined a multicast group on device dev.
@@ -1258,6 +1337,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1258 in_dev->mc_count++; 1337 in_dev->mc_count++;
1259 rcu_assign_pointer(in_dev->mc_list, im); 1338 rcu_assign_pointer(in_dev->mc_list, im);
1260 1339
1340 ip_mc_hash_add(in_dev, im);
1341
1261#ifdef CONFIG_IP_MULTICAST 1342#ifdef CONFIG_IP_MULTICAST
1262 igmpv3_del_delrec(in_dev, im->multiaddr); 1343 igmpv3_del_delrec(in_dev, im->multiaddr);
1263#endif 1344#endif
@@ -1270,16 +1351,17 @@ out:
1270EXPORT_SYMBOL(ip_mc_inc_group); 1351EXPORT_SYMBOL(ip_mc_inc_group);
1271 1352
1272/* 1353/*
1273 * Resend IGMP JOIN report; used for bonding. 1354 * Resend IGMP JOIN report; used by netdev notifier.
1274 * Called with rcu_read_lock()
1275 */ 1355 */
1276void ip_mc_rejoin_groups(struct in_device *in_dev) 1356static void ip_mc_rejoin_groups(struct in_device *in_dev)
1277{ 1357{
1278#ifdef CONFIG_IP_MULTICAST 1358#ifdef CONFIG_IP_MULTICAST
1279 struct ip_mc_list *im; 1359 struct ip_mc_list *im;
1280 int type; 1360 int type;
1281 1361
1282 for_each_pmc_rcu(in_dev, im) { 1362 ASSERT_RTNL();
1363
1364 for_each_pmc_rtnl(in_dev, im) {
1283 if (im->multiaddr == IGMP_ALL_HOSTS) 1365 if (im->multiaddr == IGMP_ALL_HOSTS)
1284 continue; 1366 continue;
1285 1367
@@ -1296,7 +1378,6 @@ void ip_mc_rejoin_groups(struct in_device *in_dev)
1296 } 1378 }
1297#endif 1379#endif
1298} 1380}
1299EXPORT_SYMBOL(ip_mc_rejoin_groups);
1300 1381
1301/* 1382/*
1302 * A socket has left a multicast group on device dev 1383 * A socket has left a multicast group on device dev
@@ -1314,6 +1395,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
1314 ip = &i->next_rcu) { 1395 ip = &i->next_rcu) {
1315 if (i->multiaddr == addr) { 1396 if (i->multiaddr == addr) {
1316 if (--i->users == 0) { 1397 if (--i->users == 0) {
1398 ip_mc_hash_remove(in_dev, i);
1317 *ip = i->next_rcu; 1399 *ip = i->next_rcu;
1318 in_dev->mc_count--; 1400 in_dev->mc_count--;
1319 igmp_group_dropped(i); 1401 igmp_group_dropped(i);
@@ -1381,13 +1463,9 @@ void ip_mc_init_dev(struct in_device *in_dev)
1381{ 1463{
1382 ASSERT_RTNL(); 1464 ASSERT_RTNL();
1383 1465
1384 in_dev->mc_tomb = NULL;
1385#ifdef CONFIG_IP_MULTICAST 1466#ifdef CONFIG_IP_MULTICAST
1386 in_dev->mr_gq_running = 0;
1387 setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire, 1467 setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire,
1388 (unsigned long)in_dev); 1468 (unsigned long)in_dev);
1389 in_dev->mr_ifc_count = 0;
1390 in_dev->mc_count = 0;
1391 setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, 1469 setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire,
1392 (unsigned long)in_dev); 1470 (unsigned long)in_dev);
1393 in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; 1471 in_dev->mr_qrv = IGMP_Unsolicited_Report_Count;
@@ -2321,12 +2399,25 @@ void ip_mc_drop_socket(struct sock *sk)
2321int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) 2399int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto)
2322{ 2400{
2323 struct ip_mc_list *im; 2401 struct ip_mc_list *im;
2402 struct ip_mc_list __rcu **mc_hash;
2324 struct ip_sf_list *psf; 2403 struct ip_sf_list *psf;
2325 int rv = 0; 2404 int rv = 0;
2326 2405
2327 for_each_pmc_rcu(in_dev, im) { 2406 mc_hash = rcu_dereference(in_dev->mc_hash);
2328 if (im->multiaddr == mc_addr) 2407 if (mc_hash) {
2329 break; 2408 u32 hash = hash_32((__force u32)mc_addr, MC_HASH_SZ_LOG);
2409
2410 for (im = rcu_dereference(mc_hash[hash]);
2411 im != NULL;
2412 im = rcu_dereference(im->next_hash)) {
2413 if (im->multiaddr == mc_addr)
2414 break;
2415 }
2416 } else {
2417 for_each_pmc_rcu(in_dev, im) {
2418 if (im->multiaddr == mc_addr)
2419 break;
2420 }
2330 } 2421 }
2331 if (im && proto == IPPROTO_IGMP) { 2422 if (im && proto == IPPROTO_IGMP) {
2332 rv = 1; 2423 rv = 1;
@@ -2672,8 +2763,42 @@ static struct pernet_operations igmp_net_ops = {
2672 .exit = igmp_net_exit, 2763 .exit = igmp_net_exit,
2673}; 2764};
2674 2765
2766static int igmp_netdev_event(struct notifier_block *this,
2767 unsigned long event, void *ptr)
2768{
2769 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2770 struct in_device *in_dev;
2771
2772 switch (event) {
2773 case NETDEV_RESEND_IGMP:
2774 in_dev = __in_dev_get_rtnl(dev);
2775 if (in_dev)
2776 ip_mc_rejoin_groups(in_dev);
2777 break;
2778 default:
2779 break;
2780 }
2781 return NOTIFY_DONE;
2782}
2783
2784static struct notifier_block igmp_notifier = {
2785 .notifier_call = igmp_netdev_event,
2786};
2787
2675int __init igmp_mc_proc_init(void) 2788int __init igmp_mc_proc_init(void)
2676{ 2789{
2677 return register_pernet_subsys(&igmp_net_ops); 2790 int err;
2791
2792 err = register_pernet_subsys(&igmp_net_ops);
2793 if (err)
2794 return err;
2795 err = register_netdevice_notifier(&igmp_notifier);
2796 if (err)
2797 goto reg_notif_fail;
2798 return 0;
2799
2800reg_notif_fail:
2801 unregister_pernet_subsys(&igmp_net_ops);
2802 return err;
2678} 2803}
2679#endif 2804#endif
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 7e06641e36ae..c5313a9c019b 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -93,7 +93,7 @@ void inet_frags_init(struct inet_frags *f)
93 } 93 }
94 rwlock_init(&f->lock); 94 rwlock_init(&f->lock);
95 95
96 f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ 96 f->rnd = (u32) ((totalram_pages ^ (totalram_pages >> 7)) ^
97 (jiffies ^ (jiffies >> 6))); 97 (jiffies ^ (jiffies >> 6)));
98 98
99 setup_timer(&f->secret_timer, inet_frag_secret_rebuild, 99 setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
@@ -247,8 +247,6 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
247{ 247{
248 struct inet_frag_bucket *hb; 248 struct inet_frag_bucket *hb;
249 struct inet_frag_queue *qp; 249 struct inet_frag_queue *qp;
250#ifdef CONFIG_SMP
251#endif
252 unsigned int hash; 250 unsigned int hash;
253 251
254 read_lock(&f->lock); /* Protects against hash rebuild */ 252 read_lock(&f->lock); /* Protects against hash rebuild */
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 6af375afeeef..7bd8983dbfcf 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -467,7 +467,7 @@ void inet_unhash(struct sock *sk)
467 lock = inet_ehash_lockp(hashinfo, sk->sk_hash); 467 lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
468 468
469 spin_lock_bh(lock); 469 spin_lock_bh(lock);
470 done =__sk_nulls_del_node_init_rcu(sk); 470 done = __sk_nulls_del_node_init_rcu(sk);
471 if (done) 471 if (done)
472 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 472 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
473 spin_unlock_bh(lock); 473 spin_unlock_bh(lock);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 000e3d239d64..33d5537881ed 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -32,8 +32,8 @@
32 * At the moment of writing this notes identifier of IP packets is generated 32 * At the moment of writing this notes identifier of IP packets is generated
33 * to be unpredictable using this code only for packets subjected 33 * to be unpredictable using this code only for packets subjected
34 * (actually or potentially) to defragmentation. I.e. DF packets less than 34 * (actually or potentially) to defragmentation. I.e. DF packets less than
35 * PMTU in size uses a constant ID and do not use this code (see 35 * PMTU in size when local fragmentation is disabled use a constant ID and do
36 * ip_select_ident() in include/net/ip.h). 36 * not use this code (see ip_select_ident() in include/net/ip.h).
37 * 37 *
38 * Route cache entries hold references to our nodes. 38 * Route cache entries hold references to our nodes.
39 * New cache entries get references via lookup by destination IP address in 39 * New cache entries get references via lookup by destination IP address in
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2a83591492dd..d7aea4c5b940 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -121,103 +121,8 @@ static int ipgre_tunnel_init(struct net_device *dev);
121static int ipgre_net_id __read_mostly; 121static int ipgre_net_id __read_mostly;
122static int gre_tap_net_id __read_mostly; 122static int gre_tap_net_id __read_mostly;
123 123
124static __sum16 check_checksum(struct sk_buff *skb) 124static int ipgre_err(struct sk_buff *skb, u32 info,
125{ 125 const struct tnl_ptk_info *tpi)
126 __sum16 csum = 0;
127
128 switch (skb->ip_summed) {
129 case CHECKSUM_COMPLETE:
130 csum = csum_fold(skb->csum);
131
132 if (!csum)
133 break;
134 /* Fall through. */
135
136 case CHECKSUM_NONE:
137 skb->csum = 0;
138 csum = __skb_checksum_complete(skb);
139 skb->ip_summed = CHECKSUM_COMPLETE;
140 break;
141 }
142
143 return csum;
144}
145
146static int ip_gre_calc_hlen(__be16 o_flags)
147{
148 int addend = 4;
149
150 if (o_flags&TUNNEL_CSUM)
151 addend += 4;
152 if (o_flags&TUNNEL_KEY)
153 addend += 4;
154 if (o_flags&TUNNEL_SEQ)
155 addend += 4;
156 return addend;
157}
158
159static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
160 bool *csum_err, int *hdr_len)
161{
162 unsigned int ip_hlen = ip_hdrlen(skb);
163 const struct gre_base_hdr *greh;
164 __be32 *options;
165
166 if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
167 return -EINVAL;
168
169 greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
170 if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
171 return -EINVAL;
172
173 tpi->flags = gre_flags_to_tnl_flags(greh->flags);
174 *hdr_len = ip_gre_calc_hlen(tpi->flags);
175
176 if (!pskb_may_pull(skb, *hdr_len))
177 return -EINVAL;
178
179 greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
180
181 tpi->proto = greh->protocol;
182
183 options = (__be32 *)(greh + 1);
184 if (greh->flags & GRE_CSUM) {
185 if (check_checksum(skb)) {
186 *csum_err = true;
187 return -EINVAL;
188 }
189 options++;
190 }
191
192 if (greh->flags & GRE_KEY) {
193 tpi->key = *options;
194 options++;
195 } else
196 tpi->key = 0;
197
198 if (unlikely(greh->flags & GRE_SEQ)) {
199 tpi->seq = *options;
200 options++;
201 } else
202 tpi->seq = 0;
203
204 /* WCCP version 1 and 2 protocol decoding.
205 * - Change protocol to IP
206 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
207 */
208 if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
209 tpi->proto = htons(ETH_P_IP);
210 if ((*(u8 *)options & 0xF0) != 0x40) {
211 *hdr_len += 4;
212 if (!pskb_may_pull(skb, *hdr_len))
213 return -EINVAL;
214 }
215 }
216
217 return 0;
218}
219
220static void ipgre_err(struct sk_buff *skb, u32 info)
221{ 126{
222 127
223 /* All the routers (except for Linux) return only 128 /* All the routers (except for Linux) return only
@@ -239,26 +144,18 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
239 const int type = icmp_hdr(skb)->type; 144 const int type = icmp_hdr(skb)->type;
240 const int code = icmp_hdr(skb)->code; 145 const int code = icmp_hdr(skb)->code;
241 struct ip_tunnel *t; 146 struct ip_tunnel *t;
242 struct tnl_ptk_info tpi;
243 int hdr_len;
244 bool csum_err = false;
245
246 if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) {
247 if (!csum_err) /* ignore csum errors. */
248 return;
249 }
250 147
251 switch (type) { 148 switch (type) {
252 default: 149 default:
253 case ICMP_PARAMETERPROB: 150 case ICMP_PARAMETERPROB:
254 return; 151 return PACKET_RCVD;
255 152
256 case ICMP_DEST_UNREACH: 153 case ICMP_DEST_UNREACH:
257 switch (code) { 154 switch (code) {
258 case ICMP_SR_FAILED: 155 case ICMP_SR_FAILED:
259 case ICMP_PORT_UNREACH: 156 case ICMP_PORT_UNREACH:
260 /* Impossible event. */ 157 /* Impossible event. */
261 return; 158 return PACKET_RCVD;
262 default: 159 default:
263 /* All others are translated to HOST_UNREACH. 160 /* All others are translated to HOST_UNREACH.
264 rfc2003 contains "deep thoughts" about NET_UNREACH, 161 rfc2003 contains "deep thoughts" about NET_UNREACH,
@@ -269,138 +166,61 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
269 break; 166 break;
270 case ICMP_TIME_EXCEEDED: 167 case ICMP_TIME_EXCEEDED:
271 if (code != ICMP_EXC_TTL) 168 if (code != ICMP_EXC_TTL)
272 return; 169 return PACKET_RCVD;
273 break; 170 break;
274 171
275 case ICMP_REDIRECT: 172 case ICMP_REDIRECT:
276 break; 173 break;
277 } 174 }
278 175
279 if (tpi.proto == htons(ETH_P_TEB)) 176 if (tpi->proto == htons(ETH_P_TEB))
280 itn = net_generic(net, gre_tap_net_id); 177 itn = net_generic(net, gre_tap_net_id);
281 else 178 else
282 itn = net_generic(net, ipgre_net_id); 179 itn = net_generic(net, ipgre_net_id);
283 180
284 iph = (const struct iphdr *)skb->data; 181 iph = (const struct iphdr *)skb->data;
285 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, 182 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
286 iph->daddr, iph->saddr, tpi.key); 183 iph->daddr, iph->saddr, tpi->key);
287 184
288 if (t == NULL) 185 if (t == NULL)
289 return; 186 return PACKET_REJECT;
290 187
291 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
292 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
293 t->parms.link, 0, IPPROTO_GRE, 0);
294 return;
295 }
296 if (type == ICMP_REDIRECT) {
297 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
298 IPPROTO_GRE, 0);
299 return;
300 }
301 if (t->parms.iph.daddr == 0 || 188 if (t->parms.iph.daddr == 0 ||
302 ipv4_is_multicast(t->parms.iph.daddr)) 189 ipv4_is_multicast(t->parms.iph.daddr))
303 return; 190 return PACKET_RCVD;
304 191
305 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 192 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
306 return; 193 return PACKET_RCVD;
307 194
308 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) 195 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
309 t->err_count++; 196 t->err_count++;
310 else 197 else
311 t->err_count = 1; 198 t->err_count = 1;
312 t->err_time = jiffies; 199 t->err_time = jiffies;
200 return PACKET_RCVD;
313} 201}
314 202
315static int ipgre_rcv(struct sk_buff *skb) 203static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
316{ 204{
317 struct net *net = dev_net(skb->dev); 205 struct net *net = dev_net(skb->dev);
318 struct ip_tunnel_net *itn; 206 struct ip_tunnel_net *itn;
319 const struct iphdr *iph; 207 const struct iphdr *iph;
320 struct ip_tunnel *tunnel; 208 struct ip_tunnel *tunnel;
321 struct tnl_ptk_info tpi;
322 int hdr_len;
323 bool csum_err = false;
324
325 if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0)
326 goto drop;
327 209
328 if (tpi.proto == htons(ETH_P_TEB)) 210 if (tpi->proto == htons(ETH_P_TEB))
329 itn = net_generic(net, gre_tap_net_id); 211 itn = net_generic(net, gre_tap_net_id);
330 else 212 else
331 itn = net_generic(net, ipgre_net_id); 213 itn = net_generic(net, ipgre_net_id);
332 214
333 iph = ip_hdr(skb); 215 iph = ip_hdr(skb);
334 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, 216 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
335 iph->saddr, iph->daddr, tpi.key); 217 iph->saddr, iph->daddr, tpi->key);
336 218
337 if (tunnel) { 219 if (tunnel) {
338 ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); 220 ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
339 return 0; 221 return PACKET_RCVD;
340 }
341 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
342drop:
343 kfree_skb(skb);
344 return 0;
345}
346
347static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb)
348{
349 int err;
350
351 if (skb_is_gso(skb)) {
352 err = skb_unclone(skb, GFP_ATOMIC);
353 if (unlikely(err))
354 goto error;
355 skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
356 return skb;
357 } else if (skb->ip_summed == CHECKSUM_PARTIAL &&
358 tunnel->parms.o_flags&TUNNEL_CSUM) {
359 err = skb_checksum_help(skb);
360 if (unlikely(err))
361 goto error;
362 } else if (skb->ip_summed != CHECKSUM_PARTIAL)
363 skb->ip_summed = CHECKSUM_NONE;
364
365 return skb;
366
367error:
368 kfree_skb(skb);
369 return ERR_PTR(err);
370}
371
372static struct sk_buff *gre_build_header(struct sk_buff *skb,
373 const struct tnl_ptk_info *tpi,
374 int hdr_len)
375{
376 struct gre_base_hdr *greh;
377
378 skb_push(skb, hdr_len);
379
380 greh = (struct gre_base_hdr *)skb->data;
381 greh->flags = tnl_flags_to_gre_flags(tpi->flags);
382 greh->protocol = tpi->proto;
383
384 if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
385 __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
386
387 if (tpi->flags&TUNNEL_SEQ) {
388 *ptr = tpi->seq;
389 ptr--;
390 }
391 if (tpi->flags&TUNNEL_KEY) {
392 *ptr = tpi->key;
393 ptr--;
394 }
395 if (tpi->flags&TUNNEL_CSUM &&
396 !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
397 *(__sum16 *)ptr = 0;
398 *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
399 skb->len, 0));
400 }
401 } 222 }
402 223 return PACKET_REJECT;
403 return skb;
404} 224}
405 225
406static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, 226static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -410,11 +230,6 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
410 struct ip_tunnel *tunnel = netdev_priv(dev); 230 struct ip_tunnel *tunnel = netdev_priv(dev);
411 struct tnl_ptk_info tpi; 231 struct tnl_ptk_info tpi;
412 232
413 if (likely(!skb->encapsulation)) {
414 skb_reset_inner_headers(skb);
415 skb->encapsulation = 1;
416 }
417
418 tpi.flags = tunnel->parms.o_flags; 233 tpi.flags = tunnel->parms.o_flags;
419 tpi.proto = proto; 234 tpi.proto = proto;
420 tpi.key = tunnel->parms.o_key; 235 tpi.key = tunnel->parms.o_key;
@@ -423,13 +238,9 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
423 tpi.seq = htonl(tunnel->o_seqno); 238 tpi.seq = htonl(tunnel->o_seqno);
424 239
425 /* Push GRE header. */ 240 /* Push GRE header. */
426 skb = gre_build_header(skb, &tpi, tunnel->hlen); 241 gre_build_header(skb, &tpi, tunnel->hlen);
427 if (unlikely(!skb)) {
428 dev->stats.tx_dropped++;
429 return;
430 }
431 242
432 ip_tunnel_xmit(skb, dev, tnl_params); 243 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
433} 244}
434 245
435static netdev_tx_t ipgre_xmit(struct sk_buff *skb, 246static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
@@ -438,7 +249,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
438 struct ip_tunnel *tunnel = netdev_priv(dev); 249 struct ip_tunnel *tunnel = netdev_priv(dev);
439 const struct iphdr *tnl_params; 250 const struct iphdr *tnl_params;
440 251
441 skb = handle_offloads(tunnel, skb); 252 skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
442 if (IS_ERR(skb)) 253 if (IS_ERR(skb))
443 goto out; 254 goto out;
444 255
@@ -477,7 +288,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
477{ 288{
478 struct ip_tunnel *tunnel = netdev_priv(dev); 289 struct ip_tunnel *tunnel = netdev_priv(dev);
479 290
480 skb = handle_offloads(tunnel, skb); 291 skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
481 if (IS_ERR(skb)) 292 if (IS_ERR(skb))
482 goto out; 293 goto out;
483 294
@@ -503,10 +314,11 @@ static int ipgre_tunnel_ioctl(struct net_device *dev,
503 314
504 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 315 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
505 return -EFAULT; 316 return -EFAULT;
506 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || 317 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
507 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || 318 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
508 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) { 319 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
509 return -EINVAL; 320 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
321 return -EINVAL;
510 } 322 }
511 p.i_flags = gre_flags_to_tnl_flags(p.i_flags); 323 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
512 p.o_flags = gre_flags_to_tnl_flags(p.o_flags); 324 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
@@ -571,7 +383,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
571 if (daddr) 383 if (daddr)
572 memcpy(&iph->daddr, daddr, 4); 384 memcpy(&iph->daddr, daddr, 4);
573 if (iph->daddr) 385 if (iph->daddr)
574 return t->hlen; 386 return t->hlen + sizeof(*iph);
575 387
576 return -(t->hlen + sizeof(*iph)); 388 return -(t->hlen + sizeof(*iph));
577} 389}
@@ -708,9 +520,10 @@ static int ipgre_tunnel_init(struct net_device *dev)
708 return ip_tunnel_init(dev); 520 return ip_tunnel_init(dev);
709} 521}
710 522
711static const struct gre_protocol ipgre_protocol = { 523static struct gre_cisco_protocol ipgre_protocol = {
712 .handler = ipgre_rcv, 524 .handler = ipgre_rcv,
713 .err_handler = ipgre_err, 525 .err_handler = ipgre_err,
526 .priority = 0,
714}; 527};
715 528
716static int __net_init ipgre_init_net(struct net *net) 529static int __net_init ipgre_init_net(struct net *net)
@@ -721,7 +534,7 @@ static int __net_init ipgre_init_net(struct net *net)
721static void __net_exit ipgre_exit_net(struct net *net) 534static void __net_exit ipgre_exit_net(struct net *net)
722{ 535{
723 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id); 536 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
724 ip_tunnel_delete_net(itn); 537 ip_tunnel_delete_net(itn, &ipgre_link_ops);
725} 538}
726 539
727static struct pernet_operations ipgre_net_ops = { 540static struct pernet_operations ipgre_net_ops = {
@@ -954,7 +767,7 @@ static int __net_init ipgre_tap_init_net(struct net *net)
954static void __net_exit ipgre_tap_exit_net(struct net *net) 767static void __net_exit ipgre_tap_exit_net(struct net *net)
955{ 768{
956 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id); 769 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
957 ip_tunnel_delete_net(itn); 770 ip_tunnel_delete_net(itn, &ipgre_tap_ops);
958} 771}
959 772
960static struct pernet_operations ipgre_tap_net_ops = { 773static struct pernet_operations ipgre_tap_net_ops = {
@@ -978,7 +791,7 @@ static int __init ipgre_init(void)
978 if (err < 0) 791 if (err < 0)
979 goto pnet_tap_failed; 792 goto pnet_tap_failed;
980 793
981 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); 794 err = gre_cisco_register(&ipgre_protocol);
982 if (err < 0) { 795 if (err < 0) {
983 pr_info("%s: can't add protocol\n", __func__); 796 pr_info("%s: can't add protocol\n", __func__);
984 goto add_proto_failed; 797 goto add_proto_failed;
@@ -997,7 +810,7 @@ static int __init ipgre_init(void)
997tap_ops_failed: 810tap_ops_failed:
998 rtnl_link_unregister(&ipgre_link_ops); 811 rtnl_link_unregister(&ipgre_link_ops);
999rtnl_link_failed: 812rtnl_link_failed:
1000 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); 813 gre_cisco_unregister(&ipgre_protocol);
1001add_proto_failed: 814add_proto_failed:
1002 unregister_pernet_device(&ipgre_tap_net_ops); 815 unregister_pernet_device(&ipgre_tap_net_ops);
1003pnet_tap_failed: 816pnet_tap_failed:
@@ -1009,8 +822,7 @@ static void __exit ipgre_fini(void)
1009{ 822{
1010 rtnl_link_unregister(&ipgre_tap_ops); 823 rtnl_link_unregister(&ipgre_tap_ops);
1011 rtnl_link_unregister(&ipgre_link_ops); 824 rtnl_link_unregister(&ipgre_link_ops);
1012 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) 825 gre_cisco_unregister(&ipgre_protocol);
1013 pr_info("%s: can't remove protocol\n", __func__);
1014 unregister_pernet_device(&ipgre_tap_net_ops); 826 unregister_pernet_device(&ipgre_tap_net_ops);
1015 unregister_pernet_device(&ipgre_net_ops); 827 unregister_pernet_device(&ipgre_net_ops);
1016} 828}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 3da817b89e9b..054a3e97d822 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -141,6 +141,7 @@
141#include <net/icmp.h> 141#include <net/icmp.h>
142#include <net/raw.h> 142#include <net/raw.h>
143#include <net/checksum.h> 143#include <net/checksum.h>
144#include <net/inet_ecn.h>
144#include <linux/netfilter_ipv4.h> 145#include <linux/netfilter_ipv4.h>
145#include <net/xfrm.h> 146#include <net/xfrm.h>
146#include <linux/mroute.h> 147#include <linux/mroute.h>
@@ -190,10 +191,7 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
190{ 191{
191 struct net *net = dev_net(skb->dev); 192 struct net *net = dev_net(skb->dev);
192 193
193 __skb_pull(skb, ip_hdrlen(skb)); 194 __skb_pull(skb, skb_network_header_len(skb));
194
195 /* Point into the IP datagram, just past the header. */
196 skb_reset_transport_header(skb);
197 195
198 rcu_read_lock(); 196 rcu_read_lock();
199 { 197 {
@@ -413,6 +411,13 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
413 if (iph->ihl < 5 || iph->version != 4) 411 if (iph->ihl < 5 || iph->version != 4)
414 goto inhdr_error; 412 goto inhdr_error;
415 413
414 BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
415 BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
416 BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
417 IP_ADD_STATS_BH(dev_net(dev),
418 IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
419 max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
420
416 if (!pskb_may_pull(skb, iph->ihl*4)) 421 if (!pskb_may_pull(skb, iph->ihl*4))
417 goto inhdr_error; 422 goto inhdr_error;
418 423
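The new accounting relies on the ECN field being the low two bits of tos and
on the four MIB slots being laid out contiguously in that same order, which is
exactly what the BUILD_BUG_ON()s pin down at compile time. Spelled out
(codepoint values per RFC 3168):

	Not-ECT = 0  ->  IPSTATS_MIB_NOECTPKTS + 0
	ECT(1)  = 1  ->  IPSTATS_MIB_NOECTPKTS + 1  (IPSTATS_MIB_ECT1PKTS)
	ECT(0)  = 2  ->  IPSTATS_MIB_NOECTPKTS + 2  (IPSTATS_MIB_ECT0PKTS)
	CE      = 3  ->  IPSTATS_MIB_NOECTPKTS + 3  (IPSTATS_MIB_CEPKTS)

The max_t(..., 1, gso_segs) term counts a GSO aggregate once per segment it
carries, and an ordinary packet at least once.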
@@ -437,6 +442,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
437 goto drop; 442 goto drop;
438 } 443 }
439 444
445 skb->transport_header = skb->network_header + iph->ihl*4;
446
440 /* Remove any debris in the socket control block */ 447 /* Remove any debris in the socket control block */
441 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 448 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
442 449
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 4bcabf3ab4ca..a04d872c54f9 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
148 iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); 148 iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
149 iph->saddr = saddr; 149 iph->saddr = saddr;
150 iph->protocol = sk->sk_protocol; 150 iph->protocol = sk->sk_protocol;
151 ip_select_ident(iph, &rt->dst, sk); 151 ip_select_ident(skb, &rt->dst, sk);
152 152
153 if (opt && opt->opt.optlen) { 153 if (opt && opt->opt.optlen) {
154 iph->ihl += opt->opt.optlen>>2; 154 iph->ihl += opt->opt.optlen>>2;
@@ -211,14 +211,6 @@ static inline int ip_finish_output2(struct sk_buff *skb)
211 return -EINVAL; 211 return -EINVAL;
212} 212}
213 213
214static inline int ip_skb_dst_mtu(struct sk_buff *skb)
215{
216 struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
217
218 return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
219 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
220}
221
222static int ip_finish_output(struct sk_buff *skb) 214static int ip_finish_output(struct sk_buff *skb)
223{ 215{
224#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 216#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -394,7 +386,7 @@ packet_routed:
394 ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); 386 ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
395 } 387 }
396 388
397 ip_select_ident_more(iph, &rt->dst, sk, 389 ip_select_ident_more(skb, &rt->dst, sk,
398 (skb_shinfo(skb)->gso_segs ?: 1) - 1); 390 (skb_shinfo(skb)->gso_segs ?: 1) - 1);
399 391
400 skb->priority = sk->sk_priority; 392 skb->priority = sk->sk_priority;
@@ -1324,7 +1316,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
1324 else 1316 else
1325 ttl = ip_select_ttl(inet, &rt->dst); 1317 ttl = ip_select_ttl(inet, &rt->dst);
1326 1318
1327 iph = (struct iphdr *)skb->data; 1319 iph = ip_hdr(skb);
1328 iph->version = 4; 1320 iph->version = 4;
1329 iph->ihl = 5; 1321 iph->ihl = 5;
1330 iph->tos = inet->tos; 1322 iph->tos = inet->tos;
@@ -1332,7 +1324,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
1332 iph->ttl = ttl; 1324 iph->ttl = ttl;
1333 iph->protocol = sk->sk_protocol; 1325 iph->protocol = sk->sk_protocol;
1334 ip_copy_addrs(iph, fl4); 1326 ip_copy_addrs(iph, fl4);
1335 ip_select_ident(iph, &rt->dst, sk); 1327 ip_select_ident(skb, &rt->dst, sk);
1336 1328
1337 if (opt) { 1329 if (opt) {
1338 iph->ihl += opt->optlen>>2; 1330 iph->ihl += opt->optlen>>2;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index be2f8da0ae8e..63a6d6d6b875 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -304,6 +304,7 @@ static struct net_device *__ip_tunnel_create(struct net *net,
304 304
305 tunnel = netdev_priv(dev); 305 tunnel = netdev_priv(dev);
306 tunnel->parms = *parms; 306 tunnel->parms = *parms;
307 tunnel->net = net;
307 308
308 err = register_netdevice(dev); 309 err = register_netdevice(dev);
309 if (err) 310 if (err)
@@ -349,7 +350,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
349 struct flowi4 fl4; 350 struct flowi4 fl4;
350 struct rtable *rt; 351 struct rtable *rt;
351 352
352 rt = ip_route_output_tunnel(dev_net(dev), &fl4, 353 rt = ip_route_output_tunnel(tunnel->net, &fl4,
353 tunnel->parms.iph.protocol, 354 tunnel->parms.iph.protocol,
354 iph->daddr, iph->saddr, 355 iph->daddr, iph->saddr,
355 tunnel->parms.o_key, 356 tunnel->parms.o_key,
@@ -364,7 +365,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
364 } 365 }
365 366
366 if (!tdev && tunnel->parms.link) 367 if (!tdev && tunnel->parms.link)
367 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); 368 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
368 369
369 if (tdev) { 370 if (tdev) {
370 hlen = tdev->hard_header_len + tdev->needed_headroom; 371 hlen = tdev->hard_header_len + tdev->needed_headroom;
@@ -408,13 +409,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
408 const struct iphdr *iph = ip_hdr(skb); 409 const struct iphdr *iph = ip_hdr(skb);
409 int err; 410 int err;
410 411
411 secpath_reset(skb);
412
413 skb->protocol = tpi->proto;
414
415 skb->mac_header = skb->network_header;
416 __pskb_pull(skb, tunnel->hlen);
417 skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
418#ifdef CONFIG_NET_IPGRE_BROADCAST 412#ifdef CONFIG_NET_IPGRE_BROADCAST
419 if (ipv4_is_multicast(iph->daddr)) { 413 if (ipv4_is_multicast(iph->daddr)) {
420 /* Looped back packet, drop it! */ 414 /* Looped back packet, drop it! */
@@ -442,23 +436,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
442 tunnel->i_seqno = ntohl(tpi->seq) + 1; 436 tunnel->i_seqno = ntohl(tpi->seq) + 1;
443 } 437 }
444 438
445 /* Warning: All skb pointers will be invalidated! */
446 if (tunnel->dev->type == ARPHRD_ETHER) {
447 if (!pskb_may_pull(skb, ETH_HLEN)) {
448 tunnel->dev->stats.rx_length_errors++;
449 tunnel->dev->stats.rx_errors++;
450 goto drop;
451 }
452
453 iph = ip_hdr(skb);
454 skb->protocol = eth_type_trans(skb, tunnel->dev);
455 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
456 }
457
458 skb->pkt_type = PACKET_HOST;
459 __skb_tunnel_rx(skb, tunnel->dev);
460
461 skb_reset_network_header(skb);
462 err = IP_ECN_decapsulate(iph, skb); 439 err = IP_ECN_decapsulate(iph, skb);
463 if (unlikely(err)) { 440 if (unlikely(err)) {
464 if (log_ecn_error) 441 if (log_ecn_error)
@@ -477,6 +454,15 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
477 tstats->rx_bytes += skb->len; 454 tstats->rx_bytes += skb->len;
478 u64_stats_update_end(&tstats->syncp); 455 u64_stats_update_end(&tstats->syncp);
479 456
457 if (tunnel->dev->type == ARPHRD_ETHER) {
458 skb->protocol = eth_type_trans(skb, tunnel->dev);
459 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
460 } else {
461 skb->dev = tunnel->dev;
462 }
463
464 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
465
480 gro_cells_receive(&tunnel->gro_cells, skb); 466 gro_cells_receive(&tunnel->gro_cells, skb);
481 return 0; 467 return 0;
482 468
@@ -486,24 +472,69 @@ drop:
486} 472}
487EXPORT_SYMBOL_GPL(ip_tunnel_rcv); 473EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
488 474
475static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
476 struct rtable *rt, __be16 df)
477{
478 struct ip_tunnel *tunnel = netdev_priv(dev);
479 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
480 int mtu;
481
482 if (df)
483 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
484 - sizeof(struct iphdr) - tunnel->hlen;
485 else
486 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
487
488 if (skb_dst(skb))
489 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
490
491 if (skb->protocol == htons(ETH_P_IP)) {
492 if (!skb_is_gso(skb) &&
493 (df & htons(IP_DF)) && mtu < pkt_size) {
494 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
495 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
496 return -E2BIG;
497 }
498 }
499#if IS_ENABLED(CONFIG_IPV6)
500 else if (skb->protocol == htons(ETH_P_IPV6)) {
501 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
502
503 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
504 mtu >= IPV6_MIN_MTU) {
505 if ((tunnel->parms.iph.daddr &&
506 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
507 rt6->rt6i_dst.plen == 128) {
508 rt6->rt6i_flags |= RTF_MODIFIED;
509 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
510 }
511 }
512
513 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
514 mtu < pkt_size) {
515 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
516 return -E2BIG;
517 }
518 }
519#endif
520 return 0;
521}
522
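A worked instance of the DF branch above, with assumed numbers: an underlay
route MTU of 1500, a tunnel device with no link-layer header
(hard_header_len = 0), and keyed GRE (tunnel->hlen = 8, the 4-byte base
header plus a 4-byte key):

	mtu = 1500 - 0 - sizeof(struct iphdr) - 8 = 1500 - 20 - 8 = 1472

so an inner IPv4 packet larger than 1472 bytes with DF set is bounced with
ICMP_DEST_UNREACH/ICMP_FRAG_NEEDED carrying htonl(1472) instead of being
transmitted.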
489void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, 523void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
490 const struct iphdr *tnl_params) 524 const struct iphdr *tnl_params, const u8 protocol)
491{ 525{
492 struct ip_tunnel *tunnel = netdev_priv(dev); 526 struct ip_tunnel *tunnel = netdev_priv(dev);
493 const struct iphdr *inner_iph; 527 const struct iphdr *inner_iph;
494 struct iphdr *iph;
495 struct flowi4 fl4; 528 struct flowi4 fl4;
496 u8 tos, ttl; 529 u8 tos, ttl;
497 __be16 df; 530 __be16 df;
498 struct rtable *rt; /* Route to the other host */ 531 struct rtable *rt; /* Route to the other host */
499 struct net_device *tdev; /* Device to other host */
500 unsigned int max_headroom; /* The extra header space needed */ 532 unsigned int max_headroom; /* The extra header space needed */
501 __be32 dst; 533 __be32 dst;
502 int mtu; 534 int err;
503 535
504 inner_iph = (const struct iphdr *)skb_inner_network_header(skb); 536 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
505 537
506 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
507 dst = tnl_params->daddr; 538 dst = tnl_params->daddr;
508 if (dst == 0) { 539 if (dst == 0) {
509 /* NBMA tunnel */ 540 /* NBMA tunnel */
@@ -561,8 +592,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
561 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); 592 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
562 } 593 }
563 594
564 rt = ip_route_output_tunnel(dev_net(dev), &fl4, 595 rt = ip_route_output_tunnel(tunnel->net, &fl4,
565 tunnel->parms.iph.protocol, 596 protocol,
566 dst, tnl_params->saddr, 597 dst, tnl_params->saddr,
567 tunnel->parms.o_key, 598 tunnel->parms.o_key,
568 RT_TOS(tos), 599 RT_TOS(tos),
@@ -571,58 +602,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
571 dev->stats.tx_carrier_errors++; 602 dev->stats.tx_carrier_errors++;
572 goto tx_error; 603 goto tx_error;
573 } 604 }
574 tdev = rt->dst.dev; 605 if (rt->dst.dev == dev) {
575
576 if (tdev == dev) {
577 ip_rt_put(rt); 606 ip_rt_put(rt);
578 dev->stats.collisions++; 607 dev->stats.collisions++;
579 goto tx_error; 608 goto tx_error;
580 } 609 }
581 610
582 df = tnl_params->frag_off; 611 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
583 612 ip_rt_put(rt);
584 if (df) 613 goto tx_error;
585 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
586 - sizeof(struct iphdr);
587 else
588 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
589
590 if (skb_dst(skb))
591 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
592
593 if (skb->protocol == htons(ETH_P_IP)) {
594 df |= (inner_iph->frag_off&htons(IP_DF));
595
596 if (!skb_is_gso(skb) &&
597 (inner_iph->frag_off&htons(IP_DF)) &&
598 mtu < ntohs(inner_iph->tot_len)) {
599 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
600 ip_rt_put(rt);
601 goto tx_error;
602 }
603 }
604#if IS_ENABLED(CONFIG_IPV6)
605 else if (skb->protocol == htons(ETH_P_IPV6)) {
606 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
607
608 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
609 mtu >= IPV6_MIN_MTU) {
610 if ((tunnel->parms.iph.daddr &&
611 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
612 rt6->rt6i_dst.plen == 128) {
613 rt6->rt6i_flags |= RTF_MODIFIED;
614 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
615 }
616 }
617
618 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
619 mtu < skb->len) {
620 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
621 ip_rt_put(rt);
622 goto tx_error;
623 }
624 } 614 }
625#endif
626 615
627 if (tunnel->err_count > 0) { 616 if (tunnel->err_count > 0) {
628 if (time_before(jiffies, 617 if (time_before(jiffies,
@@ -634,6 +623,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
634 tunnel->err_count = 0; 623 tunnel->err_count = 0;
635 } 624 }
636 625
626 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
637 ttl = tnl_params->ttl; 627 ttl = tnl_params->ttl;
638 if (ttl == 0) { 628 if (ttl == 0) {
639 if (skb->protocol == htons(ETH_P_IP)) 629 if (skb->protocol == htons(ETH_P_IP))
@@ -646,38 +636,25 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
646 ttl = ip4_dst_hoplimit(&rt->dst); 636 ttl = ip4_dst_hoplimit(&rt->dst);
647 } 637 }
648 638
649 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr) 639 df = tnl_params->frag_off;
650 + rt->dst.header_len; 640 if (skb->protocol == htons(ETH_P_IP))
651 if (max_headroom > dev->needed_headroom) { 641 df |= (inner_iph->frag_off&htons(IP_DF));
652 dev->needed_headroom = max_headroom;
653 if (skb_cow_head(skb, dev->needed_headroom)) {
654 dev->stats.tx_dropped++;
655 dev_kfree_skb(skb);
656 return;
657 }
658 }
659 642
660 skb_dst_drop(skb); 643 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
661 skb_dst_set(skb, &rt->dst); 644 + rt->dst.header_len;
645 if (max_headroom > dev->needed_headroom)
646 dev->needed_headroom = max_headroom;
662 647
663 /* Push down and install the IP header. */ 648 if (skb_cow_head(skb, dev->needed_headroom)) {
664 skb_push(skb, sizeof(struct iphdr)); 649 dev->stats.tx_dropped++;
665 skb_reset_network_header(skb); 650 dev_kfree_skb(skb);
651 return;
652 }
666 653
667 iph = ip_hdr(skb); 654 err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
668 inner_iph = (const struct iphdr *)skb_inner_network_header(skb); 655 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
656 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
669 657
670 iph->version = 4;
671 iph->ihl = sizeof(struct iphdr) >> 2;
672 iph->frag_off = df;
673 iph->protocol = tnl_params->protocol;
674 iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
675 iph->daddr = fl4.daddr;
676 iph->saddr = fl4.saddr;
677 iph->ttl = ttl;
678 tunnel_ip_select_ident(skb, inner_iph, &rt->dst);
679
680 iptunnel_xmit(skb, dev);
681 return; 658 return;
682 659
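iptunnel_xmit(), added in the new ip_tunnel_core.c further down, returns the byte count handed to ip_local_out(), or 0 when the transmit failed, and iptunnel_xmit_stats() folds that result into the per-cpu counters. A minimal sketch of the assumed helper:

	static inline void iptunnel_xmit_stats(int err,
					       struct net_device_stats *err_stats,
					       struct pcpu_tstats __percpu *stats)
	{
		if (err > 0) {
			struct pcpu_tstats *tstats = this_cpu_ptr(stats);

			u64_stats_update_begin(&tstats->syncp);
			tstats->tx_bytes += err;	/* err is the byte count */
			tstats->tx_packets++;
			u64_stats_update_end(&tstats->syncp);
		} else if (err < 0) {
			err_stats->tx_errors++;
			err_stats->tx_aborted_errors++;
		} else {
			err_stats->tx_dropped++;
		}
	}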
683#if IS_ENABLED(CONFIG_IPV6) 660#if IS_ENABLED(CONFIG_IPV6)
@@ -840,11 +817,10 @@ static void ip_tunnel_dev_free(struct net_device *dev)
840 817
841void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) 818void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
842{ 819{
843 struct net *net = dev_net(dev);
844 struct ip_tunnel *tunnel = netdev_priv(dev); 820 struct ip_tunnel *tunnel = netdev_priv(dev);
845 struct ip_tunnel_net *itn; 821 struct ip_tunnel_net *itn;
846 822
847 itn = net_generic(net, tunnel->ip_tnl_net_id); 823 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
848 824
849 if (itn->fb_tunnel_dev != dev) { 825 if (itn->fb_tunnel_dev != dev) {
850 ip_tunnel_del(netdev_priv(dev)); 826 ip_tunnel_del(netdev_priv(dev));
@@ -853,61 +829,73 @@ void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
853} 829}
854EXPORT_SYMBOL_GPL(ip_tunnel_dellink); 830EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
855 831
856int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, 832int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
857 struct rtnl_link_ops *ops, char *devname) 833 struct rtnl_link_ops *ops, char *devname)
858{ 834{
859 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); 835 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
860 struct ip_tunnel_parm parms; 836 struct ip_tunnel_parm parms;
837 unsigned int i;
861 838
862 itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL); 839 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
863 if (!itn->tunnels) 840 INIT_HLIST_HEAD(&itn->tunnels[i]);
864 return -ENOMEM;
865 841
866 if (!ops) { 842 if (!ops) {
867 itn->fb_tunnel_dev = NULL; 843 itn->fb_tunnel_dev = NULL;
868 return 0; 844 return 0;
869 } 845 }
846
870 memset(&parms, 0, sizeof(parms)); 847 memset(&parms, 0, sizeof(parms));
871 if (devname) 848 if (devname)
872 strlcpy(parms.name, devname, IFNAMSIZ); 849 strlcpy(parms.name, devname, IFNAMSIZ);
873 850
874 rtnl_lock(); 851 rtnl_lock();
875 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); 852 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
876 rtnl_unlock(); 853 /* FB netdevice is special: we have one, and only one per netns.
877 if (IS_ERR(itn->fb_tunnel_dev)) { 854 * Allowing it to move to another netns is clearly unsafe.
878 kfree(itn->tunnels); 855 */
879 return PTR_ERR(itn->fb_tunnel_dev); 856 if (!IS_ERR(itn->fb_tunnel_dev)) {
857 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
858 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
880 } 859 }
860 rtnl_unlock();
881 861
882 return 0; 862 return PTR_RET(itn->fb_tunnel_dev);
883} 863}
884EXPORT_SYMBOL_GPL(ip_tunnel_init_net); 864EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
885 865
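With the kzalloc()/kfree() pair gone here and in ip_tunnel_delete_net() below, struct ip_tunnel_net must now embed the bucket array directly; under that assumption the per-netns state is roughly:

	struct ip_tunnel_net {
		struct net_device *fb_tunnel_dev;		/* fallback device */
		struct hlist_head tunnels[IP_TNL_HASH_SIZE];	/* was kzalloc'd */
	};

PTR_RET(itn->fb_tunnel_dev) then yields PTR_ERR() for an error pointer and 0 otherwise, so a failed fallback-device allocation still propagates out of the init function.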
886static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head) 866static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
867 struct rtnl_link_ops *ops)
887{ 868{
869 struct net *net = dev_net(itn->fb_tunnel_dev);
870 struct net_device *dev, *aux;
888 int h; 871 int h;
889 872
873 for_each_netdev_safe(net, dev, aux)
874 if (dev->rtnl_link_ops == ops)
875 unregister_netdevice_queue(dev, head);
876
890 for (h = 0; h < IP_TNL_HASH_SIZE; h++) { 877 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
891 struct ip_tunnel *t; 878 struct ip_tunnel *t;
892 struct hlist_node *n; 879 struct hlist_node *n;
893 struct hlist_head *thead = &itn->tunnels[h]; 880 struct hlist_head *thead = &itn->tunnels[h];
894 881
895 hlist_for_each_entry_safe(t, n, thead, hash_node) 882 hlist_for_each_entry_safe(t, n, thead, hash_node)
896 unregister_netdevice_queue(t->dev, head); 883 /* If dev is in the same netns, it has already
884 * been added to the list by the previous loop.
885 */
886 if (!net_eq(dev_net(t->dev), net))
887 unregister_netdevice_queue(t->dev, head);
897 } 888 }
898 if (itn->fb_tunnel_dev)
899 unregister_netdevice_queue(itn->fb_tunnel_dev, head);
900} 889}
901 890
902void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn) 891void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
903{ 892{
904 LIST_HEAD(list); 893 LIST_HEAD(list);
905 894
906 rtnl_lock(); 895 rtnl_lock();
907 ip_tunnel_destroy(itn, &list); 896 ip_tunnel_destroy(itn, &list, ops);
908 unregister_netdevice_many(&list); 897 unregister_netdevice_many(&list);
909 rtnl_unlock(); 898 rtnl_unlock();
910 kfree(itn->tunnels);
911} 899}
912EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); 900EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
913 901
@@ -926,6 +914,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
926 if (ip_tunnel_find(itn, p, dev->type)) 914 if (ip_tunnel_find(itn, p, dev->type))
927 return -EEXIST; 915 return -EEXIST;
928 916
917 nt->net = net;
929 nt->parms = *p; 918 nt->parms = *p;
930 err = register_netdevice(dev); 919 err = register_netdevice(dev);
931 if (err) 920 if (err)
@@ -948,23 +937,21 @@ EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
948int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], 937int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
949 struct ip_tunnel_parm *p) 938 struct ip_tunnel_parm *p)
950{ 939{
951 struct ip_tunnel *t, *nt; 940 struct ip_tunnel *t;
952 struct net *net = dev_net(dev);
953 struct ip_tunnel *tunnel = netdev_priv(dev); 941 struct ip_tunnel *tunnel = netdev_priv(dev);
942 struct net *net = tunnel->net;
954 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); 943 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
955 944
956 if (dev == itn->fb_tunnel_dev) 945 if (dev == itn->fb_tunnel_dev)
957 return -EINVAL; 946 return -EINVAL;
958 947
959 nt = netdev_priv(dev);
960
961 t = ip_tunnel_find(itn, p, dev->type); 948 t = ip_tunnel_find(itn, p, dev->type);
962 949
963 if (t) { 950 if (t) {
964 if (t->dev != dev) 951 if (t->dev != dev)
965 return -EEXIST; 952 return -EEXIST;
966 } else { 953 } else {
967 t = nt; 954 t = tunnel;
968 955
969 if (dev->type != ARPHRD_ETHER) { 956 if (dev->type != ARPHRD_ETHER) {
970 unsigned int nflags = 0; 957 unsigned int nflags = 0;
@@ -1003,6 +990,7 @@ int ip_tunnel_init(struct net_device *dev)
1003 } 990 }
1004 991
1005 tunnel->dev = dev; 992 tunnel->dev = dev;
993 tunnel->net = dev_net(dev);
1006 strcpy(tunnel->parms.name, dev->name); 994 strcpy(tunnel->parms.name, dev->name);
1007 iph->version = 4; 995 iph->version = 4;
1008 iph->ihl = 5; 996 iph->ihl = 5;
@@ -1013,8 +1001,8 @@ EXPORT_SYMBOL_GPL(ip_tunnel_init);
1013 1001
1014void ip_tunnel_uninit(struct net_device *dev) 1002void ip_tunnel_uninit(struct net_device *dev)
1015{ 1003{
1016 struct net *net = dev_net(dev);
1017 struct ip_tunnel *tunnel = netdev_priv(dev); 1004 struct ip_tunnel *tunnel = netdev_priv(dev);
1005 struct net *net = tunnel->net;
1018 struct ip_tunnel_net *itn; 1006 struct ip_tunnel_net *itn;
1019 1007
1020 itn = net_generic(net, tunnel->ip_tnl_net_id); 1008 itn = net_generic(net, tunnel->ip_tnl_net_id);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
new file mode 100644
index 000000000000..c31e3ad98ef2
--- /dev/null
+++ b/net/ipv4/ip_tunnel_core.c
@@ -0,0 +1,118 @@
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/skbuff.h>
24#include <linux/netdevice.h>
25#include <linux/in.h>
26#include <linux/if_arp.h>
27#include <linux/mroute.h>
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/netfilter_ipv4.h>
32#include <linux/etherdevice.h>
33#include <linux/if_ether.h>
34#include <linux/if_vlan.h>
35
36#include <net/ip.h>
37#include <net/icmp.h>
38#include <net/protocol.h>
39#include <net/ip_tunnels.h>
40#include <net/arp.h>
41#include <net/checksum.h>
42#include <net/dsfield.h>
43#include <net/inet_ecn.h>
44#include <net/xfrm.h>
45#include <net/net_namespace.h>
46#include <net/netns/generic.h>
47#include <net/rtnetlink.h>
48
49int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
50 __be32 src, __be32 dst, __u8 proto,
51 __u8 tos, __u8 ttl, __be16 df, bool xnet)
52{
53 int pkt_len = skb->len;
54 struct iphdr *iph;
55 int err;
56
57 skb_scrub_packet(skb, xnet);
58
59 skb->rxhash = 0;
60 skb_dst_set(skb, &rt->dst);
61 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
62
63 /* Push down and install the IP header. */
64 skb_push(skb, sizeof(struct iphdr));
65 skb_reset_network_header(skb);
66
67 iph = ip_hdr(skb);
68
69 iph->version = 4;
70 iph->ihl = sizeof(struct iphdr) >> 2;
71 iph->frag_off = df;
72 iph->protocol = proto;
73 iph->tos = tos;
74 iph->daddr = dst;
75 iph->saddr = src;
76 iph->ttl = ttl;
77 __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
78
79 err = ip_local_out(skb);
80 if (unlikely(net_xmit_eval(err)))
81 pkt_len = 0;
82 return pkt_len;
83}
84EXPORT_SYMBOL_GPL(iptunnel_xmit);
85
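Note the return convention: skb->len on success, 0 when net_xmit_eval() reports a failure, which lets callers pipe the result straight into iptunnel_xmit_stats(). The (gso_segs ?: 1) - 1 argument to __ip_select_ident() reserves one IP ID per segment that GSO will later emit. A caller sketch, mirroring the ip_tunnel_xmit() hunk above:

	/* rt, fl4, tos, ttl and df prepared as in ip_tunnel_xmit() */
	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);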
86int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
87{
88 if (unlikely(!pskb_may_pull(skb, hdr_len)))
89 return -ENOMEM;
90
91 skb_pull_rcsum(skb, hdr_len);
92
93 if (inner_proto == htons(ETH_P_TEB)) {
94 struct ethhdr *eh = (struct ethhdr *)skb->data;
95
96 if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
97 return -ENOMEM;
98
99 if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN))
100 skb->protocol = eh->h_proto;
101 else
102 skb->protocol = htons(ETH_P_802_2);
103
104 } else {
105 skb->protocol = inner_proto;
106 }
107
108 nf_reset(skb);
109 secpath_reset(skb);
110 if (!skb->l4_rxhash)
111 skb->rxhash = 0;
112 skb_dst_drop(skb);
113 skb->vlan_tci = 0;
114 skb_set_queue_mapping(skb, 0);
115 skb->pkt_type = PACKET_HOST;
116 return 0;
117}
118EXPORT_SYMBOL_GPL(iptunnel_pull_header);
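On the receive side, drivers are expected to call iptunnel_pull_header() before ip_tunnel_rcv() so the outer header is stripped, skb->protocol is fixed up (including the ETH_P_TEB case for Ethernet payloads), and stale per-device state is scrubbed; the ipip_rcv() hunk further down gains exactly such a call. The pattern, sketched:

	/* hdr_len and tpi depend on the encapsulation; 0 for plain IPIP */
	if (iptunnel_pull_header(skb, hdr_len, tpi.proto))
		goto drop;
	return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);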
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 9d2bdb2c1d3f..e805e7b3030e 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -44,176 +44,10 @@
44#include <net/net_namespace.h> 44#include <net/net_namespace.h>
45#include <net/netns/generic.h> 45#include <net/netns/generic.h>
46 46
47#define HASH_SIZE 16
48#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&(HASH_SIZE-1))
49
50static struct rtnl_link_ops vti_link_ops __read_mostly; 47static struct rtnl_link_ops vti_link_ops __read_mostly;
51 48
52static int vti_net_id __read_mostly; 49static int vti_net_id __read_mostly;
53struct vti_net {
54 struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
55 struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
56 struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
57 struct ip_tunnel __rcu *tunnels_wc[1];
58 struct ip_tunnel __rcu **tunnels[4];
59
60 struct net_device *fb_tunnel_dev;
61};
62
63static int vti_fb_tunnel_init(struct net_device *dev);
64static int vti_tunnel_init(struct net_device *dev); 50static int vti_tunnel_init(struct net_device *dev);
65static void vti_tunnel_setup(struct net_device *dev);
66static void vti_dev_free(struct net_device *dev);
67static int vti_tunnel_bind_dev(struct net_device *dev);
68
69#define VTI_XMIT(stats1, stats2) do { \
70 int err; \
71 int pkt_len = skb->len; \
72 err = dst_output(skb); \
73 if (net_xmit_eval(err) == 0) { \
74 u64_stats_update_begin(&(stats1)->syncp); \
75 (stats1)->tx_bytes += pkt_len; \
76 (stats1)->tx_packets++; \
77 u64_stats_update_end(&(stats1)->syncp); \
78 } else { \
79 (stats2)->tx_errors++; \
80 (stats2)->tx_aborted_errors++; \
81 } \
82} while (0)
83
84
85static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
86 __be32 remote, __be32 local)
87{
88 unsigned h0 = HASH(remote);
89 unsigned h1 = HASH(local);
90 struct ip_tunnel *t;
91 struct vti_net *ipn = net_generic(net, vti_net_id);
92
93 for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
94 if (local == t->parms.iph.saddr &&
95 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
96 return t;
97 for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
98 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
99 return t;
100
101 for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
102 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
103 return t;
104
105 for_each_ip_tunnel_rcu(t, ipn->tunnels_wc[0])
106 if (t && (t->dev->flags&IFF_UP))
107 return t;
108 return NULL;
109}
110
111static struct ip_tunnel __rcu **__vti_bucket(struct vti_net *ipn,
112 struct ip_tunnel_parm *parms)
113{
114 __be32 remote = parms->iph.daddr;
115 __be32 local = parms->iph.saddr;
116 unsigned h = 0;
117 int prio = 0;
118
119 if (remote) {
120 prio |= 2;
121 h ^= HASH(remote);
122 }
123 if (local) {
124 prio |= 1;
125 h ^= HASH(local);
126 }
127 return &ipn->tunnels[prio][h];
128}
129
130static inline struct ip_tunnel __rcu **vti_bucket(struct vti_net *ipn,
131 struct ip_tunnel *t)
132{
133 return __vti_bucket(ipn, &t->parms);
134}
135
136static void vti_tunnel_unlink(struct vti_net *ipn, struct ip_tunnel *t)
137{
138 struct ip_tunnel __rcu **tp;
139 struct ip_tunnel *iter;
140
141 for (tp = vti_bucket(ipn, t);
142 (iter = rtnl_dereference(*tp)) != NULL;
143 tp = &iter->next) {
144 if (t == iter) {
145 rcu_assign_pointer(*tp, t->next);
146 break;
147 }
148 }
149}
150
151static void vti_tunnel_link(struct vti_net *ipn, struct ip_tunnel *t)
152{
153 struct ip_tunnel __rcu **tp = vti_bucket(ipn, t);
154
155 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
156 rcu_assign_pointer(*tp, t);
157}
158
159static struct ip_tunnel *vti_tunnel_locate(struct net *net,
160 struct ip_tunnel_parm *parms,
161 int create)
162{
163 __be32 remote = parms->iph.daddr;
164 __be32 local = parms->iph.saddr;
165 struct ip_tunnel *t, *nt;
166 struct ip_tunnel __rcu **tp;
167 struct net_device *dev;
168 char name[IFNAMSIZ];
169 struct vti_net *ipn = net_generic(net, vti_net_id);
170
171 for (tp = __vti_bucket(ipn, parms);
172 (t = rtnl_dereference(*tp)) != NULL;
173 tp = &t->next) {
174 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
175 return t;
176 }
177 if (!create)
178 return NULL;
179
180 if (parms->name[0])
181 strlcpy(name, parms->name, IFNAMSIZ);
182 else
183 strcpy(name, "vti%d");
184
185 dev = alloc_netdev(sizeof(*t), name, vti_tunnel_setup);
186 if (dev == NULL)
187 return NULL;
188
189 dev_net_set(dev, net);
190
191 nt = netdev_priv(dev);
192 nt->parms = *parms;
193 dev->rtnl_link_ops = &vti_link_ops;
194
195 vti_tunnel_bind_dev(dev);
196
197 if (register_netdevice(dev) < 0)
198 goto failed_free;
199
200 dev_hold(dev);
201 vti_tunnel_link(ipn, nt);
202 return nt;
203
204failed_free:
205 free_netdev(dev);
206 return NULL;
207}
208
209static void vti_tunnel_uninit(struct net_device *dev)
210{
211 struct net *net = dev_net(dev);
212 struct vti_net *ipn = net_generic(net, vti_net_id);
213
214 vti_tunnel_unlink(ipn, netdev_priv(dev));
215 dev_put(dev);
216}
217 51
218static int vti_err(struct sk_buff *skb, u32 info) 52static int vti_err(struct sk_buff *skb, u32 info)
219{ 53{
@@ -222,6 +56,8 @@ static int vti_err(struct sk_buff *skb, u32 info)
222 * 8 bytes of packet payload. It means, that precise relaying of 56 * 8 bytes of packet payload. It means, that precise relaying of
223 * ICMP in the real Internet is absolutely infeasible. 57 * ICMP in the real Internet is absolutely infeasible.
224 */ 58 */
59 struct net *net = dev_net(skb->dev);
60 struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
225 struct iphdr *iph = (struct iphdr *)skb->data; 61 struct iphdr *iph = (struct iphdr *)skb->data;
226 const int type = icmp_hdr(skb)->type; 62 const int type = icmp_hdr(skb)->type;
227 const int code = icmp_hdr(skb)->code; 63 const int code = icmp_hdr(skb)->code;
@@ -252,7 +88,8 @@ static int vti_err(struct sk_buff *skb, u32 info)
252 88
253 err = -ENOENT; 89 err = -ENOENT;
254 90
255 t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); 91 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
92 iph->daddr, iph->saddr, 0);
256 if (t == NULL) 93 if (t == NULL)
257 goto out; 94 goto out;
258 95
@@ -281,8 +118,11 @@ static int vti_rcv(struct sk_buff *skb)
281{ 118{
282 struct ip_tunnel *tunnel; 119 struct ip_tunnel *tunnel;
283 const struct iphdr *iph = ip_hdr(skb); 120 const struct iphdr *iph = ip_hdr(skb);
121 struct net *net = dev_net(skb->dev);
122 struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
284 123
285 tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); 124 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
125 iph->saddr, iph->daddr, 0);
286 if (tunnel != NULL) { 126 if (tunnel != NULL) {
287 struct pcpu_tstats *tstats; 127 struct pcpu_tstats *tstats;
288 128
@@ -311,7 +151,6 @@ static int vti_rcv(struct sk_buff *skb)
311static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 151static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
312{ 152{
313 struct ip_tunnel *tunnel = netdev_priv(dev); 153 struct ip_tunnel *tunnel = netdev_priv(dev);
314 struct pcpu_tstats *tstats;
315 struct iphdr *tiph = &tunnel->parms.iph; 154 struct iphdr *tiph = &tunnel->parms.iph;
316 u8 tos; 155 u8 tos;
317 struct rtable *rt; /* Route to the other host */ 156 struct rtable *rt; /* Route to the other host */
@@ -319,6 +158,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
319 struct iphdr *old_iph = ip_hdr(skb); 158 struct iphdr *old_iph = ip_hdr(skb);
320 __be32 dst = tiph->daddr; 159 __be32 dst = tiph->daddr;
321 struct flowi4 fl4; 160 struct flowi4 fl4;
161 int err;
322 162
323 if (skb->protocol != htons(ETH_P_IP)) 163 if (skb->protocol != htons(ETH_P_IP))
324 goto tx_error; 164 goto tx_error;
@@ -361,15 +201,16 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
361 tunnel->err_count = 0; 201 tunnel->err_count = 0;
362 } 202 }
363 203
364 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | 204 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
365 IPSKB_REROUTED);
366 skb_dst_drop(skb); 205 skb_dst_drop(skb);
367 skb_dst_set(skb, &rt->dst); 206 skb_dst_set(skb, &rt->dst);
368 nf_reset(skb); 207 nf_reset(skb);
369 skb->dev = skb_dst(skb)->dev; 208 skb->dev = skb_dst(skb)->dev;
370 209
371 tstats = this_cpu_ptr(dev->tstats); 210 err = dst_output(skb);
372 VTI_XMIT(tstats, &dev->stats); 211 if (net_xmit_eval(err) == 0)
212 err = skb->len;
213 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
373 return NETDEV_TX_OK; 214 return NETDEV_TX_OK;
374 215
375tx_error_icmp: 216tx_error_icmp:
@@ -380,198 +221,57 @@ tx_error:
380 return NETDEV_TX_OK; 221 return NETDEV_TX_OK;
381} 222}
382 223
383static int vti_tunnel_bind_dev(struct net_device *dev)
384{
385 struct net_device *tdev = NULL;
386 struct ip_tunnel *tunnel;
387 struct iphdr *iph;
388
389 tunnel = netdev_priv(dev);
390 iph = &tunnel->parms.iph;
391
392 if (iph->daddr) {
393 struct rtable *rt;
394 struct flowi4 fl4;
395 memset(&fl4, 0, sizeof(fl4));
396 flowi4_init_output(&fl4, tunnel->parms.link,
397 be32_to_cpu(tunnel->parms.i_key),
398 RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
399 IPPROTO_IPIP, 0,
400 iph->daddr, iph->saddr, 0, 0);
401 rt = ip_route_output_key(dev_net(dev), &fl4);
402 if (!IS_ERR(rt)) {
403 tdev = rt->dst.dev;
404 ip_rt_put(rt);
405 }
406 dev->flags |= IFF_POINTOPOINT;
407 }
408
409 if (!tdev && tunnel->parms.link)
410 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
411
412 if (tdev) {
413 dev->hard_header_len = tdev->hard_header_len +
414 sizeof(struct iphdr);
415 dev->mtu = tdev->mtu;
416 }
417 dev->iflink = tunnel->parms.link;
418 return dev->mtu;
419}
420
421static int 224static int
422vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 225vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
423{ 226{
424 int err = 0; 227 int err = 0;
425 struct ip_tunnel_parm p; 228 struct ip_tunnel_parm p;
426 struct ip_tunnel *t;
427 struct net *net = dev_net(dev);
428 struct vti_net *ipn = net_generic(net, vti_net_id);
429
430 switch (cmd) {
431 case SIOCGETTUNNEL:
432 t = NULL;
433 if (dev == ipn->fb_tunnel_dev) {
434 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
435 sizeof(p))) {
436 err = -EFAULT;
437 break;
438 }
439 t = vti_tunnel_locate(net, &p, 0);
440 }
441 if (t == NULL)
442 t = netdev_priv(dev);
443 memcpy(&p, &t->parms, sizeof(p));
444 p.i_flags |= GRE_KEY | VTI_ISVTI;
445 p.o_flags |= GRE_KEY;
446 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
447 err = -EFAULT;
448 break;
449 229
450 case SIOCADDTUNNEL: 230 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
451 case SIOCCHGTUNNEL: 231 return -EFAULT;
452 err = -EPERM;
453 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
454 goto done;
455 232
456 err = -EFAULT; 233 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
457 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
458 goto done;
459
460 err = -EINVAL;
461 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || 234 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
462 p.iph.ihl != 5) 235 p.iph.ihl != 5)
463 goto done; 236 return -EINVAL;
464 237 }
465 t = vti_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
466
467 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
468 if (t != NULL) {
469 if (t->dev != dev) {
470 err = -EEXIST;
471 break;
472 }
473 } else {
474 if (((dev->flags&IFF_POINTOPOINT) &&
475 !p.iph.daddr) ||
476 (!(dev->flags&IFF_POINTOPOINT) &&
477 p.iph.daddr)) {
478 err = -EINVAL;
479 break;
480 }
481 t = netdev_priv(dev);
482 vti_tunnel_unlink(ipn, t);
483 synchronize_net();
484 t->parms.iph.saddr = p.iph.saddr;
485 t->parms.iph.daddr = p.iph.daddr;
486 t->parms.i_key = p.i_key;
487 t->parms.o_key = p.o_key;
488 t->parms.iph.protocol = IPPROTO_IPIP;
489 memcpy(dev->dev_addr, &p.iph.saddr, 4);
490 memcpy(dev->broadcast, &p.iph.daddr, 4);
491 vti_tunnel_link(ipn, t);
492 netdev_state_change(dev);
493 }
494 }
495
496 if (t) {
497 err = 0;
498 if (cmd == SIOCCHGTUNNEL) {
499 t->parms.i_key = p.i_key;
500 t->parms.o_key = p.o_key;
501 if (t->parms.link != p.link) {
502 t->parms.link = p.link;
503 vti_tunnel_bind_dev(dev);
504 netdev_state_change(dev);
505 }
506 }
507 p.i_flags |= GRE_KEY | VTI_ISVTI;
508 p.o_flags |= GRE_KEY;
509 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms,
510 sizeof(p)))
511 err = -EFAULT;
512 } else
513 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
514 break;
515 238
516 case SIOCDELTUNNEL: 239 err = ip_tunnel_ioctl(dev, &p, cmd);
517 err = -EPERM; 240 if (err)
518 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 241 return err;
519 goto done;
520
521 if (dev == ipn->fb_tunnel_dev) {
522 err = -EFAULT;
523 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
524 sizeof(p)))
525 goto done;
526 err = -ENOENT;
527
528 t = vti_tunnel_locate(net, &p, 0);
529 if (t == NULL)
530 goto done;
531 err = -EPERM;
532 if (t->dev == ipn->fb_tunnel_dev)
533 goto done;
534 dev = t->dev;
535 }
536 unregister_netdevice(dev);
537 err = 0;
538 break;
539 242
540 default: 243 if (cmd != SIOCDELTUNNEL) {
541 err = -EINVAL; 244 p.i_flags |= GRE_KEY | VTI_ISVTI;
245 p.o_flags |= GRE_KEY;
542 } 246 }
543 247
544done: 248 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
545 return err; 249 return -EFAULT;
546}
547
548static int vti_tunnel_change_mtu(struct net_device *dev, int new_mtu)
549{
550 if (new_mtu < 68 || new_mtu > 0xFFF8)
551 return -EINVAL;
552 dev->mtu = new_mtu;
553 return 0; 250 return 0;
554} 251}
555 252
556static const struct net_device_ops vti_netdev_ops = { 253static const struct net_device_ops vti_netdev_ops = {
557 .ndo_init = vti_tunnel_init, 254 .ndo_init = vti_tunnel_init,
558 .ndo_uninit = vti_tunnel_uninit, 255 .ndo_uninit = ip_tunnel_uninit,
559 .ndo_start_xmit = vti_tunnel_xmit, 256 .ndo_start_xmit = vti_tunnel_xmit,
560 .ndo_do_ioctl = vti_tunnel_ioctl, 257 .ndo_do_ioctl = vti_tunnel_ioctl,
561 .ndo_change_mtu = vti_tunnel_change_mtu, 258 .ndo_change_mtu = ip_tunnel_change_mtu,
562 .ndo_get_stats64 = ip_tunnel_get_stats64, 259 .ndo_get_stats64 = ip_tunnel_get_stats64,
563}; 260};
564 261
565static void vti_dev_free(struct net_device *dev) 262static void vti_tunnel_setup(struct net_device *dev)
566{ 263{
567 free_percpu(dev->tstats); 264 dev->netdev_ops = &vti_netdev_ops;
568 free_netdev(dev); 265 ip_tunnel_setup(dev, vti_net_id);
569} 266}
570 267
571static void vti_tunnel_setup(struct net_device *dev) 268static int vti_tunnel_init(struct net_device *dev)
572{ 269{
573 dev->netdev_ops = &vti_netdev_ops; 270 struct ip_tunnel *tunnel = netdev_priv(dev);
574 dev->destructor = vti_dev_free; 271 struct iphdr *iph = &tunnel->parms.iph;
272
273 memcpy(dev->dev_addr, &iph->saddr, 4);
274 memcpy(dev->broadcast, &iph->daddr, 4);
575 275
576 dev->type = ARPHRD_TUNNEL; 276 dev->type = ARPHRD_TUNNEL;
577 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 277 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
@@ -582,45 +282,18 @@ static void vti_tunnel_setup(struct net_device *dev)
582 dev->features |= NETIF_F_NETNS_LOCAL; 282 dev->features |= NETIF_F_NETNS_LOCAL;
583 dev->features |= NETIF_F_LLTX; 283 dev->features |= NETIF_F_LLTX;
584 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 284 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
585}
586
587static int vti_tunnel_init(struct net_device *dev)
588{
589 struct ip_tunnel *tunnel = netdev_priv(dev);
590 285
591 tunnel->dev = dev; 286 return ip_tunnel_init(dev);
592 strcpy(tunnel->parms.name, dev->name);
593
594 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
595 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
596
597 dev->tstats = alloc_percpu(struct pcpu_tstats);
598 if (!dev->tstats)
599 return -ENOMEM;
600
601 return 0;
602} 287}
603 288
604static int __net_init vti_fb_tunnel_init(struct net_device *dev) 289static void __net_init vti_fb_tunnel_init(struct net_device *dev)
605{ 290{
606 struct ip_tunnel *tunnel = netdev_priv(dev); 291 struct ip_tunnel *tunnel = netdev_priv(dev);
607 struct iphdr *iph = &tunnel->parms.iph; 292 struct iphdr *iph = &tunnel->parms.iph;
608 struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id);
609
610 tunnel->dev = dev;
611 strcpy(tunnel->parms.name, dev->name);
612 293
613 iph->version = 4; 294 iph->version = 4;
614 iph->protocol = IPPROTO_IPIP; 295 iph->protocol = IPPROTO_IPIP;
615 iph->ihl = 5; 296 iph->ihl = 5;
616
617 dev->tstats = alloc_percpu(struct pcpu_tstats);
618 if (!dev->tstats)
619 return -ENOMEM;
620
621 dev_hold(dev);
622 rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
623 return 0;
624} 297}
625 298
626static struct xfrm_tunnel vti_handler __read_mostly = { 299static struct xfrm_tunnel vti_handler __read_mostly = {
@@ -629,76 +302,30 @@ static struct xfrm_tunnel vti_handler __read_mostly = {
629 .priority = 1, 302 .priority = 1,
630}; 303};
631 304
632static void vti_destroy_tunnels(struct vti_net *ipn, struct list_head *head)
633{
634 int prio;
635
636 for (prio = 1; prio < 4; prio++) {
637 int h;
638 for (h = 0; h < HASH_SIZE; h++) {
639 struct ip_tunnel *t;
640
641 t = rtnl_dereference(ipn->tunnels[prio][h]);
642 while (t != NULL) {
643 unregister_netdevice_queue(t->dev, head);
644 t = rtnl_dereference(t->next);
645 }
646 }
647 }
648}
649
650static int __net_init vti_init_net(struct net *net) 305static int __net_init vti_init_net(struct net *net)
651{ 306{
652 int err; 307 int err;
653 struct vti_net *ipn = net_generic(net, vti_net_id); 308 struct ip_tunnel_net *itn;
654
655 ipn->tunnels[0] = ipn->tunnels_wc;
656 ipn->tunnels[1] = ipn->tunnels_l;
657 ipn->tunnels[2] = ipn->tunnels_r;
658 ipn->tunnels[3] = ipn->tunnels_r_l;
659
660 ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
661 "ip_vti0",
662 vti_tunnel_setup);
663 if (!ipn->fb_tunnel_dev) {
664 err = -ENOMEM;
665 goto err_alloc_dev;
666 }
667 dev_net_set(ipn->fb_tunnel_dev, net);
668
669 err = vti_fb_tunnel_init(ipn->fb_tunnel_dev);
670 if (err)
671 goto err_reg_dev;
672 ipn->fb_tunnel_dev->rtnl_link_ops = &vti_link_ops;
673 309
674 err = register_netdev(ipn->fb_tunnel_dev); 310 err = ip_tunnel_init_net(net, vti_net_id, &vti_link_ops, "ip_vti0");
675 if (err) 311 if (err)
676 goto err_reg_dev; 312 return err;
313 itn = net_generic(net, vti_net_id);
314 vti_fb_tunnel_init(itn->fb_tunnel_dev);
677 return 0; 315 return 0;
678
679err_reg_dev:
680 vti_dev_free(ipn->fb_tunnel_dev);
681err_alloc_dev:
682 /* nothing */
683 return err;
684} 316}
685 317
686static void __net_exit vti_exit_net(struct net *net) 318static void __net_exit vti_exit_net(struct net *net)
687{ 319{
688 struct vti_net *ipn = net_generic(net, vti_net_id); 320 struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
689 LIST_HEAD(list); 321 ip_tunnel_delete_net(itn, &vti_link_ops);
690
691 rtnl_lock();
692 vti_destroy_tunnels(ipn, &list);
693 unregister_netdevice_many(&list);
694 rtnl_unlock();
695} 322}
696 323
697static struct pernet_operations vti_net_ops = { 324static struct pernet_operations vti_net_ops = {
698 .init = vti_init_net, 325 .init = vti_init_net,
699 .exit = vti_exit_net, 326 .exit = vti_exit_net,
700 .id = &vti_net_id, 327 .id = &vti_net_id,
701 .size = sizeof(struct vti_net), 328 .size = sizeof(struct ip_tunnel_net),
702}; 329};
703 330
704static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) 331static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -736,78 +363,19 @@ static void vti_netlink_parms(struct nlattr *data[],
736static int vti_newlink(struct net *src_net, struct net_device *dev, 363static int vti_newlink(struct net *src_net, struct net_device *dev,
737 struct nlattr *tb[], struct nlattr *data[]) 364 struct nlattr *tb[], struct nlattr *data[])
738{ 365{
739 struct ip_tunnel *nt; 366 struct ip_tunnel_parm parms;
740 struct net *net = dev_net(dev);
741 struct vti_net *ipn = net_generic(net, vti_net_id);
742 int mtu;
743 int err;
744
745 nt = netdev_priv(dev);
746 vti_netlink_parms(data, &nt->parms);
747
748 if (vti_tunnel_locate(net, &nt->parms, 0))
749 return -EEXIST;
750 367
751 mtu = vti_tunnel_bind_dev(dev); 368 vti_netlink_parms(data, &parms);
752 if (!tb[IFLA_MTU]) 369 return ip_tunnel_newlink(dev, tb, &parms);
753 dev->mtu = mtu;
754
755 err = register_netdevice(dev);
756 if (err)
757 goto out;
758
759 dev_hold(dev);
760 vti_tunnel_link(ipn, nt);
761
762out:
763 return err;
764} 370}
765 371
766static int vti_changelink(struct net_device *dev, struct nlattr *tb[], 372static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
767 struct nlattr *data[]) 373 struct nlattr *data[])
768{ 374{
769 struct ip_tunnel *t, *nt;
770 struct net *net = dev_net(dev);
771 struct vti_net *ipn = net_generic(net, vti_net_id);
772 struct ip_tunnel_parm p; 375 struct ip_tunnel_parm p;
773 int mtu;
774
775 if (dev == ipn->fb_tunnel_dev)
776 return -EINVAL;
777 376
778 nt = netdev_priv(dev);
779 vti_netlink_parms(data, &p); 377 vti_netlink_parms(data, &p);
780 378 return ip_tunnel_changelink(dev, tb, &p);
781 t = vti_tunnel_locate(net, &p, 0);
782
783 if (t) {
784 if (t->dev != dev)
785 return -EEXIST;
786 } else {
787 t = nt;
788
789 vti_tunnel_unlink(ipn, t);
790 t->parms.iph.saddr = p.iph.saddr;
791 t->parms.iph.daddr = p.iph.daddr;
792 t->parms.i_key = p.i_key;
793 t->parms.o_key = p.o_key;
794 if (dev->type != ARPHRD_ETHER) {
795 memcpy(dev->dev_addr, &p.iph.saddr, 4);
796 memcpy(dev->broadcast, &p.iph.daddr, 4);
797 }
798 vti_tunnel_link(ipn, t);
799 netdev_state_change(dev);
800 }
801
802 if (t->parms.link != p.link) {
803 t->parms.link = p.link;
804 mtu = vti_tunnel_bind_dev(dev);
805 if (!tb[IFLA_MTU])
806 dev->mtu = mtu;
807 netdev_state_change(dev);
808 }
809
810 return 0;
811} 379}
812 380
813static size_t vti_get_size(const struct net_device *dev) 381static size_t vti_get_size(const struct net_device *dev)
@@ -873,7 +441,7 @@ static int __init vti_init(void)
873 err = xfrm4_mode_tunnel_input_register(&vti_handler); 441 err = xfrm4_mode_tunnel_input_register(&vti_handler);
874 if (err < 0) { 442 if (err < 0) {
875 unregister_pernet_device(&vti_net_ops); 443 unregister_pernet_device(&vti_net_ops);
876 pr_info(KERN_INFO "vti init: can't register tunnel\n"); 444 pr_info("vti init: can't register tunnel\n");
877 } 445 }
878 446
879 err = rtnl_link_register(&vti_link_ops); 447 err = rtnl_link_register(&vti_link_ops);
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 59cb8c769056..826be4cb482a 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -47,12 +47,9 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
47 if (!x) 47 if (!x)
48 return; 48 return;
49 49
50 if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { 50 if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
51 atomic_inc(&flow_cache_genid);
52 rt_genid_bump(net);
53
54 ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); 51 ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0);
55 } else 52 else
56 ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0); 53 ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0);
57 xfrm_state_put(x); 54 xfrm_state_put(x);
58} 55}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 77bfcce64fe5..7f80fb4b82d3 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -188,13 +188,16 @@ static int ipip_rcv(struct sk_buff *skb)
188 struct net *net = dev_net(skb->dev); 188 struct net *net = dev_net(skb->dev);
189 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); 189 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
190 struct ip_tunnel *tunnel; 190 struct ip_tunnel *tunnel;
191 const struct iphdr *iph = ip_hdr(skb); 191 const struct iphdr *iph;
192 192
193 iph = ip_hdr(skb);
193 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, 194 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
194 iph->saddr, iph->daddr, 0); 195 iph->saddr, iph->daddr, 0);
195 if (tunnel) { 196 if (tunnel) {
196 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 197 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
197 goto drop; 198 goto drop;
199 if (iptunnel_pull_header(skb, 0, tpi.proto))
200 goto drop;
198 return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); 201 return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
199 } 202 }
200 203
@@ -222,7 +225,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
222 skb->encapsulation = 1; 225 skb->encapsulation = 1;
223 } 226 }
224 227
225 ip_tunnel_xmit(skb, dev, tiph); 228 ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
226 return NETDEV_TX_OK; 229 return NETDEV_TX_OK;
227 230
228tx_error: 231tx_error:
@@ -240,11 +243,13 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
240 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 243 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
241 return -EFAULT; 244 return -EFAULT;
242 245
243 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || 246 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
244 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) 247 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
245 return -EINVAL; 248 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
246 if (p.i_key || p.o_key || p.i_flags || p.o_flags) 249 return -EINVAL;
247 return -EINVAL; 250 }
251
252 p.i_key = p.o_key = p.i_flags = p.o_flags = 0;
248 if (p.iph.ttl) 253 if (p.iph.ttl)
249 p.iph.frag_off |= htons(IP_DF); 254 p.iph.frag_off |= htons(IP_DF);
250 255
@@ -280,7 +285,6 @@ static void ipip_tunnel_setup(struct net_device *dev)
280 dev->flags = IFF_NOARP; 285 dev->flags = IFF_NOARP;
281 dev->iflink = 0; 286 dev->iflink = 0;
282 dev->addr_len = 4; 287 dev->addr_len = 4;
283 dev->features |= NETIF_F_NETNS_LOCAL;
284 dev->features |= NETIF_F_LLTX; 288 dev->features |= NETIF_F_LLTX;
285 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 289 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
286 290
@@ -431,7 +435,7 @@ static int __net_init ipip_init_net(struct net *net)
431static void __net_exit ipip_exit_net(struct net *net) 435static void __net_exit ipip_exit_net(struct net *net)
432{ 436{
433 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); 437 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
434 ip_tunnel_delete_net(itn); 438 ip_tunnel_delete_net(itn, &ipip_link_ops);
435} 439}
436 440
437static struct pernet_operations ipip_net_ops = { 441static struct pernet_operations ipip_net_ops = {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9d9610ae7855..62212c772a4b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -127,9 +127,9 @@ static struct kmem_cache *mrt_cachep __read_mostly;
127static struct mr_table *ipmr_new_table(struct net *net, u32 id); 127static struct mr_table *ipmr_new_table(struct net *net, u32 id);
128static void ipmr_free_table(struct mr_table *mrt); 128static void ipmr_free_table(struct mr_table *mrt);
129 129
130static int ip_mr_forward(struct net *net, struct mr_table *mrt, 130static void ip_mr_forward(struct net *net, struct mr_table *mrt,
131 struct sk_buff *skb, struct mfc_cache *cache, 131 struct sk_buff *skb, struct mfc_cache *cache,
132 int local); 132 int local);
133static int ipmr_cache_report(struct mr_table *mrt, 133static int ipmr_cache_report(struct mr_table *mrt,
134 struct sk_buff *pkt, vifi_t vifi, int assert); 134 struct sk_buff *pkt, vifi_t vifi, int assert);
135static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 135static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
@@ -980,7 +980,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
980 980
981 /* Copy the IP header */ 981 /* Copy the IP header */
982 982
983 skb->network_header = skb->tail; 983 skb_set_network_header(skb, skb->len);
984 skb_put(skb, ihl); 984 skb_put(skb, ihl);
985 skb_copy_to_linear_data(skb, pkt->data, ihl); 985 skb_copy_to_linear_data(skb, pkt->data, ihl);
986 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ 986 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
@@ -1609,7 +1609,7 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1609 1609
1610static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 1610static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1611{ 1611{
1612 struct net_device *dev = ptr; 1612 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1613 struct net *net = dev_net(dev); 1613 struct net *net = dev_net(dev);
1614 struct mr_table *mrt; 1614 struct mr_table *mrt;
1615 struct vif_device *v; 1615 struct vif_device *v;
@@ -1658,7 +1658,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1658 iph->protocol = IPPROTO_IPIP; 1658 iph->protocol = IPPROTO_IPIP;
1659 iph->ihl = 5; 1659 iph->ihl = 5;
1660 iph->tot_len = htons(skb->len); 1660 iph->tot_len = htons(skb->len);
1661 ip_select_ident(iph, skb_dst(skb), NULL); 1661 ip_select_ident(skb, skb_dst(skb), NULL);
1662 ip_send_check(iph); 1662 ip_send_check(iph);
1663 1663
1664 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1664 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1795,9 +1795,9 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1795 1795
1796/* "local" means that we should preserve one skb (for local delivery) */ 1796/* "local" means that we should preserve one skb (for local delivery) */
1797 1797
1798static int ip_mr_forward(struct net *net, struct mr_table *mrt, 1798static void ip_mr_forward(struct net *net, struct mr_table *mrt,
1799 struct sk_buff *skb, struct mfc_cache *cache, 1799 struct sk_buff *skb, struct mfc_cache *cache,
1800 int local) 1800 int local)
1801{ 1801{
1802 int psend = -1; 1802 int psend = -1;
1803 int vif, ct; 1803 int vif, ct;
@@ -1903,14 +1903,13 @@ last_forward:
1903 ipmr_queue_xmit(net, mrt, skb2, cache, psend); 1903 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1904 } else { 1904 } else {
1905 ipmr_queue_xmit(net, mrt, skb, cache, psend); 1905 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1906 return 0; 1906 return;
1907 } 1907 }
1908 } 1908 }
1909 1909
1910dont_forward: 1910dont_forward:
1911 if (!local) 1911 if (!local)
1912 kfree_skb(skb); 1912 kfree_skb(skb);
1913 return 0;
1914} 1913}
1915 1914
1916static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) 1915static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
@@ -2068,9 +2067,8 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
2068 skb_reset_network_header(skb); 2067 skb_reset_network_header(skb);
2069 skb->protocol = htons(ETH_P_IP); 2068 skb->protocol = htons(ETH_P_IP);
2070 skb->ip_summed = CHECKSUM_NONE; 2069 skb->ip_summed = CHECKSUM_NONE;
2071 skb->pkt_type = PACKET_HOST;
2072 2070
2073 skb_tunnel_rx(skb, reg_dev); 2071 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
2074 2072
2075 netif_rx(skb); 2073 netif_rx(skb);
2076 2074
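The explicit PACKET_HOST assignment can go because skb_tunnel_rx() now receives the delivering netns and scrubs the skb, and skb_scrub_packet() already resets pkt_type. The helper is presumably shaped roughly like this (a sketch, not the verbatim inline):

	static inline void skb_tunnel_rx(struct sk_buff *skb,
					 struct net_device *dev,
					 struct net *net)
	{
		skb->dev = dev;
		/* resets pkt_type, dst, conntrack state, ...; "xnet" is true
		 * only when the packet crosses a netns boundary */
		skb_scrub_packet(skb, !net_eq(net, dev_net(dev)));
	}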
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index e7916c193932..1657e39b291f 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -110,8 +110,21 @@ config IP_NF_TARGET_REJECT
110 110
111 To compile it as a module, choose M here. If unsure, say N. 111 To compile it as a module, choose M here. If unsure, say N.
112 112
113config IP_NF_TARGET_SYNPROXY
114 tristate "SYNPROXY target support"
115 depends on NF_CONNTRACK && NETFILTER_ADVANCED
116 select NETFILTER_SYNPROXY
117 select SYN_COOKIES
118 help
119 The SYNPROXY target allows you to intercept TCP connections and
120 establish them using syncookies before they are passed on to the
121 server. This avoids conntrack and server resource usage
122 during SYN-flood attacks.
123
124 To compile it as a module, choose M here. If unsure, say N.
125
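In practice the target is paired with conntrack's UNTRACKED state so that only unverified SYNs are answered by the proxy. A typical ruleset for protecting a web server looks roughly like the following (interface, port and the advertised TCP options are illustrative):

	iptables -t raw -A PREROUTING -i eth0 -p tcp --dport 80 --syn \
		-j CT --notrack
	iptables -A INPUT -i eth0 -p tcp --dport 80 \
		-m conntrack --ctstate INVALID,UNTRACKED \
		-j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460
	iptables -A INPUT -i eth0 -p tcp --dport 80 \
		-m conntrack --ctstate INVALID -j DROP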
113config IP_NF_TARGET_ULOG 126config IP_NF_TARGET_ULOG
114 tristate "ULOG target support" 127 tristate "ULOG target support (obsolete)"
115 default m if NETFILTER_ADVANCED=n 128 default m if NETFILTER_ADVANCED=n
116 ---help--- 129 ---help---
117 130
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 007b128eecc9..3622b248b6dd 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
46obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o 46obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
47obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o 47obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
48obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o 48obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
49obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o
49obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o 50obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
50 51
51# generic ARP tables 52# generic ARP tables
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index eadab1ed6500..a865f6f94013 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -48,7 +48,7 @@ static int __net_init arptable_filter_net_init(struct net *net)
48 net->ipv4.arptable_filter = 48 net->ipv4.arptable_filter =
49 arpt_register_table(net, &packet_filter, repl); 49 arpt_register_table(net, &packet_filter, repl);
50 kfree(repl); 50 kfree(repl);
51 return PTR_RET(net->ipv4.arptable_filter); 51 return PTR_ERR_OR_ZERO(net->ipv4.arptable_filter);
52} 52}
53 53
54static void __net_exit arptable_filter_net_exit(struct net *net) 54static void __net_exit arptable_filter_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 5d5d4d1be9c2..00352ce0f0de 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -108,7 +108,7 @@ static int masq_device_event(struct notifier_block *this,
108 unsigned long event, 108 unsigned long event,
109 void *ptr) 109 void *ptr)
110{ 110{
111 const struct net_device *dev = ptr; 111 const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
112 struct net *net = dev_net(dev); 112 struct net *net = dev_net(dev);
113 113
114 if (event == NETDEV_DOWN) { 114 if (event == NETDEV_DOWN) {
@@ -118,7 +118,7 @@ static int masq_device_event(struct notifier_block *this,
118 NF_CT_ASSERT(dev->ifindex != 0); 118 NF_CT_ASSERT(dev->ifindex != 0);
119 119
120 nf_ct_iterate_cleanup(net, device_cmp, 120 nf_ct_iterate_cleanup(net, device_cmp,
121 (void *)(long)dev->ifindex); 121 (void *)(long)dev->ifindex, 0, 0);
122 } 122 }
123 123
124 return NOTIFY_DONE; 124 return NOTIFY_DONE;
@@ -129,7 +129,10 @@ static int masq_inet_event(struct notifier_block *this,
129 void *ptr) 129 void *ptr)
130{ 130{
131 struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; 131 struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
132 return masq_device_event(this, event, dev); 132 struct netdev_notifier_info info;
133
134 netdev_notifier_info_init(&info, dev);
135 return masq_device_event(this, event, &info);
133} 136}
134 137
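Netdevice notifiers now receive a struct netdev_notifier_info rather than a bare net_device pointer, which is why masq_inet_event() has to wrap the device before re-dispatching to masq_device_event(). The wrapper is essentially:

	struct netdev_notifier_info {
		struct net_device *dev;
	};

	static inline void
	netdev_notifier_info_init(struct netdev_notifier_info *info,
				  struct net_device *dev)
	{
		info->dev = dev;
	}

	static inline struct net_device *
	netdev_notifier_info_to_dev(const struct netdev_notifier_info *info)
	{
		return info->dev;
	}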
135static struct notifier_block masq_dev_notifier = { 138static struct notifier_block masq_dev_notifier = {
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 04b18c1ac345..b969131ad1c1 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -119,7 +119,26 @@ static void send_reset(struct sk_buff *oldskb, int hook)
119 119
120 nf_ct_attach(nskb, oldskb); 120 nf_ct_attach(nskb, oldskb);
121 121
122 ip_local_out(nskb); 122#ifdef CONFIG_BRIDGE_NETFILTER
123 /* If we use ip_local_out for bridged traffic, the MAC source on
124 * the RST will be ours, instead of the destination's. This confuses
125 * some routers/firewalls, and they drop the packet. So we need to
126 * build the eth header using the original destination's MAC as the
127 * source, and send the RST packet directly.
128 */
129 if (oldskb->nf_bridge) {
130 struct ethhdr *oeth = eth_hdr(oldskb);
131 nskb->dev = oldskb->nf_bridge->physindev;
132 niph->tot_len = htons(nskb->len);
133 ip_send_check(niph);
134 if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
135 oeth->h_source, oeth->h_dest, nskb->len) < 0)
136 goto free_nskb;
137 dev_queue_xmit(nskb);
138 } else
139#endif
140 ip_local_out(nskb);
141
123 return; 142 return;
124 143
125 free_nskb: 144 free_nskb:
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
new file mode 100644
index 000000000000..b6346bf2fde3
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -0,0 +1,480 @@
1/*
2 * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/module.h>
10#include <linux/skbuff.h>
11#include <net/tcp.h>
12
13#include <linux/netfilter_ipv4/ip_tables.h>
14#include <linux/netfilter/x_tables.h>
15#include <linux/netfilter/xt_SYNPROXY.h>
16#include <net/netfilter/nf_conntrack.h>
17#include <net/netfilter/nf_conntrack_seqadj.h>
18#include <net/netfilter/nf_conntrack_synproxy.h>
19
20static struct iphdr *
21synproxy_build_ip(struct sk_buff *skb, u32 saddr, u32 daddr)
22{
23 struct iphdr *iph;
24
25 skb_reset_network_header(skb);
26 iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
27 iph->version = 4;
28 iph->ihl = sizeof(*iph) / 4;
29 iph->tos = 0;
30 iph->id = 0;
31 iph->frag_off = htons(IP_DF);
32 iph->ttl = sysctl_ip_default_ttl;
33 iph->protocol = IPPROTO_TCP;
34 iph->check = 0;
35 iph->saddr = saddr;
36 iph->daddr = daddr;
37
38 return iph;
39}
40
41static void
42synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
43 struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
44 struct iphdr *niph, struct tcphdr *nth,
45 unsigned int tcp_hdr_size)
46{
47 nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
48 nskb->ip_summed = CHECKSUM_PARTIAL;
49 nskb->csum_start = (unsigned char *)nth - nskb->head;
50 nskb->csum_offset = offsetof(struct tcphdr, check);
51
52 skb_dst_set_noref(nskb, skb_dst(skb));
53 nskb->protocol = htons(ETH_P_IP);
54 if (ip_route_me_harder(nskb, RTN_UNSPEC))
55 goto free_nskb;
56
57 if (nfct) {
58 nskb->nfct = nfct;
59 nskb->nfctinfo = ctinfo;
60 nf_conntrack_get(nfct);
61 }
62
63 ip_local_out(nskb);
64 return;
65
66free_nskb:
67 kfree_skb(nskb);
68}
69
70static void
71synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
72 const struct synproxy_options *opts)
73{
74 struct sk_buff *nskb;
75 struct iphdr *iph, *niph;
76 struct tcphdr *nth;
77 unsigned int tcp_hdr_size;
78 u16 mss = opts->mss;
79
80 iph = ip_hdr(skb);
81
82 tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
83 nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
84 GFP_ATOMIC);
85 if (nskb == NULL)
86 return;
87 skb_reserve(nskb, MAX_TCP_HEADER);
88
89 niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
90
91 skb_reset_transport_header(nskb);
92 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
93 nth->source = th->dest;
94 nth->dest = th->source;
95 nth->seq = htonl(__cookie_v4_init_sequence(iph, th, &mss));
96 nth->ack_seq = htonl(ntohl(th->seq) + 1);
97 tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
98 if (opts->options & XT_SYNPROXY_OPT_ECN)
99 tcp_flag_word(nth) |= TCP_FLAG_ECE;
100 nth->doff = tcp_hdr_size / 4;
101 nth->window = 0;
102 nth->check = 0;
103 nth->urg_ptr = 0;
104
105 synproxy_build_options(nth, opts);
106
107 synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
108 niph, nth, tcp_hdr_size);
109}
110
111static void
112synproxy_send_server_syn(const struct synproxy_net *snet,
113 const struct sk_buff *skb, const struct tcphdr *th,
114 const struct synproxy_options *opts, u32 recv_seq)
115{
116 struct sk_buff *nskb;
117 struct iphdr *iph, *niph;
118 struct tcphdr *nth;
119 unsigned int tcp_hdr_size;
120
121 iph = ip_hdr(skb);
122
123 tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
124 nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
125 GFP_ATOMIC);
126 if (nskb == NULL)
127 return;
128 skb_reserve(nskb, MAX_TCP_HEADER);
129
130 niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
131
132 skb_reset_transport_header(nskb);
133 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
134 nth->source = th->source;
135 nth->dest = th->dest;
136 nth->seq = htonl(recv_seq - 1);
137 /* ack_seq is used to relay our ISN to the synproxy hook to initialize
138 * sequence number translation once a connection tracking entry exists.
139 */
140 nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
141 tcp_flag_word(nth) = TCP_FLAG_SYN;
142 if (opts->options & XT_SYNPROXY_OPT_ECN)
143 tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
144 nth->doff = tcp_hdr_size / 4;
145 nth->window = th->window;
146 nth->check = 0;
147 nth->urg_ptr = 0;
148
149 synproxy_build_options(nth, opts);
150
151 synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
152 niph, nth, tcp_hdr_size);
153}
154
155static void
156synproxy_send_server_ack(const struct synproxy_net *snet,
157 const struct ip_ct_tcp *state,
158 const struct sk_buff *skb, const struct tcphdr *th,
159 const struct synproxy_options *opts)
160{
161 struct sk_buff *nskb;
162 struct iphdr *iph, *niph;
163 struct tcphdr *nth;
164 unsigned int tcp_hdr_size;
165
166 iph = ip_hdr(skb);
167
168 tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
169 nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
170 GFP_ATOMIC);
171 if (nskb == NULL)
172 return;
173 skb_reserve(nskb, MAX_TCP_HEADER);
174
175 niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
176
177 skb_reset_transport_header(nskb);
178 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
179 nth->source = th->dest;
180 nth->dest = th->source;
181 nth->seq = htonl(ntohl(th->ack_seq));
182 nth->ack_seq = htonl(ntohl(th->seq) + 1);
183 tcp_flag_word(nth) = TCP_FLAG_ACK;
184 nth->doff = tcp_hdr_size / 4;
185 nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
186 nth->check = 0;
187 nth->urg_ptr = 0;
188
189 synproxy_build_options(nth, opts);
190
191 synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
192}
193
194static void
195synproxy_send_client_ack(const struct synproxy_net *snet,
196 const struct sk_buff *skb, const struct tcphdr *th,
197 const struct synproxy_options *opts)
198{
199 struct sk_buff *nskb;
200 struct iphdr *iph, *niph;
201 struct tcphdr *nth;
202 unsigned int tcp_hdr_size;
203
204 iph = ip_hdr(skb);
205
206 tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
207 nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
208 GFP_ATOMIC);
209 if (nskb == NULL)
210 return;
211 skb_reserve(nskb, MAX_TCP_HEADER);
212
213 niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
214
215 skb_reset_transport_header(nskb);
216 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
217 nth->source = th->source;
218 nth->dest = th->dest;
219 nth->seq = htonl(ntohl(th->seq) + 1);
220 nth->ack_seq = th->ack_seq;
221 tcp_flag_word(nth) = TCP_FLAG_ACK;
222 nth->doff = tcp_hdr_size / 4;
223 nth->window = ntohs(htons(th->window) >> opts->wscale);
224 nth->check = 0;
225 nth->urg_ptr = 0;
226
227 synproxy_build_options(nth, opts);
228
229 synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
230}
231
232static bool
233synproxy_recv_client_ack(const struct synproxy_net *snet,
234 const struct sk_buff *skb, const struct tcphdr *th,
235 struct synproxy_options *opts, u32 recv_seq)
236{
237 int mss;
238
239 mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1);
240 if (mss == 0) {
241 this_cpu_inc(snet->stats->cookie_invalid);
242 return false;
243 }
244
245 this_cpu_inc(snet->stats->cookie_valid);
246 opts->mss = mss;
247
248 if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
249 synproxy_check_timestamp_cookie(opts);
250
251 synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
252 return true;
253}
254
255static unsigned int
256synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
257{
258 const struct xt_synproxy_info *info = par->targinfo;
259 struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
260 struct synproxy_options opts = {};
261 struct tcphdr *th, _th;
262
263 if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
264 return NF_DROP;
265
266 th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
267 if (th == NULL)
268 return NF_DROP;
269
270 if (!synproxy_parse_options(skb, par->thoff, th, &opts))
271 return NF_DROP;
272
273 if (th->syn && !(th->ack || th->fin || th->rst)) {
274 /* Initial SYN from client */
275 this_cpu_inc(snet->stats->syn_received);
276
277 if (th->ece && th->cwr)
278 opts.options |= XT_SYNPROXY_OPT_ECN;
279
280 opts.options &= info->options;
281 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
282 synproxy_init_timestamp_cookie(info, &opts);
283 else
284 opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
285 XT_SYNPROXY_OPT_SACK_PERM |
286 XT_SYNPROXY_OPT_ECN);
287
288 synproxy_send_client_synack(skb, th, &opts);
289 return NF_DROP;
290
291 } else if (th->ack && !(th->fin || th->rst || th->syn)) {
292 /* ACK from client */
293 synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
294 return NF_DROP;
295 }
296
297 return XT_CONTINUE;
298}
299
300static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
301 struct sk_buff *skb,
302 const struct net_device *in,
303 const struct net_device *out,
304 int (*okfn)(struct sk_buff *))
305{
306 struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out));
307 enum ip_conntrack_info ctinfo;
308 struct nf_conn *ct;
309 struct nf_conn_synproxy *synproxy;
310 struct synproxy_options opts = {};
311 const struct ip_ct_tcp *state;
312 struct tcphdr *th, _th;
313 unsigned int thoff;
314
315 ct = nf_ct_get(skb, &ctinfo);
316 if (ct == NULL)
317 return NF_ACCEPT;
318
319 synproxy = nfct_synproxy(ct);
320 if (synproxy == NULL)
321 return NF_ACCEPT;
322
323 if (nf_is_loopback_packet(skb))
324 return NF_ACCEPT;
325
326 thoff = ip_hdrlen(skb);
327 th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
328 if (th == NULL)
329 return NF_DROP;
330
331 state = &ct->proto.tcp;
332 switch (state->state) {
333 case TCP_CONNTRACK_CLOSE:
334 if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
335 nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
336 ntohl(th->seq) + 1);
337 break;
338 }
339
340 if (!th->syn || th->ack ||
341 CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
342 break;
343
344 /* Reopened connection - reset the sequence number and timestamp
345 * adjustments, they will get initialized once the connection is
346 * reestablished.
347 */
348 nf_ct_seqadj_init(ct, ctinfo, 0);
349 synproxy->tsoff = 0;
350 this_cpu_inc(snet->stats->conn_reopened);
351
352 /* fall through */
353 case TCP_CONNTRACK_SYN_SENT:
354 if (!synproxy_parse_options(skb, thoff, th, &opts))
355 return NF_DROP;
356
357 if (!th->syn && th->ack &&
358 CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
359 /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
360 * therefore we need to add 1 to make the SYN sequence
361 * number match the one of first SYN.
362 */
363 if (synproxy_recv_client_ack(snet, skb, th, &opts,
364 ntohl(th->seq) + 1))
365 this_cpu_inc(snet->stats->cookie_retrans);
366
367 return NF_DROP;
368 }
369
370 synproxy->isn = ntohl(th->ack_seq);
371 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
372 synproxy->its = opts.tsecr;
373 break;
374 case TCP_CONNTRACK_SYN_RECV:
375 if (!th->syn || !th->ack)
376 break;
377
378 if (!synproxy_parse_options(skb, thoff, th, &opts))
379 return NF_DROP;
380
381 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
382 synproxy->tsoff = opts.tsval - synproxy->its;
383
384 opts.options &= ~(XT_SYNPROXY_OPT_MSS |
385 XT_SYNPROXY_OPT_WSCALE |
386 XT_SYNPROXY_OPT_SACK_PERM);
387
388 swap(opts.tsval, opts.tsecr);
389 synproxy_send_server_ack(snet, state, skb, th, &opts);
390
391 nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
392
393 swap(opts.tsval, opts.tsecr);
394 synproxy_send_client_ack(snet, skb, th, &opts);
395
396 consume_skb(skb);
397 return NF_STOLEN;
398 default:
399 break;
400 }
401
402 synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
403 return NF_ACCEPT;
404}
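
/* Not part of the patch: once both half-connections are spliced, every
 * sequence number the real server emits differs from the cookie ISN the
 * proxy used towards the client by a constant delta.  nf_ct_seqadj_init()
 * above records that delta; conceptually the per-packet fixup applied
 * from then on by the conntrack seqadj engine is just:
 */
static inline __be32 synproxy_translate_seq(__be32 seq, s32 seqoff)
{
	return htonl(ntohl(seq) + seqoff);
}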

static int synproxy_tg4_check(const struct xt_tgchk_param *par)
{
	const struct ipt_entry *e = par->entryinfo;

	if (e->ip.proto != IPPROTO_TCP ||
	    e->ip.invflags & XT_INV_PROTO)
		return -EINVAL;

	return nf_ct_l3proto_try_module_get(par->family);
}

static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par)
{
	nf_ct_l3proto_module_put(par->family);
}

static struct xt_target synproxy_tg4_reg __read_mostly = {
	.name		= "SYNPROXY",
	.family		= NFPROTO_IPV4,
	.target		= synproxy_tg4,
	.targetsize	= sizeof(struct xt_synproxy_info),
	.checkentry	= synproxy_tg4_check,
	.destroy	= synproxy_tg4_destroy,
	.me		= THIS_MODULE,
};

static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
	{
		.hook		= ipv4_synproxy_hook,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
	},
	{
		.hook		= ipv4_synproxy_hook,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
	},
};

static int __init synproxy_tg4_init(void)
{
	int err;

	err = nf_register_hooks(ipv4_synproxy_ops,
				ARRAY_SIZE(ipv4_synproxy_ops));
	if (err < 0)
		goto err1;

	err = xt_register_target(&synproxy_tg4_reg);
	if (err < 0)
		goto err2;

	return 0;

err2:
	nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
err1:
	return err;
}

static void __exit synproxy_tg4_exit(void)
{
	xt_unregister_target(&synproxy_tg4_reg);
	nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
}

module_init(synproxy_tg4_init);
module_exit(synproxy_tg4_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
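
A typical deployment of this target, for orientation only (interface, port and
option values here are illustrative, not mandated by the patch): untrack
incoming SYNs in the raw table, hand untracked/invalid TCP to SYNPROXY, drop
what remains invalid, and disable loose conntrack pickup so mid-stream ACKs
are not accepted blindly:

	iptables -t raw -A PREROUTING -i eth0 -p tcp --dport 80 --syn \
		-j CT --notrack
	iptables -A INPUT -i eth0 -p tcp --dport 80 \
		-m state --state INVALID,UNTRACKED -j SYNPROXY \
		--sack-perm --timestamp --wscale 7 --mss 1460
	iptables -A INPUT -i eth0 -p tcp --dport 80 \
		-m state --state INVALID -j DROP
	echo 0 > /proc/sys/net/netfilter/nf_conntrack_tcp_loose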
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index ff4b781b1056..cbc22158af49 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -125,15 +125,16 @@ static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum)
 /* timer function to flush queue in flushtimeout time */
 static void ulog_timer(unsigned long data)
 {
+	unsigned int groupnum = *((unsigned int *)data);
 	struct ulog_net *ulog = container_of((void *)data,
 					     struct ulog_net,
-					     nlgroup[*(unsigned int *)data]);
+					     nlgroup[groupnum]);
 	pr_debug("timer function called, calling ulog_send\n");
 
 	/* lock to protect against somebody modifying our structure
 	 * from ipt_ulog_target at the same time */
 	spin_lock_bh(&ulog->lock);
-	ulog_send(ulog, data);
+	ulog_send(ulog, groupnum);
 	spin_unlock_bh(&ulog->lock);
 }
 
@@ -330,6 +331,12 @@ static int ulog_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ipt_ulog_info *loginfo = par->targinfo;
 
+	if (!par->net->xt.ulog_warn_deprecated) {
+		pr_info("ULOG is deprecated and it will be removed soon, "
+			"use NFLOG instead\n");
+		par->net->xt.ulog_warn_deprecated = true;
+	}
+
 	if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
 		pr_debug("prefix not null-terminated\n");
 		return -EINVAL;
@@ -407,8 +414,11 @@ static int __net_init ulog_tg_net_init(struct net *net)
 
 	spin_lock_init(&ulog->lock);
 	/* initialize ulog_buffers */
-	for (i = 0; i < ULOG_MAXNLGROUPS; i++)
-		setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer, i);
+	for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
+		ulog->nlgroup[i] = i;
+		setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer,
+			    (unsigned long)&ulog->nlgroup[i]);
+	}
 
 	ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
 	if (!ulog->nflognl)
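
The fix above works because container_of() needs a pointer to a member that
actually lives inside the surrounding structure; the old code handed the timer
a bare index cast through `unsigned long`, which is no such pointer. A minimal
standalone sketch of the recovered pattern (struct and names invented for
illustration):

	#include <stddef.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct ulog_net_demo {
		unsigned int nlgroup[32];	/* one slot per netlink group */
	};

	/* timer data now points at &demo->nlgroup[i], so we can walk back */
	static struct ulog_net_demo *demo_from_group(unsigned int *group)
	{
		unsigned int idx = *group;

		/* step back to element 0, then back to the struct start */
		return container_of(group - idx, struct ulog_net_demo,
				    nlgroup[0]);
	}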
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 6b3da5cf54e9..50af5b45c050 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -69,7 +69,7 @@ static int __net_init iptable_filter_net_init(struct net *net)
 	net->ipv4.iptable_filter =
 		ipt_register_table(net, &packet_filter, repl);
 	kfree(repl);
-	return PTR_RET(net->ipv4.iptable_filter);
+	return PTR_ERR_OR_ZERO(net->ipv4.iptable_filter);
 }
 
 static void __net_exit iptable_filter_net_exit(struct net *net)
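
The same mechanical PTR_RET() -> PTR_ERR_OR_ZERO() rename repeats in the
mangle, nat, raw and security tables below; only the helper's name changes,
not its behaviour. The helper collapses an ERR_PTR-style result into an
errno-or-zero int, equivalent to this sketch (requires <linux/err.h>):

	static inline int ptr_err_or_zero(const void *ptr)
	{
		return IS_ERR(ptr) ? PTR_ERR(ptr) : 0;
	}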
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index cba5658ec82c..0d8cd82e0fad 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -107,7 +107,7 @@ static int __net_init iptable_mangle_net_init(struct net *net)
 	net->ipv4.iptable_mangle =
 		ipt_register_table(net, &packet_mangler, repl);
 	kfree(repl);
-	return PTR_RET(net->ipv4.iptable_mangle);
+	return PTR_ERR_OR_ZERO(net->ipv4.iptable_mangle);
 }
 
 static void __net_exit iptable_mangle_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 6383273d54e1..683bfaffed65 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -292,7 +292,7 @@ static int __net_init iptable_nat_net_init(struct net *net)
 		return -ENOMEM;
 	net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
 	kfree(repl);
-	return PTR_RET(net->ipv4.nat_table);
+	return PTR_ERR_OR_ZERO(net->ipv4.nat_table);
 }
 
 static void __net_exit iptable_nat_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 03d9696d3c6e..1f82aea11df6 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -48,7 +48,7 @@ static int __net_init iptable_raw_net_init(struct net *net)
 	net->ipv4.iptable_raw =
 		ipt_register_table(net, &packet_raw, repl);
 	kfree(repl);
-	return PTR_RET(net->ipv4.iptable_raw);
+	return PTR_ERR_OR_ZERO(net->ipv4.iptable_raw);
 }
 
 static void __net_exit iptable_raw_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index b283d8e2601a..f867a8d38bf7 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -66,7 +66,7 @@ static int __net_init iptable_security_net_init(struct net *net)
 	net->ipv4.iptable_security =
 		ipt_register_table(net, &security_table, repl);
 	kfree(repl);
-	return PTR_RET(net->ipv4.iptable_security);
+	return PTR_ERR_OR_ZERO(net->ipv4.iptable_security);
 }
 
 static void __net_exit iptable_security_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 567d84168bd2..86f5b34a4ed1 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -25,6 +25,7 @@
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
@@ -136,11 +137,7 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
 	/* adjust seqs for loopback traffic only in outgoing direction */
 	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
 	    !nf_is_loopback_packet(skb)) {
-		typeof(nf_nat_seq_adjust_hook) seq_adjust;
-
-		seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
-		if (!seq_adjust ||
-		    !seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
+		if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
 			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
 			return NF_DROP;
 		}
@@ -223,7 +220,7 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
 static int log_invalid_proto_min = 0;
 static int log_invalid_proto_max = 255;
 
-static ctl_table ip_ct_sysctl_table[] = {
+static struct ctl_table ip_ct_sysctl_table[] = {
 	{
 		.procname	= "ip_conntrack_max",
 		.maxlen		= sizeof(int),
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 7d93d62cd5fd..d7d9882d4cae 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -33,7 +33,6 @@
 #include <linux/netdevice.h>
 #include <net/snmp.h>
 #include <net/ip.h>
-#include <net/ipv6.h>
 #include <net/icmp.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
@@ -46,8 +45,18 @@
 #include <net/inet_common.h>
 #include <net/checksum.h>
 
+#if IS_ENABLED(CONFIG_IPV6)
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/transp_v6.h>
+#endif
 
-static struct ping_table ping_table;
+
+struct ping_table ping_table;
+struct pingv6_ops pingv6_ops;
+EXPORT_SYMBOL_GPL(pingv6_ops);
 
 static u16 ping_port_rover;
 
@@ -58,6 +67,7 @@ static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int ma
 	pr_debug("hash(%d) = %d\n", num, res);
 	return res;
 }
+EXPORT_SYMBOL_GPL(ping_hash);
 
 static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
 						     struct net *net, unsigned int num)
@@ -65,7 +75,7 @@ static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
 	return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)];
 }
 
-static int ping_v4_get_port(struct sock *sk, unsigned short ident)
+int ping_get_port(struct sock *sk, unsigned short ident)
 {
 	struct hlist_nulls_node *node;
 	struct hlist_nulls_head *hlist;
@@ -103,6 +113,10 @@ next_port:
 		ping_portaddr_for_each_entry(sk2, node, hlist) {
 			isk2 = inet_sk(sk2);
 
+			/* BUG? Why is this reuse and not reuseaddr? ping.c
+			 * doesn't turn off SO_REUSEADDR, and it doesn't expect
+			 * that other ping processes can steal its packets.
+			 */
 			if ((isk2->inet_num == ident) &&
 			    (sk2 != sk) &&
 			    (!sk2->sk_reuse || !sk->sk_reuse))
@@ -125,17 +139,18 @@ fail:
 	write_unlock_bh(&ping_table.lock);
 	return 1;
 }
+EXPORT_SYMBOL_GPL(ping_get_port);
 
-static void ping_v4_hash(struct sock *sk)
+void ping_hash(struct sock *sk)
 {
-	pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
+	pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
 	BUG(); /* "Please do not press this button again." */
 }
 
-static void ping_v4_unhash(struct sock *sk)
+void ping_unhash(struct sock *sk)
 {
 	struct inet_sock *isk = inet_sk(sk);
-	pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
+	pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
 	if (sk_hashed(sk)) {
 		write_lock_bh(&ping_table.lock);
 		hlist_nulls_del(&sk->sk_nulls_node);
@@ -146,31 +161,61 @@ static void ping_v4_unhash(struct sock *sk)
 		write_unlock_bh(&ping_table.lock);
 	}
 }
+EXPORT_SYMBOL_GPL(ping_unhash);
 
-static struct sock *ping_v4_lookup(struct net *net, __be32 saddr, __be32 daddr,
-				   u16 ident, int dif)
+static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
 {
 	struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident);
 	struct sock *sk = NULL;
 	struct inet_sock *isk;
 	struct hlist_nulls_node *hnode;
+	int dif = skb->dev->ifindex;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
+			 (int)ident, &ip_hdr(skb)->daddr, dif);
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n",
+			 (int)ident, &ipv6_hdr(skb)->daddr, dif);
+#endif
+	}
 
-	pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
-		 (int)ident, &daddr, dif);
 	read_lock_bh(&ping_table.lock);
 
 	ping_portaddr_for_each_entry(sk, hnode, hslot) {
 		isk = inet_sk(sk);
 
-		pr_debug("found: %p: num = %d, daddr = %pI4, dif = %d\n", sk,
-			 (int)isk->inet_num, &isk->inet_rcv_saddr,
-			 sk->sk_bound_dev_if);
-
 		pr_debug("iterate\n");
 		if (isk->inet_num != ident)
 			continue;
-		if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr)
-			continue;
+
+		if (skb->protocol == htons(ETH_P_IP) &&
+		    sk->sk_family == AF_INET) {
+			pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk,
+				 (int) isk->inet_num, &isk->inet_rcv_saddr,
+				 sk->sk_bound_dev_if);
+
+			if (isk->inet_rcv_saddr &&
+			    isk->inet_rcv_saddr != ip_hdr(skb)->daddr)
+				continue;
+#if IS_ENABLED(CONFIG_IPV6)
+		} else if (skb->protocol == htons(ETH_P_IPV6) &&
+			   sk->sk_family == AF_INET6) {
+			struct ipv6_pinfo *np = inet6_sk(sk);
+
+			pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk,
+				 (int) isk->inet_num,
+				 &inet6_sk(sk)->rcv_saddr,
+				 sk->sk_bound_dev_if);
+
+			if (!ipv6_addr_any(&np->rcv_saddr) &&
+			    !ipv6_addr_equal(&np->rcv_saddr,
+					     &ipv6_hdr(skb)->daddr))
+				continue;
+#endif
+		}
+
 		if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
 			continue;
 
@@ -200,7 +245,7 @@ static void inet_get_ping_group_range_net(struct net *net, kgid_t *low,
 }
 
 
-static int ping_init_sock(struct sock *sk)
+int ping_init_sock(struct sock *sk)
 {
 	struct net *net = sock_net(sk);
 	kgid_t group = current_egid();
@@ -225,8 +270,9 @@ static int ping_init_sock(struct sock *sk)
 
 	return -EACCES;
 }
+EXPORT_SYMBOL_GPL(ping_init_sock);
 
-static void ping_close(struct sock *sk, long timeout)
+void ping_close(struct sock *sk, long timeout)
 {
 	pr_debug("ping_close(sk=%p,sk->num=%u)\n",
 		 inet_sk(sk), inet_sk(sk)->inet_num);
@@ -234,36 +280,122 @@ static void ping_close(struct sock *sk, long timeout)
 
 	sk_common_release(sk);
 }
+EXPORT_SYMBOL_GPL(ping_close);
+
+/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */
+static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
+				struct sockaddr *uaddr, int addr_len) {
+	struct net *net = sock_net(sk);
+	if (sk->sk_family == AF_INET) {
+		struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
+		int chk_addr_ret;
+
+		if (addr_len < sizeof(*addr))
+			return -EINVAL;
+
+		pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
+			 sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
+
+		chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
+
+		if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
+			chk_addr_ret = RTN_LOCAL;
+
+		if ((sysctl_ip_nonlocal_bind == 0 &&
+		     isk->freebind == 0 && isk->transparent == 0 &&
+		     chk_addr_ret != RTN_LOCAL) ||
+		    chk_addr_ret == RTN_MULTICAST ||
+		    chk_addr_ret == RTN_BROADCAST)
+			return -EADDRNOTAVAIL;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (sk->sk_family == AF_INET6) {
+		struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr;
+		int addr_type, scoped, has_addr;
+		struct net_device *dev = NULL;
+
+		if (addr_len < sizeof(*addr))
+			return -EINVAL;
+
+		pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n",
+			 sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port));
+
+		addr_type = ipv6_addr_type(&addr->sin6_addr);
+		scoped = __ipv6_addr_needs_scope_id(addr_type);
+		if ((addr_type != IPV6_ADDR_ANY &&
+		     !(addr_type & IPV6_ADDR_UNICAST)) ||
+		    (scoped && !addr->sin6_scope_id))
+			return -EINVAL;
+
+		rcu_read_lock();
+		if (addr->sin6_scope_id) {
+			dev = dev_get_by_index_rcu(net, addr->sin6_scope_id);
+			if (!dev) {
+				rcu_read_unlock();
+				return -ENODEV;
+			}
+		}
+		has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev,
+						    scoped);
+		rcu_read_unlock();
+
+		if (!(isk->freebind || isk->transparent || has_addr ||
+		      addr_type == IPV6_ADDR_ANY))
+			return -EADDRNOTAVAIL;
+
+		if (scoped)
+			sk->sk_bound_dev_if = addr->sin6_scope_id;
+#endif
+	} else {
+		return -EAFNOSUPPORT;
+	}
+	return 0;
+}
+
+static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr)
+{
+	if (saddr->sa_family == AF_INET) {
+		struct inet_sock *isk = inet_sk(sk);
+		struct sockaddr_in *addr = (struct sockaddr_in *) saddr;
+		isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (saddr->sa_family == AF_INET6) {
+		struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr;
+		struct ipv6_pinfo *np = inet6_sk(sk);
+		np->rcv_saddr = np->saddr = addr->sin6_addr;
+#endif
+	}
+}
 
+static void ping_clear_saddr(struct sock *sk, int dif)
+{
+	sk->sk_bound_dev_if = dif;
+	if (sk->sk_family == AF_INET) {
+		struct inet_sock *isk = inet_sk(sk);
+		isk->inet_rcv_saddr = isk->inet_saddr = 0;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (sk->sk_family == AF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+		memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr));
+		memset(&np->saddr, 0, sizeof(np->saddr));
+#endif
+	}
+}
 /*
  * We need our own bind because there are no privileged id's == local ports.
  * Moreover, we don't allow binding to multi- and broadcast addresses.
  */
 
-static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
-	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
 	struct inet_sock *isk = inet_sk(sk);
 	unsigned short snum;
-	int chk_addr_ret;
 	int err;
+	int dif = sk->sk_bound_dev_if;
 
-	if (addr_len < sizeof(struct sockaddr_in))
-		return -EINVAL;
-
-	pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n",
-		 sk, addr->sin_addr.s_addr, ntohs(addr->sin_port));
-
-	chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
-	if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
-		chk_addr_ret = RTN_LOCAL;
-
-	if ((sysctl_ip_nonlocal_bind == 0 &&
-	     isk->freebind == 0 && isk->transparent == 0 &&
-	     chk_addr_ret != RTN_LOCAL) ||
-	    chk_addr_ret == RTN_MULTICAST ||
-	    chk_addr_ret == RTN_BROADCAST)
-		return -EADDRNOTAVAIL;
+	err = ping_check_bind_addr(sk, isk, uaddr, addr_len);
+	if (err)
+		return err;
 
 	lock_sock(sk);
 
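
For context (not in the patch): the reason ping_bind() must now cope with two
address families is that both of these unprivileged datagram ICMP sockets end
up in the shared bind path, the second one once the IPv6 side lands:

	#include <sys/socket.h>
	#include <netinet/in.h>

	/* allowed for uids inside the net.ipv4.ping_group_range sysctl */
	int open_ping_sockets(int *fd4, int *fd6)
	{
		*fd4 = socket(AF_INET,  SOCK_DGRAM, IPPROTO_ICMP);
		*fd6 = socket(AF_INET6, SOCK_DGRAM, IPPROTO_ICMPV6);

		return (*fd4 >= 0 && *fd6 >= 0) ? 0 : -1;
	}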
@@ -272,42 +404,50 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		goto out;
 
 	err = -EADDRINUSE;
-	isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr;
-	snum = ntohs(addr->sin_port);
-	if (ping_v4_get_port(sk, snum) != 0) {
-		isk->inet_saddr = isk->inet_rcv_saddr = 0;
+	ping_set_saddr(sk, uaddr);
+	snum = ntohs(((struct sockaddr_in *)uaddr)->sin_port);
+	if (ping_get_port(sk, snum) != 0) {
+		ping_clear_saddr(sk, dif);
 		goto out;
 	}
 
-	pr_debug("after bind(): num = %d, daddr = %pI4, dif = %d\n",
+	pr_debug("after bind(): num = %d, dif = %d\n",
 		 (int)isk->inet_num,
-		 &isk->inet_rcv_saddr,
 		 (int)sk->sk_bound_dev_if);
 
 	err = 0;
-	if (isk->inet_rcv_saddr)
+	if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) ||
+	    (sk->sk_family == AF_INET6 &&
+	     !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)))
 		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
+
 	if (snum)
 		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
 	isk->inet_sport = htons(isk->inet_num);
 	isk->inet_daddr = 0;
 	isk->inet_dport = 0;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (sk->sk_family == AF_INET6)
+		memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr));
+#endif
+
 	sk_dst_reset(sk);
 out:
 	release_sock(sk);
 	pr_debug("ping_v4_bind -> %d\n", err);
 	return err;
 }
+EXPORT_SYMBOL_GPL(ping_bind);
 
 /*
  * Is this a supported type of ICMP message?
  */
 
-static inline int ping_supported(int type, int code)
+static inline int ping_supported(int family, int type, int code)
 {
-	if (type == ICMP_ECHO && code == 0)
-		return 1;
-	return 0;
+	return (family == AF_INET && type == ICMP_ECHO && code == 0) ||
+	       (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0);
 }
 
@@ -315,30 +455,42 @@ static inline int ping_supported(int type, int code)
  * sort of error condition.
  */
 
-static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
-
-void ping_err(struct sk_buff *skb, u32 info)
+void ping_err(struct sk_buff *skb, int offset, u32 info)
 {
-	struct iphdr *iph = (struct iphdr *)skb->data;
-	struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2));
+	int family;
+	struct icmphdr *icmph;
 	struct inet_sock *inet_sock;
-	int type = icmp_hdr(skb)->type;
-	int code = icmp_hdr(skb)->code;
+	int type;
+	int code;
 	struct net *net = dev_net(skb->dev);
 	struct sock *sk;
 	int harderr;
 	int err;
 
+	if (skb->protocol == htons(ETH_P_IP)) {
+		family = AF_INET;
+		type = icmp_hdr(skb)->type;
+		code = icmp_hdr(skb)->code;
+		icmph = (struct icmphdr *)(skb->data + offset);
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		family = AF_INET6;
+		type = icmp6_hdr(skb)->icmp6_type;
+		code = icmp6_hdr(skb)->icmp6_code;
+		icmph = (struct icmphdr *) (skb->data + offset);
+	} else {
+		BUG();
+	}
+
 	/* We assume the packet has already been checked by icmp_unreach */
 
-	if (!ping_supported(icmph->type, icmph->code))
+	if (!ping_supported(family, icmph->type, icmph->code))
 		return;
 
-	pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type,
-		 code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
+	pr_debug("ping_err(proto=0x%x,type=%d,code=%d,id=%04x,seq=%04x)\n",
+		 skb->protocol, type, code, ntohs(icmph->un.echo.id),
+		 ntohs(icmph->un.echo.sequence));
 
-	sk = ping_v4_lookup(net, iph->daddr, iph->saddr,
-			    ntohs(icmph->un.echo.id), skb->dev->ifindex);
+	sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id));
 	if (sk == NULL) {
 		pr_debug("no socket, dropping\n");
 		return;	/* No socket for error */
@@ -349,72 +501,83 @@ void ping_err(struct sk_buff *skb, u32 info)
 	harderr = 0;
 	inet_sock = inet_sk(sk);
 
-	switch (type) {
-	default:
-	case ICMP_TIME_EXCEEDED:
-		err = EHOSTUNREACH;
-		break;
-	case ICMP_SOURCE_QUENCH:
-		/* This is not a real error but ping wants to see it.
-		 * Report it with some fake errno. */
-		err = EREMOTEIO;
-		break;
-	case ICMP_PARAMETERPROB:
-		err = EPROTO;
-		harderr = 1;
-		break;
-	case ICMP_DEST_UNREACH:
-		if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
-			ipv4_sk_update_pmtu(skb, sk, info);
-			if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
-				err = EMSGSIZE;
-				harderr = 1;
-				break;
-			}
-			goto out;
-		}
-		err = EHOSTUNREACH;
-		if (code <= NR_ICMP_UNREACH) {
-			harderr = icmp_err_convert[code].fatal;
-			err = icmp_err_convert[code].errno;
-		}
-		break;
-	case ICMP_REDIRECT:
-		/* See ICMP_SOURCE_QUENCH */
-		ipv4_sk_redirect(skb, sk);
-		err = EREMOTEIO;
-		break;
+	if (skb->protocol == htons(ETH_P_IP)) {
+		switch (type) {
+		default:
+		case ICMP_TIME_EXCEEDED:
+			err = EHOSTUNREACH;
+			break;
+		case ICMP_SOURCE_QUENCH:
+			/* This is not a real error but ping wants to see it.
+			 * Report it with some fake errno.
+			 */
+			err = EREMOTEIO;
+			break;
+		case ICMP_PARAMETERPROB:
+			err = EPROTO;
+			harderr = 1;
+			break;
+		case ICMP_DEST_UNREACH:
+			if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
+				ipv4_sk_update_pmtu(skb, sk, info);
+				if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
+					err = EMSGSIZE;
+					harderr = 1;
+					break;
+				}
+				goto out;
+			}
+			err = EHOSTUNREACH;
+			if (code <= NR_ICMP_UNREACH) {
+				harderr = icmp_err_convert[code].fatal;
+				err = icmp_err_convert[code].errno;
+			}
+			break;
+		case ICMP_REDIRECT:
+			/* See ICMP_SOURCE_QUENCH */
+			ipv4_sk_redirect(skb, sk);
+			err = EREMOTEIO;
+			break;
+		}
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		harderr = pingv6_ops.icmpv6_err_convert(type, code, &err);
+#endif
 	}
 
 	/*
 	 * RFC1122: OK.  Passes ICMP errors back to application, as per
 	 * 4.1.3.3.
 	 */
-	if (!inet_sock->recverr) {
+	if ((family == AF_INET && !inet_sock->recverr) ||
+	    (family == AF_INET6 && !inet6_sk(sk)->recverr)) {
 		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
 			goto out;
 	} else {
-		ip_icmp_error(sk, skb, err, 0 /* no remote port */,
-			 info, (u8 *)icmph);
+		if (family == AF_INET) {
+			ip_icmp_error(sk, skb, err, 0 /* no remote port */,
+				      info, (u8 *)icmph);
+#if IS_ENABLED(CONFIG_IPV6)
+		} else if (family == AF_INET6) {
+			pingv6_ops.ipv6_icmp_error(sk, skb, err, 0,
+						   info, (u8 *)icmph);
+#endif
+		}
 	}
 	sk->sk_err = err;
 	sk->sk_error_report(sk);
 out:
 	sock_put(sk);
 }
+EXPORT_SYMBOL_GPL(ping_err);
 
 /*
- * Copy and checksum an ICMP Echo packet from user space into a buffer.
+ * Copy and checksum an ICMP Echo packet from user space into a buffer
+ * starting from the payload.
  */
 
-struct pingfakehdr {
-	struct icmphdr icmph;
-	struct iovec *iov;
-	__wsum wcheck;
-};
-
-static int ping_getfrag(void *from, char *to,
-			int offset, int fraglen, int odd, struct sk_buff *skb)
+int ping_getfrag(void *from, char *to,
+		 int offset, int fraglen, int odd, struct sk_buff *skb)
 {
 	struct pingfakehdr *pfh = (struct pingfakehdr *)from;
 
@@ -425,20 +588,33 @@ static int ping_getfrag(void *from, char *to,
 			    pfh->iov, 0, fraglen - sizeof(struct icmphdr),
 			    &pfh->wcheck))
 			return -EFAULT;
+	} else if (offset < sizeof(struct icmphdr)) {
+		BUG();
+	} else {
+		if (csum_partial_copy_fromiovecend
+				(to, pfh->iov, offset - sizeof(struct icmphdr),
+				 fraglen, &pfh->wcheck))
+			return -EFAULT;
+	}
 
-		return 0;
+#if IS_ENABLED(CONFIG_IPV6)
+	/* For IPv6, checksum each skb as we go along, as expected by
+	 * icmpv6_push_pending_frames. For IPv4, accumulate the checksum in
+	 * wcheck, it will be finalized in ping_v4_push_pending_frames.
+	 */
+	if (pfh->family == AF_INET6) {
+		skb->csum = pfh->wcheck;
+		skb->ip_summed = CHECKSUM_NONE;
+		pfh->wcheck = 0;
 	}
-	if (offset < sizeof(struct icmphdr))
-		BUG();
-	if (csum_partial_copy_fromiovecend
-			(to, pfh->iov, offset - sizeof(struct icmphdr),
-			 fraglen, &pfh->wcheck))
-		return -EFAULT;
+#endif
+
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ping_getfrag);
 
-static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
-				    struct flowi4 *fl4)
+static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
+				       struct flowi4 *fl4)
 {
 	struct sk_buff *skb = skb_peek(&sk->sk_write_queue);
 
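
A note on the split checksum strategy just added to ping_getfrag(): IPv4
accumulates one running sum across all fragments and finalizes it once, while
IPv6 leaves a per-skb sum for icmpv6_push_pending_frames() to fold. A sketch
of the IPv4 finalization (assuming the pingfakehdr layout above; the helper
name here is invented, the real work happens in ping_v4_push_pending_frames()):

	static void ping_v4_finish_csum(struct pingfakehdr *pfh)
	{
		/* mix the ICMP header into the accumulated payload sum,
		 * then fold it into the 16-bit checksum field */
		pfh->wcheck = csum_partial((char *)&pfh->icmph,
					   sizeof(struct icmphdr), pfh->wcheck);
		pfh->icmph.checksum = csum_fold(pfh->wcheck);
	}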
@@ -450,24 +626,9 @@ static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
 	return ip_push_pending_frames(sk, fl4);
 }
 
-static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-			size_t len)
-{
-	struct net *net = sock_net(sk);
-	struct flowi4 fl4;
-	struct inet_sock *inet = inet_sk(sk);
-	struct ipcm_cookie ipc;
-	struct icmphdr user_icmph;
-	struct pingfakehdr pfh;
-	struct rtable *rt = NULL;
-	struct ip_options_data opt_copy;
-	int free = 0;
-	__be32 saddr, daddr, faddr;
-	u8 tos;
-	int err;
-
-	pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
-
+int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
+			void *user_icmph, size_t icmph_len) {
+	u8 type, code;
 
 	if (len > 0xFFFF)
 		return -EMSGSIZE;
@@ -482,15 +643,53 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	/*
 	 * Fetch the ICMP header provided by the userland.
-	 * iovec is modified!
+	 * iovec is modified! The ICMP header is consumed.
 	 */
-
-	if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov,
-			     sizeof(struct icmphdr)))
+	if (memcpy_fromiovec(user_icmph, msg->msg_iov, icmph_len))
 		return -EFAULT;
-	if (!ping_supported(user_icmph.type, user_icmph.code))
+
+	if (family == AF_INET) {
+		type = ((struct icmphdr *) user_icmph)->type;
+		code = ((struct icmphdr *) user_icmph)->code;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (family == AF_INET6) {
+		type = ((struct icmp6hdr *) user_icmph)->icmp6_type;
+		code = ((struct icmp6hdr *) user_icmph)->icmp6_code;
+#endif
+	} else {
+		BUG();
+	}
+
+	if (!ping_supported(family, type, code))
 		return -EINVAL;
 
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ping_common_sendmsg);
+
+int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		    size_t len)
+{
+	struct net *net = sock_net(sk);
+	struct flowi4 fl4;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipcm_cookie ipc;
+	struct icmphdr user_icmph;
+	struct pingfakehdr pfh;
+	struct rtable *rt = NULL;
+	struct ip_options_data opt_copy;
+	int free = 0;
+	__be32 saddr, daddr, faddr;
+	u8 tos;
+	int err;
+
+	pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
+
+	err = ping_common_sendmsg(AF_INET, msg, len, &user_icmph,
+				  sizeof(user_icmph));
+	if (err)
+		return err;
+
 	/*
 	 * Get and verify the address.
 	 */
@@ -595,13 +794,14 @@ back_from_confirm:
 	pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence;
 	pfh.iov = msg->msg_iov;
 	pfh.wcheck = 0;
+	pfh.family = AF_INET;
 
 	err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len,
 			     0, &ipc, &rt, msg->msg_flags);
 	if (err)
 		ip_flush_pending_frames(sk);
 	else
-		err = ping_push_pending_frames(sk, &pfh, &fl4);
+		err = ping_v4_push_pending_frames(sk, &pfh, &fl4);
 	release_sock(sk);
 
 out:
@@ -622,11 +822,13 @@ do_confirm:
 	goto out;
 }
 
-static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-			size_t len, int noblock, int flags, int *addr_len)
+int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		 size_t len, int noblock, int flags, int *addr_len)
 {
 	struct inet_sock *isk = inet_sk(sk);
-	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
+	int family = sk->sk_family;
+	struct sockaddr_in *sin;
+	struct sockaddr_in6 *sin6;
 	struct sk_buff *skb;
 	int copied, err;
 
@@ -636,11 +838,22 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (flags & MSG_OOB)
 		goto out;
 
-	if (addr_len)
-		*addr_len = sizeof(*sin);
+	if (addr_len) {
+		if (family == AF_INET)
+			*addr_len = sizeof(*sin);
+		else if (family == AF_INET6 && addr_len)
+			*addr_len = sizeof(*sin6);
+	}
 
-	if (flags & MSG_ERRQUEUE)
-		return ip_recv_error(sk, msg, len);
+	if (flags & MSG_ERRQUEUE) {
+		if (family == AF_INET) {
+			return ip_recv_error(sk, msg, len);
+#if IS_ENABLED(CONFIG_IPV6)
+		} else if (family == AF_INET6) {
+			return pingv6_ops.ipv6_recv_error(sk, msg, len);
+#endif
+		}
+	}
 
 	skb = skb_recv_datagram(sk, flags, noblock, &err);
 	if (!skb)
@@ -659,15 +872,40 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	sock_recv_timestamp(msg, sk, skb);
 
-	/* Copy the address. */
-	if (sin) {
+	/* Copy the address and add cmsg data. */
+	if (family == AF_INET) {
+		sin = (struct sockaddr_in *) msg->msg_name;
 		sin->sin_family = AF_INET;
 		sin->sin_port = 0 /* skb->h.uh->source */;
 		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+
+		if (isk->cmsg_flags)
+			ip_cmsg_recv(msg, skb);
+
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (family == AF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+		struct ipv6hdr *ip6 = ipv6_hdr(skb);
+		sin6 = (struct sockaddr_in6 *) msg->msg_name;
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = 0;
+		sin6->sin6_addr = ip6->saddr;
+
+		sin6->sin6_flowinfo = 0;
+		if (np->sndflow)
+			sin6->sin6_flowinfo = ip6_flowinfo(ip6);
+
+		sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
+							  IP6CB(skb)->iif);
+
+		if (inet6_sk(sk)->rxopt.all)
+			pingv6_ops.ip6_datagram_recv_ctl(sk, msg, skb);
+#endif
+	} else {
+		BUG();
 	}
-	if (isk->cmsg_flags)
-		ip_cmsg_recv(msg, skb);
+
 	err = copied;
 
 done:
@@ -676,8 +914,9 @@ out:
 	pr_debug("ping_recvmsg -> %d\n", err);
 	return err;
 }
+EXPORT_SYMBOL_GPL(ping_recvmsg);
 
-static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
 		 inet_sk(sk), inet_sk(sk)->inet_num, skb);
@@ -688,6 +927,7 @@ static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ping_queue_rcv_skb);
 
 
 /*
@@ -698,10 +938,7 @@ void ping_rcv(struct sk_buff *skb)
 {
 	struct sock *sk;
 	struct net *net = dev_net(skb->dev);
-	struct iphdr *iph = ip_hdr(skb);
 	struct icmphdr *icmph = icmp_hdr(skb);
-	__be32 saddr = iph->saddr;
-	__be32 daddr = iph->daddr;
 
 	/* We assume the packet has already been checked by icmp_rcv */
 
@@ -711,8 +948,7 @@ void ping_rcv(struct sk_buff *skb)
 	/* Push ICMP header back */
 	skb_push(skb, skb->data - (u8 *)icmph);
 
-	sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id),
-			    skb->dev->ifindex);
+	sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id));
 	if (sk != NULL) {
 		pr_debug("rcv on socket %p\n", sk);
 		ping_queue_rcv_skb(sk, skb_get(skb));
@@ -723,6 +959,7 @@ void ping_rcv(struct sk_buff *skb)
 
 	/* We're called from icmp_rcv(). kfree_skb() is done there. */
 }
+EXPORT_SYMBOL_GPL(ping_rcv);
 
 struct proto ping_prot = {
 	.name =		"PING",
@@ -733,14 +970,14 @@ struct proto ping_prot = {
 	.disconnect =	udp_disconnect,
 	.setsockopt =	ip_setsockopt,
 	.getsockopt =	ip_getsockopt,
-	.sendmsg =	ping_sendmsg,
+	.sendmsg =	ping_v4_sendmsg,
 	.recvmsg =	ping_recvmsg,
 	.bind =		ping_bind,
 	.backlog_rcv =	ping_queue_rcv_skb,
 	.release_cb =	ip4_datagram_release_cb,
-	.hash =		ping_v4_hash,
-	.unhash =	ping_v4_unhash,
-	.get_port =	ping_v4_get_port,
+	.hash =		ping_hash,
+	.unhash =	ping_unhash,
+	.get_port =	ping_get_port,
 	.obj_size =	sizeof(struct inet_sock),
 };
 EXPORT_SYMBOL(ping_prot);
@@ -764,7 +1001,8 @@ static struct sock *ping_get_first(struct seq_file *seq, int start)
 			continue;
 
 		sk_nulls_for_each(sk, node, hslot) {
-			if (net_eq(sock_net(sk), net))
+			if (net_eq(sock_net(sk), net) &&
+			    sk->sk_family == state->family)
 				goto found;
 		}
 	}
@@ -797,17 +1035,24 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos)
 	return pos ? NULL : sk;
 }
 
-static void *ping_seq_start(struct seq_file *seq, loff_t *pos)
+void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family)
 {
 	struct ping_iter_state *state = seq->private;
 	state->bucket = 0;
+	state->family = family;
 
 	read_lock_bh(&ping_table.lock);
 
 	return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
 }
+EXPORT_SYMBOL_GPL(ping_seq_start);
+
+static void *ping_v4_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	return ping_seq_start(seq, pos, AF_INET);
+}
 
-static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct sock *sk;
 
@@ -819,13 +1064,15 @@ static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	++*pos;
 	return sk;
 }
+EXPORT_SYMBOL_GPL(ping_seq_next);
 
-static void ping_seq_stop(struct seq_file *seq, void *v)
+void ping_seq_stop(struct seq_file *seq, void *v)
 {
 	read_unlock_bh(&ping_table.lock);
 }
+EXPORT_SYMBOL_GPL(ping_seq_stop);
 
-static void ping_format_sock(struct sock *sp, struct seq_file *f,
+static void ping_v4_format_sock(struct sock *sp, struct seq_file *f,
 		int bucket, int *len)
 {
 	struct inet_sock *inet = inet_sk(sp);
@@ -835,7 +1082,7 @@ static void ping_format_sock(struct sock *sp, struct seq_file *f,
 	__u16 srcp = ntohs(inet->inet_sport);
 
 	seq_printf(f, "%5d: %08X:%04X %08X:%04X"
-		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n",
+		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d%n",
 		bucket, src, srcp, dest, destp, sp->sk_state,
 		sk_wmem_alloc_get(sp),
 		sk_rmem_alloc_get(sp),
@@ -846,7 +1093,7 @@ static void ping_format_sock(struct sock *sp, struct seq_file *f,
 		atomic_read(&sp->sk_drops), len);
 }
 
-static int ping_seq_show(struct seq_file *seq, void *v)
+static int ping_v4_seq_show(struct seq_file *seq, void *v)
 {
 	if (v == SEQ_START_TOKEN)
 		seq_printf(seq, "%-127s\n",
@@ -857,72 +1104,86 @@ static int ping_seq_show(struct seq_file *seq, void *v)
 		struct ping_iter_state *state = seq->private;
 		int len;
 
-		ping_format_sock(v, seq, state->bucket, &len);
+		ping_v4_format_sock(v, seq, state->bucket, &len);
 		seq_printf(seq, "%*s\n", 127 - len, "");
 	}
 	return 0;
 }
 
-static const struct seq_operations ping_seq_ops = {
-	.show		= ping_seq_show,
-	.start		= ping_seq_start,
+static const struct seq_operations ping_v4_seq_ops = {
+	.show		= ping_v4_seq_show,
+	.start		= ping_v4_seq_start,
 	.next		= ping_seq_next,
 	.stop		= ping_seq_stop,
 };
 
 static int ping_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_net(inode, file, &ping_seq_ops,
+	struct ping_seq_afinfo *afinfo = PDE_DATA(inode);
+	return seq_open_net(inode, file, &afinfo->seq_ops,
 			   sizeof(struct ping_iter_state));
 }
 
-static const struct file_operations ping_seq_fops = {
+const struct file_operations ping_seq_fops = {
 	.open		= ping_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
 	.release	= seq_release_net,
 };
+EXPORT_SYMBOL_GPL(ping_seq_fops);
+
+static struct ping_seq_afinfo ping_v4_seq_afinfo = {
+	.name		= "icmp",
+	.family		= AF_INET,
+	.seq_fops	= &ping_seq_fops,
+	.seq_ops	= {
+		.start		= ping_v4_seq_start,
+		.show		= ping_v4_seq_show,
+		.next		= ping_seq_next,
+		.stop		= ping_seq_stop,
	},
+};
 
-static int ping_proc_register(struct net *net)
+int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo)
 {
 	struct proc_dir_entry *p;
-	int rc = 0;
-
-	p = proc_create("icmp", S_IRUGO, net->proc_net, &ping_seq_fops);
+	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+			     afinfo->seq_fops, afinfo);
 	if (!p)
-		rc = -ENOMEM;
-	return rc;
+		return -ENOMEM;
+	return 0;
 }
+EXPORT_SYMBOL_GPL(ping_proc_register);
 
-static void ping_proc_unregister(struct net *net)
+void ping_proc_unregister(struct net *net, struct ping_seq_afinfo *afinfo)
 {
-	remove_proc_entry("icmp", net->proc_net);
+	remove_proc_entry(afinfo->name, net->proc_net);
 }
+EXPORT_SYMBOL_GPL(ping_proc_unregister);
 
-
-static int __net_init ping_proc_init_net(struct net *net)
+static int __net_init ping_v4_proc_init_net(struct net *net)
 {
-	return ping_proc_register(net);
+	return ping_proc_register(net, &ping_v4_seq_afinfo);
 }
 
-static void __net_exit ping_proc_exit_net(struct net *net)
+static void __net_exit ping_v4_proc_exit_net(struct net *net)
 {
-	ping_proc_unregister(net);
+	ping_proc_unregister(net, &ping_v4_seq_afinfo);
 }
 
-static struct pernet_operations ping_net_ops = {
-	.init = ping_proc_init_net,
-	.exit = ping_proc_exit_net,
+static struct pernet_operations ping_v4_net_ops = {
	.init = ping_v4_proc_init_net,
+	.exit = ping_v4_proc_exit_net,
 };
 
 int __init ping_proc_init(void)
 {
-	return register_pernet_subsys(&ping_net_ops);
+	return register_pernet_subsys(&ping_v4_net_ops);
 }
 
 void ping_proc_exit(void)
 {
-	unregister_pernet_subsys(&ping_net_ops);
+	unregister_pernet_subsys(&ping_v4_net_ops);
 }
 
 #endif
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 2a5bf86d2415..4a0335854b89 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -111,7 +111,7 @@ static const struct snmp_mib snmp4_ipstats_list[] = {
 	SNMP_MIB_SENTINEL
 };
 
-/* Following RFC4293 items are displayed in /proc/net/netstat */
+/* Following items are displayed in /proc/net/netstat */
 static const struct snmp_mib snmp4_ipextstats_list[] = {
 	SNMP_MIB_ITEM("InNoRoutes", IPSTATS_MIB_INNOROUTES),
 	SNMP_MIB_ITEM("InTruncatedPkts", IPSTATS_MIB_INTRUNCATEDPKTS),
@@ -125,7 +125,12 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {
 	SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
 	SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
 	SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
+	/* Non RFC4293 fields */
 	SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS),
+	SNMP_MIB_ITEM("InNoECTPkts", IPSTATS_MIB_NOECTPKTS),
+	SNMP_MIB_ITEM("InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
+	SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
+	SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS),
 	SNMP_MIB_SENTINEL
 };
 
@@ -273,6 +278,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
 	SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD),
 	SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES),
+	SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS),
 	SNMP_MIB_SENTINEL
 };
 
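
The four new InNoECTPkts/InECT1Pkts/InECT0Pkts/InCEPkts counters bucket
received packets by the ECN field, i.e. the low two bits of the IPv4 TOS
byte as defined by RFC 3168. A sketch of the classification:

	/* RFC 3168 codepoints: 00 Not-ECT, 01 ECT(1), 10 ECT(0), 11 CE */
	enum { NOT_ECT = 0, ECT_1 = 1, ECT_0 = 2, CE = 3 };

	static inline int ecn_codepoint(unsigned char tos)
	{
		return tos & 3;
	}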
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index dd44e0ab600c..193db03540ad 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -218,8 +218,10 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
 
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
 		ipv4_sk_update_pmtu(skb, sk, info);
-	else if (type == ICMP_REDIRECT)
+	else if (type == ICMP_REDIRECT) {
 		ipv4_sk_redirect(skb, sk);
+		return;
+	}
 
 	/* Report error on raw socket, if:
 	   1. User requested ip_recverr.
@@ -387,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 		iph->check = 0;
 		iph->tot_len = htons(length);
 		if (!iph->id)
-			ip_select_ident(iph, &rt->dst, NULL);
+			ip_select_ident(skb, &rt->dst, NULL);
 
 		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 	}
@@ -571,7 +573,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
 			   RT_SCOPE_UNIVERSE,
 			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
-			   inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP,
+			   inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP |
+			    (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
 			   daddr, saddr, 0, 0);
 
 	if (!inet->hdrincl) {
@@ -987,7 +990,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
 	srcp  = inet->inet_num;
 
 	seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
-		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n",
+		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n",
		i, src, srcp, dest, destp, sp->sk_state,
		sk_wmem_alloc_get(sp),
		sk_rmem_alloc_get(sp),
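The FLOWI_FLAG_KNOWN_NH hunk above only fires for hdrincl senders, where the application supplies the complete IPv4 header and the kernel should not let a FIB-resolved gateway rewrite where the packet is delivered. A userspace sketch of such a sender, using only the standard raw-socket API (error handling elided):

	/* Sketch: open an IPv4 raw socket that builds its own IP headers.
	 * With IPPROTO_RAW the kernel already implies IP_HDRINCL, but
	 * setting it explicitly documents the intent.
	 */
	#include <netinet/in.h>
	#include <sys/socket.h>

	int open_hdrincl_socket(void)
	{
		int on = 1;
		int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);

		if (fd >= 0)
			setsockopt(fd, IPPROTO_IP, IP_HDRINCL, &on, sizeof(on));
		return fd;
	}
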
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d35bbf0cf404..727f4365bcdf 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -112,7 +112,8 @@
 #define RT_FL_TOS(oldflp4) \
 	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
 
-#define IP_MAX_MTU	0xFFF0
+/* IPv4 datagram length is stored into 16bit field (tot_len) */
+#define IP_MAX_MTU	0xFFFF
 
 #define RT_GC_TIMEOUT (300*HZ)
 
@@ -435,12 +436,12 @@ static inline int ip_rt_proc_init(void)
 
 static inline bool rt_is_expired(const struct rtable *rth)
 {
-	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
+	return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
 }
 
 void rt_cache_flush(struct net *net)
 {
-	rt_genid_bump(net);
+	rt_genid_bump_ipv4(net);
 }
 
 static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
@@ -565,10 +566,25 @@ static inline void rt_free(struct rtable *rt)
 
 static DEFINE_SPINLOCK(fnhe_lock);
 
+static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
+{
+	struct rtable *rt;
+
+	rt = rcu_dereference(fnhe->fnhe_rth_input);
+	if (rt) {
+		RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
+		rt_free(rt);
+	}
+	rt = rcu_dereference(fnhe->fnhe_rth_output);
+	if (rt) {
+		RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
+		rt_free(rt);
+	}
+}
+
 static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
 {
 	struct fib_nh_exception *fnhe, *oldest;
-	struct rtable *orig;
 
 	oldest = rcu_dereference(hash->chain);
 	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
@@ -576,11 +592,7 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
 		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
 			oldest = fnhe;
 	}
-	orig = rcu_dereference(oldest->fnhe_rth);
-	if (orig) {
-		RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
-		rt_free(orig);
-	}
+	fnhe_flush_routes(oldest);
 	return oldest;
 }
 
@@ -594,11 +606,25 @@ static inline u32 fnhe_hashfun(__be32 daddr)
 	return hval & (FNHE_HASH_SIZE - 1);
 }
 
+static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
+{
+	rt->rt_pmtu = fnhe->fnhe_pmtu;
+	rt->dst.expires = fnhe->fnhe_expires;
+
+	if (fnhe->fnhe_gw) {
+		rt->rt_flags |= RTCF_REDIRECTED;
+		rt->rt_gateway = fnhe->fnhe_gw;
+		rt->rt_uses_gateway = 1;
+	}
+}
+
 static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 				  u32 pmtu, unsigned long expires)
 {
 	struct fnhe_hash_bucket *hash;
 	struct fib_nh_exception *fnhe;
+	struct rtable *rt;
+	unsigned int i;
 	int depth;
 	u32 hval = fnhe_hashfun(daddr);
 
@@ -627,8 +653,15 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 		fnhe->fnhe_gw = gw;
 		if (pmtu) {
 			fnhe->fnhe_pmtu = pmtu;
-			fnhe->fnhe_expires = expires;
+			fnhe->fnhe_expires = max(1UL, expires);
 		}
+		/* Update all cached dsts too */
+		rt = rcu_dereference(fnhe->fnhe_rth_input);
+		if (rt)
+			fill_route_from_fnhe(rt, fnhe);
+		rt = rcu_dereference(fnhe->fnhe_rth_output);
+		if (rt)
+			fill_route_from_fnhe(rt, fnhe);
 	} else {
 		if (depth > FNHE_RECLAIM_DEPTH)
 			fnhe = fnhe_oldest(hash);
@@ -640,10 +673,27 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 			fnhe->fnhe_next = hash->chain;
 			rcu_assign_pointer(hash->chain, fnhe);
 		}
+		fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
 		fnhe->fnhe_daddr = daddr;
 		fnhe->fnhe_gw = gw;
 		fnhe->fnhe_pmtu = pmtu;
 		fnhe->fnhe_expires = expires;
+
+		/* Exception created; mark the cached routes for the nexthop
+		 * stale, so anyone caching it rechecks if this exception
+		 * applies to them.
+		 */
+		rt = rcu_dereference(nh->nh_rth_input);
+		if (rt)
+			rt->dst.obsolete = DST_OBSOLETE_KILL;
+
+		for_each_possible_cpu(i) {
+			struct rtable __rcu **prt;
+			prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
+			rt = rcu_dereference(*prt);
+			if (rt)
+				rt->dst.obsolete = DST_OBSOLETE_KILL;
+		}
 	}
 
 	fnhe->fnhe_stamp = jiffies;
@@ -922,12 +972,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 	if (mtu < ip_rt_min_pmtu)
 		mtu = ip_rt_min_pmtu;
 
-	if (!rt->rt_pmtu) {
-		dst->obsolete = DST_OBSOLETE_KILL;
-	} else {
-		rt->rt_pmtu = mtu;
-		dst->expires = max(1UL, jiffies + ip_rt_mtu_expires);
-	}
+	if (rt->rt_pmtu == mtu &&
+	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
+		return;
 
 	rcu_read_lock();
 	if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
@@ -1068,11 +1115,11 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
 	 * into this function always.
 	 *
-	 * When a PMTU/redirect information update invalidates a
-	 * route, this is indicated by setting obsolete to
-	 * DST_OBSOLETE_KILL.
+	 * When a PMTU/redirect information update invalidates a route,
+	 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
+	 * DST_OBSOLETE_DEAD by dst_free().
 	 */
-	if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt))
+	if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
		return NULL;
	return dst;
 }
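The relaxed test above means any obsolete value other than DST_OBSOLETE_FORCE_CHK now kills a cached route. The contract that callers rely on is simple: revalidate through the check hook before every use, and redo the full lookup on NULL. A schematic of that contract with simplified stand-in types (not the kernel's struct dst_entry; the real hook also takes a cookie):

	/* Sketch: the dst_check contract ipv4_dst_check() implements. */
	#include <stddef.h>

	#define DST_OBSOLETE_FORCE_CHK	-1

	struct dst { int obsolete; int expired; };

	static struct dst *dst_check(struct dst *d)
	{
		/* PMTU/redirect updates flip obsolete away from FORCE_CHK */
		if (d->obsolete != DST_OBSOLETE_FORCE_CHK || d->expired)
			return NULL;	/* caller must redo the route lookup */
		return d;
	}
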
@@ -1181,10 +1228,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 			mtu = 576;
 	}
 
-	if (mtu > IP_MAX_MTU)
-		mtu = IP_MAX_MTU;
-
-	return mtu;
+	return min_t(unsigned int, mtu, IP_MAX_MTU);
 }
 
 static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
@@ -1214,34 +1258,36 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
 	spin_lock_bh(&fnhe_lock);
 
 	if (daddr == fnhe->fnhe_daddr) {
-		struct rtable *orig = rcu_dereference(fnhe->fnhe_rth);
-		if (orig && rt_is_expired(orig)) {
+		struct rtable __rcu **porig;
+		struct rtable *orig;
+		int genid = fnhe_genid(dev_net(rt->dst.dev));
+
+		if (rt_is_input_route(rt))
+			porig = &fnhe->fnhe_rth_input;
+		else
+			porig = &fnhe->fnhe_rth_output;
+		orig = rcu_dereference(*porig);
+
+		if (fnhe->fnhe_genid != genid) {
+			fnhe->fnhe_genid = genid;
 			fnhe->fnhe_gw = 0;
 			fnhe->fnhe_pmtu = 0;
 			fnhe->fnhe_expires = 0;
+			fnhe_flush_routes(fnhe);
+			orig = NULL;
 		}
-		if (fnhe->fnhe_pmtu) {
-			unsigned long expires = fnhe->fnhe_expires;
-			unsigned long diff = expires - jiffies;
-
-			if (time_before(jiffies, expires)) {
-				rt->rt_pmtu = fnhe->fnhe_pmtu;
-				dst_set_expires(&rt->dst, diff);
-			}
-		}
-		if (fnhe->fnhe_gw) {
-			rt->rt_flags |= RTCF_REDIRECTED;
-			rt->rt_gateway = fnhe->fnhe_gw;
-			rt->rt_uses_gateway = 1;
-		} else if (!rt->rt_gateway)
+		fill_route_from_fnhe(rt, fnhe);
+		if (!rt->rt_gateway)
 			rt->rt_gateway = daddr;
 
-		rcu_assign_pointer(fnhe->fnhe_rth, rt);
-		if (orig)
-			rt_free(orig);
+		if (!(rt->dst.flags & DST_NOCACHE)) {
+			rcu_assign_pointer(*porig, rt);
+			if (orig)
+				rt_free(orig);
+			ret = true;
+		}
 
 		fnhe->fnhe_stamp = jiffies;
-		ret = true;
 	}
 	spin_unlock_bh(&fnhe_lock);
 
@@ -1410,7 +1456,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 #endif
 	rth->dst.output = ip_rt_bug;
 
-	rth->rt_genid = rt_genid(dev_net(dev));
+	rth->rt_genid = rt_genid_ipv4(dev_net(dev));
 	rth->rt_flags	= RTCF_MULTICAST;
 	rth->rt_type	= RTN_MULTICAST;
 	rth->rt_is_input= 1;
@@ -1473,6 +1519,7 @@ static int __mkroute_input(struct sk_buff *skb,
 			   struct in_device *in_dev,
 			   __be32 daddr, __be32 saddr, u32 tos)
 {
+	struct fib_nh_exception *fnhe;
 	struct rtable *rth;
 	int err;
 	struct in_device *out_dev;
@@ -1519,8 +1566,13 @@ static int __mkroute_input(struct sk_buff *skb,
 		}
 	}
 
+	fnhe = find_exception(&FIB_RES_NH(*res), daddr);
 	if (do_cache) {
-		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+		if (fnhe != NULL)
+			rth = rcu_dereference(fnhe->fnhe_rth_input);
+		else
+			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+
 		if (rt_cache_valid(rth)) {
 			skb_dst_set_noref(skb, &rth->dst);
 			goto out;
@@ -1535,7 +1587,7 @@ static int __mkroute_input(struct sk_buff *skb,
 		goto cleanup;
 	}
 
-	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
+	rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev));
 	rth->rt_flags = flags;
 	rth->rt_type = res->type;
 	rth->rt_is_input = 1;
@@ -1548,7 +1600,7 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->dst.input = ip_forward;
 	rth->dst.output = ip_output;
 
-	rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
+	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
 	skb_dst_set(skb, &rth->dst);
 out:
 	err = 0;
@@ -1706,7 +1758,7 @@ local_input:
 	rth->dst.tclassid = itag;
 #endif
 
-	rth->rt_genid = rt_genid(net);
+	rth->rt_genid = rt_genid_ipv4(net);
 	rth->rt_flags	= flags|RTCF_LOCAL;
 	rth->rt_type	= res.type;
 	rth->rt_is_input = 1;
@@ -1863,7 +1915,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 
 		fnhe = find_exception(nh, fl4->daddr);
 		if (fnhe)
-			prth = &fnhe->fnhe_rth;
+			prth = &fnhe->fnhe_rth_output;
 		else {
 			if (unlikely(fl4->flowi4_flags &
 				     FLOWI_FLAG_KNOWN_NH &&
@@ -1891,7 +1943,7 @@ add:
 
 	rth->dst.output = ip_output;
 
-	rth->rt_genid = rt_genid(dev_net(dev_out));
+	rth->rt_genid = rt_genid_ipv4(dev_net(dev_out));
 	rth->rt_flags	= flags;
 	rth->rt_type	= type;
 	rth->rt_is_input = 0;
@@ -2173,7 +2225,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 		rt->rt_iif = ort->rt_iif;
 		rt->rt_pmtu = ort->rt_pmtu;
 
-		rt->rt_genid = rt_genid(net);
+		rt->rt_genid = rt_genid_ipv4(net);
 		rt->rt_flags = ort->rt_flags;
 		rt->rt_type = ort->rt_type;
 		rt->rt_gateway = ort->rt_gateway;
@@ -2429,19 +2481,22 @@ static int ip_rt_gc_interval __read_mostly = 60 * HZ;
 static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
 static int ip_rt_gc_elasticity __read_mostly = 8;
 
-static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
+static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
					void __user *buffer,
					size_t *lenp, loff_t *ppos)
 {
+	struct net *net = (struct net *)__ctl->extra1;
+
 	if (write) {
-		rt_cache_flush((struct net *)__ctl->extra1);
+		rt_cache_flush(net);
+		fnhe_genid_bump(net);
 		return 0;
 	}
 
	return -EINVAL;
 }
 
-static ctl_table ipv4_route_table[] = {
+static struct ctl_table ipv4_route_table[] = {
 	{
 		.procname	= "gc_thresh",
 		.data		= &ipv4_dst_ops.gc_thresh,
@@ -2608,7 +2663,8 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
 
 static __net_init int rt_genid_init(struct net *net)
 {
-	atomic_set(&net->rt_genid, 0);
+	atomic_set(&net->ipv4.rt_genid, 0);
+	atomic_set(&net->fnhe_genid, 0);
 	get_random_bytes(&net->ipv4.dev_addr_genid,
			 sizeof(net->ipv4.dev_addr_genid));
	return 0;
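The route.c changes above hinge on generation counters: the IPv4 route genid moves under net->ipv4, and a new fnhe_genid lets a cache flush also retire learned PMTU/redirect exceptions. The idiom is that each cached object records the per-namespace counter at creation time and a single atomic bump lazily invalidates everything, with no list walk at flush time. A minimal restatement of the idiom outside kernel types:

	/* Sketch: generation-counter invalidation as used by rt_genid
	 * and fnhe_genid above. Stand-in names, userspace atomics.
	 */
	#include <stdatomic.h>
	#include <stdbool.h>

	struct cache_entry { int genid; /* counter value at creation */ };

	static atomic_int genid;

	static void flush_all(void)
	{
		atomic_fetch_add(&genid, 1);	/* everything is now stale */
	}

	static bool entry_is_stale(const struct cache_entry *e)
	{
		return e->genid != atomic_load(&genid);
	}
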
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b05c96e7af8b..14a15c49129d 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -160,26 +160,33 @@ static __u16 const msstab[] = {
  * Generate a syncookie. mssp points to the mss, which is returned
  * rounded down to the value encoded in the cookie.
  */
-__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
+			      u16 *mssp)
 {
-	const struct iphdr *iph = ip_hdr(skb);
-	const struct tcphdr *th = tcp_hdr(skb);
 	int mssind;
 	const __u16 mss = *mssp;
 
-	tcp_synq_overflow(sk);
-
 	for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
 		if (mss >= msstab[mssind])
 			break;
 	*mssp = msstab[mssind];
 
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
-
 	return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
 				     th->source, th->dest, ntohl(th->seq),
 				     jiffies / (HZ * 60), mssind);
 }
+EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence);
+
+__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
+
+	tcp_synq_overflow(sk);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
+
+	return __cookie_v4_init_sequence(iph, th, mssp);
+}
 
 /*
  * This (misnamed) value is the age of syncookie which is permitted.
@@ -192,10 +199,9 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
  * Check if a ack sequence number is a valid syncookie.
  * Return the decoded mss if it is, or 0 if not.
  */
-static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
+int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
+		      u32 cookie)
 {
-	const struct iphdr *iph = ip_hdr(skb);
-	const struct tcphdr *th = tcp_hdr(skb);
 	__u32 seq = ntohl(th->seq) - 1;
 	__u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
 					    th->source, th->dest, seq,
@@ -204,6 +210,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
 
 	return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
 }
+EXPORT_SYMBOL_GPL(__cookie_v4_check);
 
 static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 					   struct request_sock *req,
@@ -284,7 +291,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 		goto out;
 
 	if (tcp_synq_no_recent_overflow(sk) ||
-	    (mss = cookie_check(skb, cookie)) == 0) {
+	    (mss = __cookie_v4_check(ip_hdr(skb), th, cookie)) == 0) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
 		goto out;
 	}
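The refactor above is purely mechanical: the cookie arithmetic loses its dependency on a listening socket, while the wrapper keeps the overflow bookkeeping and MIB accounting. That lets code paths with no socket at hand validate a returning ACK. A sketch of one such hypothetical caller, using only the two signatures exported here (the surrounding packet handling is assumed):

	/* Sketch: stateless validation of an ACK that should carry a
	 * syncookie. As in cookie_v4_check(), the cookie is ack_seq - 1.
	 * Returns the decoded MSS, or 0 if the cookie is bogus.
	 */
	static int ack_cookie_mss(const struct iphdr *iph,
				  const struct tcphdr *th)
	{
		u32 cookie = ntohl(th->ack_seq) - 1;

		return __cookie_v4_check(iph, th, cookie);
	}
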
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index fa2f63fc453b..540279f4c531 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -29,6 +29,7 @@
 static int zero;
 static int one = 1;
 static int four = 4;
+static int gso_max_segs = GSO_MAX_SEGS;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -36,6 +37,8 @@ static int tcp_adv_win_scale_min = -31;
 static int tcp_adv_win_scale_max = 31;
 static int ip_ttl_min = 1;
 static int ip_ttl_max = 255;
+static int tcp_syn_retries_min = 1;
+static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
 static int ip_ping_group_range_min[] = { 0, 0 };
 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
 
@@ -49,13 +52,13 @@ static void set_local_port_range(int range[2])
 }
 
 /* Validate changes from /proc interface. */
-static int ipv4_local_port_range(ctl_table *table, int write,
+static int ipv4_local_port_range(struct ctl_table *table, int write,
				 void __user *buffer,
				 size_t *lenp, loff_t *ppos)
 {
	int ret;
	int range[2];
-	ctl_table tmp = {
+	struct ctl_table tmp = {
		.data = &range,
		.maxlen = sizeof(range),
		.mode = table->mode,
@@ -100,7 +103,7 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig
 }
 
 /* Validate changes from /proc interface. */
-static int ipv4_ping_group_range(ctl_table *table, int write,
+static int ipv4_ping_group_range(struct ctl_table *table, int write,
				 void __user *buffer,
				 size_t *lenp, loff_t *ppos)
 {
@@ -108,7 +111,7 @@ static int ipv4_ping_group_range(ctl_table *table, int write,
	int ret;
	gid_t urange[2];
	kgid_t low, high;
-	ctl_table tmp = {
+	struct ctl_table tmp = {
		.data = &urange,
		.maxlen = sizeof(urange),
		.mode = table->mode,
@@ -135,11 +138,11 @@ static int ipv4_ping_group_range(ctl_table *table, int write,
	return ret;
 }
 
-static int proc_tcp_congestion_control(ctl_table *ctl, int write,
+static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
				       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
	char val[TCP_CA_NAME_MAX];
-	ctl_table tbl = {
+	struct ctl_table tbl = {
		.data = val,
		.maxlen = TCP_CA_NAME_MAX,
	};
@@ -153,12 +156,12 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write,
	return ret;
 }
 
-static int proc_tcp_available_congestion_control(ctl_table *ctl,
+static int proc_tcp_available_congestion_control(struct ctl_table *ctl,
						 int write,
						 void __user *buffer, size_t *lenp,
						 loff_t *ppos)
 {
-	ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
+	struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
	int ret;
 
	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
@@ -170,12 +173,12 @@ static int proc_tcp_available_congestion_control(ctl_table *ctl,
	return ret;
 }
 
-static int proc_allowed_congestion_control(ctl_table *ctl,
+static int proc_allowed_congestion_control(struct ctl_table *ctl,
					   int write,
					   void __user *buffer, size_t *lenp,
					   loff_t *ppos)
 {
-	ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
+	struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
	int ret;
 
	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
@@ -190,7 +193,7 @@ static int proc_allowed_congestion_control(ctl_table *ctl,
	return ret;
 }
 
-static int ipv4_tcp_mem(ctl_table *ctl, int write,
+static int ipv4_tcp_mem(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp,
			   loff_t *ppos)
 {
@@ -201,7 +204,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write,
	struct mem_cgroup *memcg;
 #endif
 
-	ctl_table tmp = {
+	struct ctl_table tmp = {
		.data = &vec,
		.maxlen = sizeof(vec),
		.mode = ctl->mode,
@@ -233,10 +236,11 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write,
	return 0;
 }
 
-static int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer,
-				 size_t *lenp, loff_t *ppos)
+static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
+				 void __user *buffer, size_t *lenp,
+				 loff_t *ppos)
 {
-	ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
+	struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
	struct tcp_fastopen_context *ctxt;
	int ret;
	u32  user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
@@ -331,7 +335,9 @@ static struct ctl_table ipv4_table[] = {
		.data		= &sysctl_tcp_syn_retries,
		.maxlen		= sizeof(int),
		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &tcp_syn_retries_min,
+		.extra2		= &tcp_syn_retries_max
	},
	{
		.procname	= "tcp_synack_retries",
@@ -554,6 +560,13 @@ static struct ctl_table ipv4_table[] = {
		.extra1		= &one,
	},
	{
+		.procname	= "tcp_notsent_lowat",
+		.data		= &sysctl_tcp_notsent_lowat,
+		.maxlen		= sizeof(sysctl_tcp_notsent_lowat),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
		.procname	= "tcp_rmem",
		.data		= &sysctl_tcp_rmem,
		.maxlen		= sizeof(sysctl_tcp_rmem),
@@ -749,6 +762,15 @@ static struct ctl_table ipv4_table[] = {
		.extra2		= &four,
	},
	{
+		.procname	= "tcp_min_tso_segs",
+		.data		= &sysctl_tcp_min_tso_segs,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &gso_max_segs,
+	},
+	{
		.procname	= "udp_mem",
		.data		= &sysctl_udp_mem,
		.maxlen		= sizeof(sysctl_udp_mem),
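The tcp_notsent_lowat knob registered above is the system-wide default; the tcp.c hunks below add a per-socket override via the TCP_NOTSENT_LOWAT socket option. A userspace sketch of the per-socket form (requires kernel headers new enough to define the constant):

	/* Sketch: cap the unsent data buffered on one TCP socket with
	 * the TCP_NOTSENT_LOWAT option added by this series. poll()
	 * then reports writability only while the not-yet-sent queue
	 * stays below the threshold.
	 */
	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <sys/socket.h>

	static int cap_unsent(int fd, int bytes)
	{
		return setsockopt(fd, IPPROTO_TCP, TCP_NOTSENT_LOWAT,
				  &bytes, sizeof(bytes));
	}
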
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ab450c099aa4..6e5617b9f9db 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -279,9 +279,12 @@
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
+#include <net/busy_poll.h>
 
 int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
+int sysctl_tcp_min_tso_segs __read_mostly = 2;
+
 struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
@@ -409,10 +412,6 @@ void tcp_init_sock(struct sock *sk)
 
	icsk->icsk_sync_mss = tcp_sync_mss;
 
-	/* Presumed zeroed, in order of appearance:
-	 *	cookie_in_always, cookie_out_never,
-	 *	s_data_constant, s_data_in, s_data_out
-	 */
	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
@@ -436,6 +435,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
	struct sock *sk = sock->sk;
	const struct tcp_sock *tp = tcp_sk(sk);
 
+	sock_rps_record_flow(sk);
+
	sock_poll_wait(file, sk_sleep(sk), wait);
	if (sk->sk_state == TCP_LISTEN)
		return inet_csk_listen_poll(sk);
@@ -496,7 +497,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
			mask |= POLLIN | POLLRDNORM;
 
		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
-			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+			if (sk_stream_is_writeable(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
@@ -507,7 +508,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
-				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
+				if (sk_stream_is_writeable(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		} else
@@ -786,12 +787,28 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
	xmit_size_goal = mss_now;
 
	if (large_allowed && sk_can_gso(sk)) {
-		xmit_size_goal = ((sk->sk_gso_max_size - 1) -
-				  inet_csk(sk)->icsk_af_ops->net_header_len -
-				  inet_csk(sk)->icsk_ext_hdr_len -
-				  tp->tcp_header_len);
+		u32 gso_size, hlen;
+
+		/* Maybe we should/could use sk->sk_prot->max_header here ? */
+		hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
+		       inet_csk(sk)->icsk_ext_hdr_len +
+		       tp->tcp_header_len;
+
+		/* Goal is to send at least one packet per ms,
+		 * not one big TSO packet every 100 ms.
+		 * This preserves ACK clocking and is consistent
+		 * with tcp_tso_should_defer() heuristic.
+		 */
+		gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
+		gso_size = max_t(u32, gso_size,
+				 sysctl_tcp_min_tso_segs * mss_now);
+
+		xmit_size_goal = min_t(u32, gso_size,
+				       sk->sk_gso_max_size - 1 - hlen);
 
-		/* TSQ : try to have two TSO segments in flight */
+		/* TSQ : try to have at least two segments in flight
+		 * (one in NIC TX ring, another in Qdisc)
+		 */
		xmit_size_goal = min_t(u32, xmit_size_goal,
				       sysctl_tcp_limit_output_bytes >> 1);
 
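To make the autosizing arithmetic above concrete: with a pacing rate of 4 MB/s and an MSS of 1448, the per-burst goal is 4,000,000 / 2000 = 2000 bytes, which the max_t() then lifts to the default two-segment floor (2896 bytes). A standalone restatement of the sizing step, outside the kernel types:

	/* Sketch: TSO autosizing as in tcp_xmit_size_goal() above.
	 * pacing_rate is in bytes/sec; the divisor 2 * 1000 targets
	 * roughly one burst per millisecond.
	 */
	static unsigned int tso_size_goal(unsigned int pacing_rate,
					  unsigned int mss,
					  unsigned int min_tso_segs)
	{
		unsigned int gso_size = pacing_rate / (2 * 1000);

		if (gso_size < min_tso_segs * mss)
			gso_size = min_tso_segs * mss;
		return gso_size;
	}
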
@@ -1118,6 +1135,13 @@ new_segment:
				goto wait_for_memory;
 
			/*
+			 * All packets are restored as if they have
+			 * already been sent.
+			 */
+			if (tp->repair)
+				TCP_SKB_CB(skb)->when = tcp_time_stamp;
+
+			/*
			 * Check whether we can use HW checksum.
			 */
			if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
@@ -1551,6 +1575,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
	struct sk_buff *skb;
	u32 urg_hole = 0;
 
+	if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
+	    (sk->sk_state == TCP_ESTABLISHED))
+		sk_busy_loop(sk, nonblock);
+
	lock_sock(sk);
 
	err = -ENOTCONN;
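The sk_busy_loop() call above only runs for sockets that opted into busy polling. From userspace that is done per socket with SO_BUSY_POLL, the number of microseconds to spin on the NIC before sleeping (or globally via the busy_read sysctl). A sketch, assuming headers that define the constant:

	/* Sketch: opt one socket into receive busy polling. */
	#include <sys/socket.h>

	static int enable_busy_poll(int fd, int usecs)
	{
		return setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL,
				  &usecs, sizeof(usecs));
	}
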
@@ -2440,10 +2468,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
	case TCP_THIN_DUPACK:
		if (val < 0 || val > 1)
			err = -EINVAL;
-		else
+		else {
			tp->thin_dupack = val;
			if (tp->thin_dupack)
				tcp_disable_early_retrans(tp);
+		}
		break;
 
	case TCP_REPAIR:
@@ -2624,6 +2653,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
		else
			tp->tsoffset = val - tcp_time_stamp;
		break;
+	case TCP_NOTSENT_LOWAT:
+		tp->notsent_lowat = val;
+		sk->sk_write_space(sk);
+		break;
	default:
		err = -ENOPROTOOPT;
		break;
@@ -2840,6 +2873,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
	case TCP_TIMESTAMP:
		val = tcp_time_stamp + tp->tsoffset;
		break;
+	case TCP_NOTSENT_LOWAT:
+		val = tp->notsent_lowat;
+		break;
	default:
		return -ENOPROTOOPT;
	}
@@ -2875,249 +2911,9 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL(compat_tcp_getsockopt);
 #endif
 
-struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
-				netdev_features_t features)
-{
-	struct sk_buff *segs = ERR_PTR(-EINVAL);
-	struct tcphdr *th;
-	unsigned int thlen;
-	unsigned int seq;
-	__be32 delta;
-	unsigned int oldlen;
-	unsigned int mss;
-	struct sk_buff *gso_skb = skb;
-	__sum16 newcheck;
-	bool ooo_okay, copy_destructor;
-
-	if (!pskb_may_pull(skb, sizeof(*th)))
-		goto out;
-
-	th = tcp_hdr(skb);
-	thlen = th->doff * 4;
-	if (thlen < sizeof(*th))
-		goto out;
-
-	if (!pskb_may_pull(skb, thlen))
-		goto out;
-
-	oldlen = (u16)~skb->len;
-	__skb_pull(skb, thlen);
-
-	mss = skb_shinfo(skb)->gso_size;
-	if (unlikely(skb->len <= mss))
-		goto out;
-
-	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
-		/* Packet is from an untrusted source, reset gso_segs. */
-		int type = skb_shinfo(skb)->gso_type;
-
-		if (unlikely(type &
-			     ~(SKB_GSO_TCPV4 |
-			       SKB_GSO_DODGY |
-			       SKB_GSO_TCP_ECN |
-			       SKB_GSO_TCPV6 |
-			       SKB_GSO_GRE |
-			       SKB_GSO_UDP_TUNNEL |
-			       0) ||
-			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
-			goto out;
-
-		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
-		segs = NULL;
-		goto out;
-	}
-
-	copy_destructor = gso_skb->destructor == tcp_wfree;
-	ooo_okay = gso_skb->ooo_okay;
-	/* All segments but the first should have ooo_okay cleared */
-	skb->ooo_okay = 0;
-
-	segs = skb_segment(skb, features);
-	if (IS_ERR(segs))
-		goto out;
-
-	/* Only first segment might have ooo_okay set */
-	segs->ooo_okay = ooo_okay;
-
-	delta = htonl(oldlen + (thlen + mss));
-
-	skb = segs;
-	th = tcp_hdr(skb);
-	seq = ntohl(th->seq);
-
-	newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
-					       (__force u32)delta));
-
-	do {
-		th->fin = th->psh = 0;
-		th->check = newcheck;
-
-		if (skb->ip_summed != CHECKSUM_PARTIAL)
-			th->check =
-			     csum_fold(csum_partial(skb_transport_header(skb),
-						    thlen, skb->csum));
-
-		seq += mss;
-		if (copy_destructor) {
-			skb->destructor = gso_skb->destructor;
-			skb->sk = gso_skb->sk;
-			/* {tcp|sock}_wfree() use exact truesize accounting :
-			 * sum(skb->truesize) MUST be exactly be gso_skb->truesize
-			 * So we account mss bytes of 'true size' for each segment.
-			 * The last segment will contain the remaining.
-			 */
-			skb->truesize = mss;
-			gso_skb->truesize -= mss;
-		}
-		skb = skb->next;
-		th = tcp_hdr(skb);
-
-		th->seq = htonl(seq);
-		th->cwr = 0;
-	} while (skb->next);
-
-	/* Following permits TCP Small Queues to work well with GSO :
-	 * The callback to TCP stack will be called at the time last frag
-	 * is freed at TX completion, and not right now when gso_skb
-	 * is freed by GSO engine
-	 */
-	if (copy_destructor) {
-		swap(gso_skb->sk, skb->sk);
-		swap(gso_skb->destructor, skb->destructor);
-		swap(gso_skb->truesize, skb->truesize);
-	}
-
-	delta = htonl(oldlen + (skb->tail - skb->transport_header) +
-		      skb->data_len);
-	th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
-				(__force u32)delta));
-	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		th->check = csum_fold(csum_partial(skb_transport_header(skb),
-						   thlen, skb->csum));
-
-out:
-	return segs;
-}
-EXPORT_SYMBOL(tcp_tso_segment);
-
-struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
-{
-	struct sk_buff **pp = NULL;
-	struct sk_buff *p;
-	struct tcphdr *th;
-	struct tcphdr *th2;
-	unsigned int len;
-	unsigned int thlen;
-	__be32 flags;
-	unsigned int mss = 1;
-	unsigned int hlen;
-	unsigned int off;
-	int flush = 1;
-	int i;
-
-	off = skb_gro_offset(skb);
-	hlen = off + sizeof(*th);
-	th = skb_gro_header_fast(skb, off);
-	if (skb_gro_header_hard(skb, hlen)) {
-		th = skb_gro_header_slow(skb, hlen, off);
-		if (unlikely(!th))
-			goto out;
-	}
-
-	thlen = th->doff * 4;
-	if (thlen < sizeof(*th))
-		goto out;
-
-	hlen = off + thlen;
-	if (skb_gro_header_hard(skb, hlen)) {
-		th = skb_gro_header_slow(skb, hlen, off);
-		if (unlikely(!th))
-			goto out;
-	}
-
-	skb_gro_pull(skb, thlen);
-
-	len = skb_gro_len(skb);
-	flags = tcp_flag_word(th);
-
-	for (; (p = *head); head = &p->next) {
-		if (!NAPI_GRO_CB(p)->same_flow)
-			continue;
-
-		th2 = tcp_hdr(p);
-
-		if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
-			NAPI_GRO_CB(p)->same_flow = 0;
-			continue;
-		}
-
-		goto found;
-	}
-
-	goto out_check_final;
-
-found:
-	flush = NAPI_GRO_CB(p)->flush;
-	flush |= (__force int)(flags & TCP_FLAG_CWR);
-	flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
-		  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
-	flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
-	for (i = sizeof(*th); i < thlen; i += 4)
-		flush |= *(u32 *)((u8 *)th + i) ^
-			 *(u32 *)((u8 *)th2 + i);
-
-	mss = skb_shinfo(p)->gso_size;
-
-	flush |= (len - 1) >= mss;
-	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
-
-	if (flush || skb_gro_receive(head, skb)) {
-		mss = 1;
-		goto out_check_final;
-	}
-
-	p = *head;
-	th2 = tcp_hdr(p);
-	tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
-
-out_check_final:
-	flush = len < mss;
-	flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
-					TCP_FLAG_RST | TCP_FLAG_SYN |
-					TCP_FLAG_FIN));
-
-	if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
-		pp = head;
-
-out:
-	NAPI_GRO_CB(skb)->flush |= flush;
-
-	return pp;
-}
-EXPORT_SYMBOL(tcp_gro_receive);
-
-int tcp_gro_complete(struct sk_buff *skb)
-{
-	struct tcphdr *th = tcp_hdr(skb);
-
-	skb->csum_start = skb_transport_header(skb) - skb->head;
-	skb->csum_offset = offsetof(struct tcphdr, check);
-	skb->ip_summed = CHECKSUM_PARTIAL;
-
-	skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
-
-	if (th->cwr)
-		skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
-
-	return 0;
-}
-EXPORT_SYMBOL(tcp_gro_complete);
-
 #ifdef CONFIG_TCP_MD5SIG
-static unsigned long tcp_md5sig_users;
-static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool;
-static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
+static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly;
+static DEFINE_MUTEX(tcp_md5sig_mutex);
 
 static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
 {
@@ -3132,30 +2928,14 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
	free_percpu(pool);
 }
 
-void tcp_free_md5sig_pool(void)
-{
-	struct tcp_md5sig_pool __percpu *pool = NULL;
-
-	spin_lock_bh(&tcp_md5sig_pool_lock);
-	if (--tcp_md5sig_users == 0) {
-		pool = tcp_md5sig_pool;
-		tcp_md5sig_pool = NULL;
-	}
-	spin_unlock_bh(&tcp_md5sig_pool_lock);
-	if (pool)
-		__tcp_free_md5sig_pool(pool);
-}
-EXPORT_SYMBOL(tcp_free_md5sig_pool);
-
-static struct tcp_md5sig_pool __percpu *
-__tcp_alloc_md5sig_pool(struct sock *sk)
+static void __tcp_alloc_md5sig_pool(void)
 {
	int cpu;
	struct tcp_md5sig_pool __percpu *pool;
 
	pool = alloc_percpu(struct tcp_md5sig_pool);
	if (!pool)
-		return NULL;
+		return;
 
	for_each_possible_cpu(cpu) {
		struct crypto_hash *hash;
@@ -3166,53 +2946,27 @@ __tcp_alloc_md5sig_pool(struct sock *sk)
 
		per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
	}
-	return pool;
+	/* before setting tcp_md5sig_pool, we must commit all writes
+	 * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool()
+	 */
+	smp_wmb();
+	tcp_md5sig_pool = pool;
+	return;
 out_free:
	__tcp_free_md5sig_pool(pool);
-	return NULL;
 }
 
-struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
+bool tcp_alloc_md5sig_pool(void)
 {
-	struct tcp_md5sig_pool __percpu *pool;
-	bool alloc = false;
-
-retry:
-	spin_lock_bh(&tcp_md5sig_pool_lock);
-	pool = tcp_md5sig_pool;
-	if (tcp_md5sig_users++ == 0) {
-		alloc = true;
-		spin_unlock_bh(&tcp_md5sig_pool_lock);
-	} else if (!pool) {
-		tcp_md5sig_users--;
-		spin_unlock_bh(&tcp_md5sig_pool_lock);
-		cpu_relax();
-		goto retry;
-	} else
-		spin_unlock_bh(&tcp_md5sig_pool_lock);
-
-	if (alloc) {
-		/* we cannot hold spinlock here because this may sleep. */
-		struct tcp_md5sig_pool __percpu *p;
-
-		p = __tcp_alloc_md5sig_pool(sk);
-		spin_lock_bh(&tcp_md5sig_pool_lock);
-		if (!p) {
-			tcp_md5sig_users--;
-			spin_unlock_bh(&tcp_md5sig_pool_lock);
-			return NULL;
-		}
-		pool = tcp_md5sig_pool;
-		if (pool) {
-			/* oops, it has already been assigned. */
-			spin_unlock_bh(&tcp_md5sig_pool_lock);
-			__tcp_free_md5sig_pool(p);
-		} else {
-			tcp_md5sig_pool = pool = p;
-			spin_unlock_bh(&tcp_md5sig_pool_lock);
-		}
+	if (unlikely(!tcp_md5sig_pool)) {
+		mutex_lock(&tcp_md5sig_mutex);
+
+		if (!tcp_md5sig_pool)
+			__tcp_alloc_md5sig_pool();
+
+		mutex_unlock(&tcp_md5sig_mutex);
	}
-	return pool;
+	return tcp_md5sig_pool != NULL;
 }
 EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
 
@@ -3229,28 +2983,15 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
	struct tcp_md5sig_pool __percpu *p;
 
	local_bh_disable();
-
-	spin_lock(&tcp_md5sig_pool_lock);
-	p = tcp_md5sig_pool;
-	if (p)
-		tcp_md5sig_users++;
-	spin_unlock(&tcp_md5sig_pool_lock);
-
+	p = ACCESS_ONCE(tcp_md5sig_pool);
	if (p)
-		return this_cpu_ptr(p);
+		return __this_cpu_ptr(p);
 
	local_bh_enable();
	return NULL;
 }
 EXPORT_SYMBOL(tcp_get_md5sig_pool);
 
-void tcp_put_md5sig_pool(void)
-{
-	local_bh_enable();
-	tcp_free_md5sig_pool();
-}
-EXPORT_SYMBOL(tcp_put_md5sig_pool);
-
 int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
			const struct tcphdr *th)
 {
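The MD5 pool rework above replaces refcounted alloc/free with allocate-once: a mutex serializes the slow path, a write barrier publishes the pointer, and readers take a barrier-aware load on the fast path. The same double-checked pattern in portable userspace form, with generic stand-ins for the kernel primitives:

	/* Sketch: lazy one-time initialization, double-checked.
	 * The release store pairs with the acquire load, mirroring
	 * the kernel's smp_wmb() / ACCESS_ONCE() pairing above.
	 */
	#include <pthread.h>

	static void *pool;	/* published once, never freed */
	static pthread_mutex_t pool_mutex = PTHREAD_MUTEX_INITIALIZER;

	void *get_pool(void *(*alloc)(void))
	{
		void *p = __atomic_load_n(&pool, __ATOMIC_ACQUIRE);

		if (!p) {
			pthread_mutex_lock(&pool_mutex);
			p = pool;
			if (!p) {
				p = alloc();
				__atomic_store_n(&pool, p, __ATOMIC_RELEASE);
			}
			pthread_mutex_unlock(&pool_mutex);
		}
		return p;
	}
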
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index a9077f441cb2..b6ae92a51f58 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -206,8 +206,8 @@ static u32 cubic_root(u64 a)
  */
 static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 {
-	u64 offs;
-	u32 delta, t, bic_target, max_cnt;
+	u32 delta, bic_target, max_cnt;
+	u64 offs, t;
 
	ca->ack_cnt++;	/* count the number of ACKs */
 
@@ -250,9 +250,11 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
	 * if the cwnd < 1 million packets !!!
	 */
 
+	t = (s32)(tcp_time_stamp - ca->epoch_start);
+	t += msecs_to_jiffies(ca->delay_min >> 3);
	/* change the unit from HZ to bictcp_HZ */
-	t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3)
-	      - ca->epoch_start) << BICTCP_HZ) / HZ;
+	t <<= BICTCP_HZ;
+	do_div(t, HZ);
 
	if (t < ca->bic_K)		/* t - K */
		offs = ca->bic_K - t;
@@ -414,7 +416,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
		return;
 
	/* Discard delay samples right after fast recovery */
-	if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ)
+	if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
		return;
 
	delay = (rtt_us << 3) / USEC_PER_MSEC;
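Widening t to u64 above fixes an overflow: with HZ=1000 the old 32-bit expression wrapped once the elapsed time shifted by BICTCP_HZ exceeded 2^32, i.e. after roughly 2^22 jiffies (about 70 minutes) of epoch age, yielding bogus cwnd targets. A standalone restatement of the fixed computation (BICTCP_HZ is 10 in the kernel source):

	/* Sketch: cubic's elapsed-time computation, done in 64 bits so
	 * the left shift cannot wrap. The kernel uses do_div() for the
	 * final u64/u32 division.
	 */
	#include <stdint.h>

	static uint64_t bictcp_time_units(uint32_t now, uint32_t epoch_start,
					  uint32_t delay_min_jiffies,
					  unsigned int hz)
	{
		uint64_t t = (int32_t)(now - epoch_start) + delay_min_jiffies;

		t <<= 10;		/* BICTCP_HZ */
		return t / hz;
	}
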
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 8f7ef0ad80e5..ab7bd35bb312 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -58,23 +58,22 @@ error:		kfree(ctx);
	return err;
 }
 
-/* Computes the fastopen cookie for the peer.
- * The peer address is a 128 bits long (pad with zeros for IPv4).
+/* Computes the fastopen cookie for the IP path.
+ * The path is a 128 bits long (pad with zeros for IPv4).
  *
  * The caller must check foc->len to determine if a valid cookie
  * has been generated successfully.
 */
-void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc)
+void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
+			     struct tcp_fastopen_cookie *foc)
 {
-	__be32 peer_addr[4] = { addr, 0, 0, 0 };
+	__be32 path[4] = { src, dst, 0, 0 };
	struct tcp_fastopen_context *ctx;
 
	rcu_read_lock();
	ctx = rcu_dereference(tcp_fastopen_ctx);
	if (ctx) {
-		crypto_cipher_encrypt_one(ctx->tfm,
-					  foc->val,
-					  (__u8 *)peer_addr);
+		crypto_cipher_encrypt_one(ctx->tfm, foc->val, (__u8 *)path);
		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
	}
	rcu_read_unlock();
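After this change the Fast Open cookie is a block-cipher encryption of a 128-bit value carrying both endpoint addresses, so a cookie minted for one source address no longer validates when replayed from another. The block layout, written out as a plain struct purely for illustration:

	/* Sketch: the 128-bit input block encrypted to form the cookie
	 * after this patch. For IPv4 the tail is zero padding.
	 */
	#include <stdint.h>

	struct tfo_path {
		uint32_t src;		/* __be32 saddr */
		uint32_t dst;		/* __be32 daddr */
		uint32_t pad[2];	/* zero for IPv4 */
	};
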
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9c6225780bd5..25a89eaa669d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -347,24 +347,13 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 }
 
 /* 3. Tuning rcvbuf, when connection enters established state. */
-
 static void tcp_fixup_rcvbuf(struct sock *sk)
 {
	u32 mss = tcp_sk(sk)->advmss;
-	u32 icwnd = TCP_DEFAULT_INIT_RCVWND;
	int rcvmem;
 
-	/* Limit to 10 segments if mss <= 1460,
-	 * or 14600/mss segments, with a minimum of two segments.
-	 */
-	if (mss > 1460)
-		icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
-
-	rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
-	while (tcp_win_from_space(rcvmem) < mss)
-		rcvmem += 128;
-
-	rcvmem *= icwnd;
+	rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
+		 tcp_default_init_rwnd(mss);
 
	if (sk->sk_rcvbuf < rcvmem)
		sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
@@ -699,6 +688,34 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
	}
 }
 
+/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
+ * Note: TCP stack does not yet implement pacing.
+ * FQ packet scheduler can be used to implement cheap but effective
+ * TCP pacing, to smooth the burst on large writes when packets
+ * in flight is significantly lower than cwnd (or rwin)
+ */
+static void tcp_update_pacing_rate(struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	u64 rate;
+
+	/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
+	rate = (u64)tp->mss_cache * 2 * (HZ << 3);
+
+	rate *= max(tp->snd_cwnd, tp->packets_out);
+
+	/* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3),
+	 * be conservative and assume srtt = 1 (125 us instead of 1.25 ms)
+	 * We probably need usec resolution in the future.
+	 * Note: This also takes care of possible srtt=0 case,
+	 * when tcp_rtt_estimator() was not yet called.
+	 */
+	if (tp->srtt > 8 + 2)
+		do_div(rate, tp->srtt);
+
+	sk->sk_pacing_rate = min_t(u64, rate, ~0U);
+}
+
 /* Calculate rto without backoff. This is the second half of Van Jacobson's
  * routine referred to above.
  */
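Concretely, the formula above sets the pacing rate to twice the current delivery rate. With mss_cache = 1448, snd_cwnd = 10 and srtt = 100 ms (stored as jiffies << 3, so 800 at HZ=1000), the result is 2 * 1448 * 10 / 0.1 s = 289,600 bytes/sec. A standalone restatement of the arithmetic:

	/* Sketch: tcp_update_pacing_rate() arithmetic in isolation.
	 * srtt_shifted3 is the kernel's srtt encoding, jiffies << 3.
	 */
	#include <stdint.h>

	static uint64_t pacing_rate(uint32_t mss, uint32_t cwnd,
				    uint32_t srtt_shifted3, unsigned int hz)
	{
		uint64_t rate = (uint64_t)mss * 2 * (hz << 3);

		rate *= cwnd;
		if (srtt_shifted3 > 8 + 2)	/* skip tiny/unset srtt */
			rate /= srtt_shifted3;
		return rate;
	}
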
@@ -1059,6 +1076,7 @@ struct tcp_sacktag_state {
1059 int reord; 1076 int reord;
1060 int fack_count; 1077 int fack_count;
1061 int flag; 1078 int flag;
1079 s32 rtt; /* RTT measured by SACKing never-retransmitted data */
1062}; 1080};
1063 1081
1064/* Check if skb is fully within the SACK block. In presence of GSO skbs, 1082/* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1119,7 +1137,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1119static u8 tcp_sacktag_one(struct sock *sk, 1137static u8 tcp_sacktag_one(struct sock *sk,
1120 struct tcp_sacktag_state *state, u8 sacked, 1138 struct tcp_sacktag_state *state, u8 sacked,
1121 u32 start_seq, u32 end_seq, 1139 u32 start_seq, u32 end_seq,
1122 bool dup_sack, int pcount) 1140 int dup_sack, int pcount, u32 xmit_time)
1123{ 1141{
1124 struct tcp_sock *tp = tcp_sk(sk); 1142 struct tcp_sock *tp = tcp_sk(sk);
1125 int fack_count = state->fack_count; 1143 int fack_count = state->fack_count;
@@ -1159,6 +1177,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
1159 state->reord); 1177 state->reord);
1160 if (!after(end_seq, tp->high_seq)) 1178 if (!after(end_seq, tp->high_seq))
1161 state->flag |= FLAG_ORIG_SACK_ACKED; 1179 state->flag |= FLAG_ORIG_SACK_ACKED;
1180 /* Pick the earliest sequence sacked for RTT */
1181 if (state->rtt < 0)
1182 state->rtt = tcp_time_stamp - xmit_time;
1162 } 1183 }
1163 1184
1164 if (sacked & TCPCB_LOST) { 1185 if (sacked & TCPCB_LOST) {
@@ -1216,7 +1237,8 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1216 * tcp_highest_sack_seq() when skb is highest_sack. 1237 * tcp_highest_sack_seq() when skb is highest_sack.
1217 */ 1238 */
1218 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, 1239 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1219 start_seq, end_seq, dup_sack, pcount); 1240 start_seq, end_seq, dup_sack, pcount,
1241 TCP_SKB_CB(skb)->when);
1220 1242
1221 if (skb == tp->lost_skb_hint) 1243 if (skb == tp->lost_skb_hint)
1222 tp->lost_cnt_hint += pcount; 1244 tp->lost_cnt_hint += pcount;
@@ -1257,8 +1279,6 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1257 1279
1258 if (skb == tp->retransmit_skb_hint) 1280 if (skb == tp->retransmit_skb_hint)
1259 tp->retransmit_skb_hint = prev; 1281 tp->retransmit_skb_hint = prev;
1260 if (skb == tp->scoreboard_skb_hint)
1261 tp->scoreboard_skb_hint = prev;
1262 if (skb == tp->lost_skb_hint) { 1282 if (skb == tp->lost_skb_hint) {
1263 tp->lost_skb_hint = prev; 1283 tp->lost_skb_hint = prev;
1264 tp->lost_cnt_hint -= tcp_skb_pcount(prev); 1284 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
@@ -1492,7 +1512,8 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1492 TCP_SKB_CB(skb)->seq, 1512 TCP_SKB_CB(skb)->seq,
1493 TCP_SKB_CB(skb)->end_seq, 1513 TCP_SKB_CB(skb)->end_seq,
1494 dup_sack, 1514 dup_sack,
1495 tcp_skb_pcount(skb)); 1515 tcp_skb_pcount(skb),
1516 TCP_SKB_CB(skb)->when);
1496 1517
1497 if (!before(TCP_SKB_CB(skb)->seq, 1518 if (!before(TCP_SKB_CB(skb)->seq,
1498 tcp_highest_sack_seq(tp))) 1519 tcp_highest_sack_seq(tp)))
@@ -1549,7 +1570,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
1549 1570
1550static int 1571static int
1551tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, 1572tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1552 u32 prior_snd_una) 1573 u32 prior_snd_una, s32 *sack_rtt)
1553{ 1574{
1554 struct tcp_sock *tp = tcp_sk(sk); 1575 struct tcp_sock *tp = tcp_sk(sk);
1555 const unsigned char *ptr = (skb_transport_header(ack_skb) + 1576 const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1567,6 +1588,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1567 1588
1568 state.flag = 0; 1589 state.flag = 0;
1569 state.reord = tp->packets_out; 1590 state.reord = tp->packets_out;
1591 state.rtt = -1;
1570 1592
1571 if (!tp->sacked_out) { 1593 if (!tp->sacked_out) {
1572 if (WARN_ON(tp->fackets_out)) 1594 if (WARN_ON(tp->fackets_out))
@@ -1750,6 +1772,7 @@ out:
1750 WARN_ON((int)tp->retrans_out < 0); 1772 WARN_ON((int)tp->retrans_out < 0);
1751 WARN_ON((int)tcp_packets_in_flight(tp) < 0); 1773 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
1752#endif 1774#endif
1775 *sack_rtt = state.rtt;
1753 return state.flag; 1776 return state.flag;
1754} 1777}
1755 1778
@@ -1882,8 +1905,13 @@ void tcp_enter_loss(struct sock *sk, int how)
1882 } 1905 }
1883 tcp_verify_left_out(tp); 1906 tcp_verify_left_out(tp);
1884 1907
1885 tp->reordering = min_t(unsigned int, tp->reordering, 1908 /* Timeout in disordered state after receiving substantial DUPACKs
1886 sysctl_tcp_reordering); 1909 * suggests that the degree of reordering is over-estimated.
1910 */
1911 if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
1912 tp->sacked_out >= sysctl_tcp_reordering)
1913 tp->reordering = min_t(unsigned int, tp->reordering,
1914 sysctl_tcp_reordering);
1887 tcp_set_ca_state(sk, TCP_CA_Loss); 1915 tcp_set_ca_state(sk, TCP_CA_Loss);
1888 tp->high_seq = tp->snd_nxt; 1916 tp->high_seq = tp->snd_nxt;
1889 TCP_ECN_queue_cwr(tp); 1917 TCP_ECN_queue_cwr(tp);
@@ -1966,20 +1994,6 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
 	return true;
 }
 
-static inline int tcp_skb_timedout(const struct sock *sk,
-				   const struct sk_buff *skb)
-{
-	return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
-}
-
-static inline int tcp_head_timedout(const struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-
-	return tp->packets_out &&
-	       tcp_skb_timedout(sk, tcp_write_queue_head(sk));
-}
-
 /* Linux NewReno/SACK/FACK/ECN state machine.
  * --------------------------------------
  *
@@ -2086,12 +2100,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
 	if (tcp_dupack_heuristics(tp) > tp->reordering)
 		return true;
 
-	/* Trick#3 : when we use RFC2988 timer restart, fast
-	 * retransmit can be triggered by timeout of queue head.
-	 */
-	if (tcp_is_fack(tp) && tcp_head_timedout(sk))
-		return true;
-
 	/* Trick#4: It is still not OK... But will it be useful to delay
 	 * recovery more?
 	 */
@@ -2128,44 +2136,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
 	return false;
 }
 
-/* New heuristics: it is possible only after we switched to restart timer
- * each time when something is ACKed. Hence, we can detect timed out packets
- * during fast retransmit without falling to slow start.
- *
- * Usefulness of this as is very questionable, since we should know which of
- * the segments is the next to timeout which is relatively expensive to find
- * in general case unless we add some data structure just for that. The
- * current approach certainly won't find the right one too often and when it
- * finally does find _something_ it usually marks large part of the window
- * right away (because a retransmission with a larger timestamp blocks the
- * loop from advancing). -ij
- */
-static void tcp_timeout_skbs(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
-
-	if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
-		return;
-
-	skb = tp->scoreboard_skb_hint;
-	if (tp->scoreboard_skb_hint == NULL)
-		skb = tcp_write_queue_head(sk);
-
-	tcp_for_write_queue_from(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
-		if (!tcp_skb_timedout(sk, skb))
-			break;
-
-		tcp_skb_mark_lost(tp, skb);
-	}
-
-	tp->scoreboard_skb_hint = skb;
-
-	tcp_verify_left_out(tp);
-}
-
 /* Detect loss in event "A" above by marking head of queue up as lost.
  * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
  * are considered lost. For RFC3517 SACK, a segment is considered lost if it
@@ -2251,8 +2221,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
 		else if (fast_rexmit)
 			tcp_mark_head_lost(sk, 1, 1);
 	}
-
-	tcp_timeout_skbs(sk);
 }
 
 /* CWND moderation, preventing bursts due to too big ACKs
@@ -2307,10 +2275,22 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 #define DBGUNDO(x...) do { } while (0)
 #endif
 
-static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
+static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
+	if (unmark_loss) {
+		struct sk_buff *skb;
+
+		tcp_for_write_queue(skb, sk) {
+			if (skb == tcp_send_head(sk))
+				break;
+			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+		}
+		tp->lost_out = 0;
+		tcp_clear_all_retrans_hints(tp);
+	}
+
 	if (tp->prior_ssthresh) {
 		const struct inet_connection_sock *icsk = inet_csk(sk);
 
@@ -2319,7 +2299,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
 	else
 		tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
 
-	if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) {
+	if (tp->prior_ssthresh > tp->snd_ssthresh) {
 		tp->snd_ssthresh = tp->prior_ssthresh;
 		TCP_ECN_withdraw_cwr(tp);
 	}
@@ -2327,6 +2307,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
 		tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
 	}
 	tp->snd_cwnd_stamp = tcp_time_stamp;
+	tp->undo_marker = 0;
 }
 
 static inline bool tcp_may_undo(const struct tcp_sock *tp)
@@ -2346,14 +2327,13 @@ static bool tcp_try_undo_recovery(struct sock *sk)
 		 * or our original transmission succeeded.
 		 */
 		DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
-		tcp_undo_cwr(sk, true);
+		tcp_undo_cwnd_reduction(sk, false);
 		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
 			mib_idx = LINUX_MIB_TCPLOSSUNDO;
 		else
 			mib_idx = LINUX_MIB_TCPFULLUNDO;
 
 		NET_INC_STATS_BH(sock_net(sk), mib_idx);
-		tp->undo_marker = 0;
 	}
 	if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
 		/* Hold old state until something *above* high_seq
@@ -2367,16 +2347,17 @@ static bool tcp_try_undo_recovery(struct sock *sk)
 }
 
 /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
-static void tcp_try_undo_dsack(struct sock *sk)
+static bool tcp_try_undo_dsack(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tp->undo_marker && !tp->undo_retrans) {
 		DBGUNDO(sk, "D-SACK");
-		tcp_undo_cwr(sk, true);
-		tp->undo_marker = 0;
+		tcp_undo_cwnd_reduction(sk, false);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
+		return true;
 	}
+	return false;
 }
 
 /* We can clear retrans_stamp when there are no retransmissions in the
@@ -2408,60 +2389,20 @@ static bool tcp_any_retrans_done(const struct sock *sk)
 	return false;
 }
 
-/* Undo during fast recovery after partial ACK. */
-
-static int tcp_try_undo_partial(struct sock *sk, int acked)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	/* Partial ACK arrived. Force Hoe's retransmit. */
-	int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
-
-	if (tcp_may_undo(tp)) {
-		/* Plain luck! Hole if filled with delayed
-		 * packet, rather than with a retransmit.
-		 */
-		if (!tcp_any_retrans_done(sk))
-			tp->retrans_stamp = 0;
-
-		tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
-
-		DBGUNDO(sk, "Hoe");
-		tcp_undo_cwr(sk, false);
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
-
-		/* So... Do not make Hoe's retransmit yet.
-		 * If the first packet was delayed, the rest
-		 * ones are most probably delayed as well.
-		 */
-		failed = 0;
-	}
-	return failed;
-}
-
 /* Undo during loss recovery after partial ACK or using F-RTO. */
 static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (frto_undo || tcp_may_undo(tp)) {
-		struct sk_buff *skb;
-		tcp_for_write_queue(skb, sk) {
-			if (skb == tcp_send_head(sk))
-				break;
-			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
-		}
-
-		tcp_clear_all_retrans_hints(tp);
+		tcp_undo_cwnd_reduction(sk, true);
 
 		DBGUNDO(sk, "partial loss");
-		tp->lost_out = 0;
-		tcp_undo_cwr(sk, true);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
 		if (frto_undo)
 			NET_INC_STATS_BH(sock_net(sk),
 					 LINUX_MIB_TCPSPURIOUSRTOS);
 		inet_csk(sk)->icsk_retransmits = 0;
-		tp->undo_marker = 0;
 		if (frto_undo || tcp_is_sack(tp))
 			tcp_set_ca_state(sk, TCP_CA_Open);
 		return true;
@@ -2494,12 +2435,14 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
 	TCP_ECN_queue_cwr(tp);
 }
 
-static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
 			       int fast_rexmit)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int sndcnt = 0;
 	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
+	int newly_acked_sacked = prior_unsacked -
+				 (tp->packets_out - tp->sacked_out);
 
 	tp->prr_delivered += newly_acked_sacked;
 	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
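The new signature drops the delta bookkeeping from the caller: tcp_cwnd_reduction() re-derives the delivered count from prior_unsacked, which the caller samples as packets_out - sacked_out before the ACK is processed. A worked example of the arithmetic, as a self-contained sketch with assumed numbers:

/* Sketch only: before the ACK, 10 packets are in flight, 2 already SACKed. */
static int example_newly_acked_sacked(void)
{
	int prior_unsacked = 10 - 2;	/* sampled before the ACK: 8 */
	int packets_out = 7;		/* the ACK cumulatively acked 3 */
	int sacked_out = 3;		/* and SACKed 1 more segment */

	/* prior_unsacked - (packets_out - sacked_out) = 8 - 4 = 4 */
	return prior_unsacked - (packets_out - sacked_out);
}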
@@ -2556,7 +2499,7 @@ static void tcp_try_keep_open(struct sock *sk)
 	}
 }
 
-static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
+static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2570,10 +2513,8 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
 
 	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
 		tcp_try_keep_open(sk);
-		if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
-			tcp_moderate_cwnd(tp);
 	} else {
-		tcp_cwnd_reduction(sk, newly_acked_sacked, 0);
+		tcp_cwnd_reduction(sk, prior_unsacked, 0);
 	}
 }
 
@@ -2731,6 +2672,40 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 	tcp_xmit_retransmit_queue(sk);
 }
 
+/* Undo during fast recovery after partial ACK. */
+static bool tcp_try_undo_partial(struct sock *sk, const int acked,
+				 const int prior_unsacked)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (tp->undo_marker && tcp_packet_delayed(tp)) {
+		/* Plain luck! Hole if filled with delayed
+		 * packet, rather than with a retransmit.
+		 */
+		tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
+
+		/* We are getting evidence that the reordering degree is higher
+		 * than we realized. If there are no retransmits out then we
+		 * can undo. Otherwise we clock out new packets but do not
+		 * mark more packets lost or retransmit more.
+		 */
+		if (tp->retrans_out) {
+			tcp_cwnd_reduction(sk, prior_unsacked, 0);
+			return true;
+		}
+
+		if (!tcp_any_retrans_done(sk))
+			tp->retrans_stamp = 0;
+
+		DBGUNDO(sk, "partial recovery");
+		tcp_undo_cwnd_reduction(sk, true);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
+		tcp_try_keep_open(sk);
+		return true;
+	}
+	return false;
+}
+
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
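The helper added above returns whether the partial ACK was fully handled. Unlike the old Hoe-retransmit variant it distinguishes two cases: with retransmissions still in flight it keeps PRR clocking out packets without undoing, and only undoes completely once nothing retransmitted remains. A hedged restatement of that policy as a self-contained sketch (the real code operates on struct tcp_sock; this takes the relevant fields directly):

#include <stdbool.h>

static bool partial_ack_handled(bool undo_marker, bool packet_delayed,
				unsigned int retrans_out, bool *undo)
{
	*undo = false;
	if (!undo_marker || !packet_delayed)
		return false;	/* fall through: force fast retransmit */
	if (retrans_out)
		return true;	/* keep PRR clocking, mark nothing more lost */
	*undo = true;		/* no rexmits left: safe to undo the reduction */
	return true;
}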
@@ -2742,15 +2717,14 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 * It does _not_ decide what to send, it is made in function
 * tcp_xmit_retransmit_queue().
 */
-static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
-				  int prior_sacked, int prior_packets,
+static void tcp_fastretrans_alert(struct sock *sk, const int acked,
+				  const int prior_unsacked,
 				  bool is_dupack, int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
+	bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
 				    (tcp_fackets_out(tp) > tp->reordering));
-	int newly_acked_sacked = 0;
 	int fast_rexmit = 0;
 
 	if (WARN_ON(!tp->packets_out && tp->sacked_out))
@@ -2802,10 +2776,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 		if (!(flag & FLAG_SND_UNA_ADVANCED)) {
 			if (tcp_is_reno(tp) && is_dupack)
 				tcp_add_reno_sack(sk);
-		} else
-			do_lost = tcp_try_undo_partial(sk, pkts_acked);
-		newly_acked_sacked = prior_packets - tp->packets_out +
-				     tp->sacked_out - prior_sacked;
+		} else {
+			if (tcp_try_undo_partial(sk, acked, prior_unsacked))
+				return;
+			/* Partial ACK arrived. Force fast retransmit. */
+			do_lost = tcp_is_reno(tp) ||
+				  tcp_fackets_out(tp) > tp->reordering;
+		}
+		if (tcp_try_undo_dsack(sk)) {
+			tcp_try_keep_open(sk);
+			return;
+		}
 		break;
 	case TCP_CA_Loss:
 		tcp_process_loss(sk, flag, is_dupack);
@@ -2819,14 +2800,12 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 			if (is_dupack)
 				tcp_add_reno_sack(sk);
 		}
-		newly_acked_sacked = prior_packets - tp->packets_out +
-				     tp->sacked_out - prior_sacked;
 
 		if (icsk->icsk_ca_state <= TCP_CA_Disorder)
 			tcp_try_undo_dsack(sk);
 
 		if (!tcp_time_to_recover(sk, flag)) {
-			tcp_try_to_open(sk, flag, newly_acked_sacked);
+			tcp_try_to_open(sk, flag, prior_unsacked);
 			return;
 		}
 
@@ -2846,71 +2825,57 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 		fast_rexmit = 1;
 	}
 
-	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
+	if (do_lost)
 		tcp_update_scoreboard(sk, fast_rexmit);
-	tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
+	tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit);
 	tcp_xmit_retransmit_queue(sk);
 }
 
-void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
-{
-	tcp_rtt_estimator(sk, seq_rtt);
-	tcp_set_rto(sk);
-	inet_csk(sk)->icsk_backoff = 0;
-}
-EXPORT_SYMBOL(tcp_valid_rtt_meas);
-
-/* Read draft-ietf-tcplw-high-performance before mucking
- * with this code. (Supersedes RFC1323)
- */
-static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
-{
-	/* RTTM Rule: A TSecr value received in a segment is used to
-	 * update the averaged RTT measurement only if the segment
-	 * acknowledges some new data, i.e., only if it advances the
-	 * left edge of the send window.
-	 *
-	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
-	 * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
-	 *
-	 * Changed: reset backoff as soon as we see the first valid sample.
-	 * If we do not, we get strongly overestimated rto. With timestamps
-	 * samples are accepted even from very old segments: f.e., when rtt=1
-	 * increases to 8, we retransmit 5 times and after 8 seconds delayed
-	 * answer arrives rto becomes 120 seconds! If at least one of segments
-	 * in window is lost... Voila. --ANK (010210)
-	 */
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
-}
-
-static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
-{
-	/* We don't have a timestamp. Can only use
-	 * packets that are not retransmitted to determine
-	 * rtt estimates. Also, we must not reset the
-	 * backoff for rto until we get a non-retransmitted
-	 * packet. This allows us to deal with a situation
-	 * where the network delay has increased suddenly.
-	 * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
-	 */
-
-	if (flag & FLAG_RETRANS_DATA_ACKED)
-		return;
-
-	tcp_valid_rtt_meas(sk, seq_rtt);
-}
-
-static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
-				      const s32 seq_rtt)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	/* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
-	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
-		tcp_ack_saw_tstamp(sk, flag);
-	else if (seq_rtt >= 0)
-		tcp_ack_no_tstamp(sk, seq_rtt, flag);
-}
+static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
+				      s32 seq_rtt, s32 sack_rtt)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	/* Prefer RTT measured from ACK's timing to TS-ECR. This is because
+	 * broken middle-boxes or peers may corrupt TS-ECR fields. But
+	 * Karn's algorithm forbids taking RTT if some retransmitted data
+	 * is acked (RFC6298).
+	 */
+	if (flag & FLAG_RETRANS_DATA_ACKED)
+		seq_rtt = -1;
+
+	if (seq_rtt < 0)
+		seq_rtt = sack_rtt;
+
+	/* RTTM Rule: A TSecr value received in a segment is used to
+	 * update the averaged RTT measurement only if the segment
+	 * acknowledges some new data, i.e., only if it advances the
+	 * left edge of the send window.
+	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
+	 */
+	if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
+		seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+
+	if (seq_rtt < 0)
+		return false;
+
+	tcp_rtt_estimator(sk, seq_rtt);
+	tcp_set_rto(sk);
+
+	/* RFC6298: only reset backoff on valid RTT measurement. */
+	inet_csk(sk)->icsk_backoff = 0;
+	return true;
+}
+
+/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
+static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	s32 seq_rtt = -1;
+
+	if (tp->lsndtime && !tp->total_retrans)
+		seq_rtt = tcp_time_stamp - tp->lsndtime;
+	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
+}
 
 static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
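The consolidated tcp_ack_update_rtt() establishes a fixed preference order for RTT samples: timing of the ACK itself, then timing derived from a new SACK block, then the timestamp echo, with Karn's rule vetoing any sample once retransmitted data is acked. A minimal sketch of that selection order, with simplified types (not the kernel signature):

static int pick_rtt_sample(int retrans_data_acked, int seq_rtt,
			   int sack_rtt, int tsecr_rtt)
{
	if (retrans_data_acked)
		seq_rtt = -1;		/* Karn: timing is ambiguous */
	if (seq_rtt < 0)
		seq_rtt = sack_rtt;	/* RTT measured from a new SACK */
	if (seq_rtt < 0)
		seq_rtt = tsecr_rtt;	/* last resort: timestamp echo */
	return seq_rtt;			/* -1: no valid sample this ACK */
}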
@@ -2999,7 +2964,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
 * arrived at the other end.
 */
 static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
-			       u32 prior_snd_una)
+			       u32 prior_snd_una, s32 sack_rtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3038,8 +3003,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			if (sacked & TCPCB_SACKED_RETRANS)
 				tp->retrans_out -= acked_pcount;
 			flag |= FLAG_RETRANS_DATA_ACKED;
-			ca_seq_rtt = -1;
-			seq_rtt = -1;
 		} else {
 			ca_seq_rtt = now - scb->when;
 			last_ackt = skb->tstamp;
@@ -3079,7 +3042,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 		tcp_unlink_write_queue(skb, sk);
 		sk_wmem_free_skb(sk, skb);
-		tp->scoreboard_skb_hint = NULL;
 		if (skb == tp->retransmit_skb_hint)
 			tp->retransmit_skb_hint = NULL;
 		if (skb == tp->lost_skb_hint)
@@ -3092,6 +3054,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 		flag |= FLAG_SACK_RENEGING;
 
+	if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) ||
+	    (flag & FLAG_ACKED))
+		tcp_rearm_rto(sk);
+
 	if (flag & FLAG_ACKED) {
 		const struct tcp_congestion_ops *ca_ops
 			= inet_csk(sk)->icsk_ca_ops;
@@ -3101,9 +3067,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			tcp_mtup_probe_success(sk);
 		}
 
-		tcp_ack_update_rtt(sk, flag, seq_rtt);
-		tcp_rearm_rto(sk);
-
 		if (tcp_is_reno(tp)) {
 			tcp_remove_reno_sacks(sk, pkts_acked);
 		} else {
@@ -3191,11 +3154,22 @@ static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
 		inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
 }
 
+/* Decide wheather to run the increase function of congestion control. */
 static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
-	return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
-		!tcp_in_cwnd_reduction(sk);
+	if (tcp_in_cwnd_reduction(sk))
+		return false;
+
+	/* If reordering is high then always grow cwnd whenever data is
+	 * delivered regardless of its ordering. Otherwise stay conservative
+	 * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/
+	 * new SACK or ECE mark may first advance cwnd here and later reduce
+	 * cwnd in tcp_fastretrans_alert() based on more states.
+	 */
+	if (tcp_sk(sk)->reordering > sysctl_tcp_reordering)
+		return flag & FLAG_FORWARD_PROGRESS;
+
+	return flag & FLAG_DATA_ACKED;
 }
 
 /* Check that window update is acceptable.
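The rewritten predicate boils down to three cases. A hedged truth-table sketch, with the flag tests passed in as booleans (names assumed from tcp_input.c's FLAG_* bits, not the kernel API):

#include <stdbool.h>

static bool may_raise_cwnd(bool in_cwnd_reduction, unsigned int reordering,
			   unsigned int sysctl_reordering,
			   bool forward_progress, bool data_acked)
{
	if (in_cwnd_reduction)
		return false;		/* PRR owns cwnd during reduction */
	if (reordering > sysctl_reordering)
		return forward_progress;/* SACKed delivery counts too */
	return data_acked;		/* conservative RFC 5681 rule */
}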
@@ -3330,12 +3304,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	bool is_dupack = false;
-	u32 prior_in_flight;
+	u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt;
 	u32 prior_fackets;
 	int prior_packets = tp->packets_out;
-	int prior_sacked = tp->sacked_out;
-	int pkts_acked = 0;
-	int previous_packets_out = 0;
+	const int prior_unsacked = tp->packets_out - tp->sacked_out;
+	int acked = 0; /* Number of packets newly acked */
+	s32 sack_rtt = -1;
 
 	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
@@ -3392,7 +3366,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
 
 	if (TCP_SKB_CB(skb)->sacked)
-		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+						&sack_rtt);
 
 	if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
 		flag |= FLAG_ECE;
@@ -3410,23 +3385,19 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		goto no_queue;
 
 	/* See if we can take anything off of the retransmit queue. */
-	previous_packets_out = tp->packets_out;
-	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
+	acked = tp->packets_out;
+	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt);
+	acked -= tp->packets_out;
 
-	pkts_acked = previous_packets_out - tp->packets_out;
+	/* Advance cwnd if state allows */
+	if (tcp_may_raise_cwnd(sk, flag))
+		tcp_cong_avoid(sk, ack, prior_in_flight);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
-		/* Advance CWND, if state allows this. */
-		if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
-			tcp_cong_avoid(sk, ack, prior_in_flight);
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
-		tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
-				      prior_packets, is_dupack, flag);
-	} else {
-		if (flag & FLAG_DATA_ACKED)
-			tcp_cong_avoid(sk, ack, prior_in_flight);
+		tcp_fastretrans_alert(sk, acked, prior_unsacked,
+				      is_dupack, flag);
 	}
-
 	if (tp->tlp_high_seq)
 		tcp_process_tlp_ack(sk, ack, flag);
 
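The count of newly acked packets is now derived by sampling packets_out around the call instead of carrying a separate previous_packets_out variable. A minimal sketch of the bookkeeping (values are assumed, not from the patch):

static int newly_acked(unsigned int packets_out_before,
		       unsigned int packets_out_after)
{
	/* e.g. 12 in flight before the ACK, 7 after => 5 newly acked */
	return (int)(packets_out_before - packets_out_after);
}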
@@ -3438,13 +3409,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS)
 		tcp_schedule_loss_probe(sk);
+	if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd)
+		tcp_update_pacing_rate(sk);
 	return 1;
 
 no_queue:
 	/* If data was DSACKed, see if we can undo a cwnd reduction. */
 	if (flag & FLAG_DSACKING_ACK)
-		tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
-				      prior_packets, is_dupack, flag);
+		tcp_fastretrans_alert(sk, acked, prior_unsacked,
+				      is_dupack, flag);
 	/* If this ack opens up a zero window, clear backoff. It was
 	 * being used to time the probes, and is probably far higher than
 	 * it needs to be for normal retransmission.
@@ -3465,9 +3438,10 @@ old_ack:
 	 * If data was DSACKed, see if we can undo a cwnd reduction.
 	 */
 	if (TCP_SKB_CB(skb)->sacked) {
-		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
-		tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
-				      prior_packets, is_dupack, flag);
+		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+						&sack_rtt);
+		tcp_fastretrans_alert(sk, acked, prior_unsacked,
+				      is_dupack, flag);
 	}
 
 	SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
@@ -3598,7 +3572,10 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr
 		++ptr;
 		tp->rx_opt.rcv_tsval = ntohl(*ptr);
 		++ptr;
-		tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
+		if (*ptr)
+			tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
+		else
+			tp->rx_opt.rcv_tsecr = 0;
 		return true;
 	}
 	return false;
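The guard matters because tsoffset is subtracted from every echoed timestamp: a peer that echoes zero (i.e. never saw our timestamp) would otherwise yield a garbage rcv_tsecr of 0 - tsoffset. A minimal sketch of the intended semantics, with field names assumed from tcp_sock:

#include <stdint.h>

static uint32_t effective_tsecr(uint32_t raw_tsecr, uint32_t tsoffset)
{
	return raw_tsecr ? raw_tsecr - tsoffset : 0;	/* 0 == not echoed */
}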
@@ -3623,7 +3600,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb,
 	}
 
 	tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
-	if (tp->rx_opt.saw_tstamp)
+	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
 		tp->rx_opt.rcv_tsecr -= tp->tsoffset;
 
 	return true;
@@ -3780,6 +3757,7 @@ void tcp_reset(struct sock *sk)
 static void tcp_fin(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	const struct dst_entry *dst;
 
 	inet_csk_schedule_ack(sk);
 
@@ -3791,7 +3769,9 @@ static void tcp_fin(struct sock *sk)
 	case TCP_ESTABLISHED:
 		/* Move to CLOSE_WAIT */
 		tcp_set_state(sk, TCP_CLOSE_WAIT);
-		inet_csk(sk)->icsk_ack.pingpong = 1;
+		dst = __sk_dst_get(sk);
+		if (!dst || !dst_metric(dst, RTAX_QUICKACK))
+			inet_csk(sk)->icsk_ack.pingpong = 1;
 		break;
 
 	case TCP_CLOSE_WAIT:
@@ -4159,6 +4139,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
 			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
 		} else {
+			tcp_grow_window(sk, skb);
 			kfree_skb_partial(skb, fragstolen);
 			skb = NULL;
 		}
@@ -4234,8 +4215,10 @@ add_sack:
 	if (tcp_is_sack(tp))
 		tcp_sack_new_ofo_skb(sk, seq, end_seq);
 end:
-	if (skb)
+	if (skb) {
+		tcp_grow_window(sk, skb);
 		skb_set_owner_r(skb, sk);
+	}
 }
 
 static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
@@ -5070,8 +5053,8 @@ discard:
 *	the rest is checked inline. Fast processing is turned on in
 *	tcp_data_queue when everything is OK.
 */
-int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			const struct tcphdr *th, unsigned int len)
+void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
+			 const struct tcphdr *th, unsigned int len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -5148,7 +5131,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				tcp_ack(sk, skb, 0);
 				__kfree_skb(skb);
 				tcp_data_snd_check(sk);
-				return 0;
+				return;
 			} else { /* Header too small */
 				TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
 				goto discard;
@@ -5241,7 +5224,7 @@ no_ack:
 			if (eaten)
 				kfree_skb_partial(skb, fragstolen);
 			sk->sk_data_ready(sk, 0);
-			return 0;
+			return;
 		}
 	}
 
@@ -5257,7 +5240,7 @@ slow_path:
 	 */
 
 	if (!tcp_validate_incoming(sk, skb, th, 1))
-		return 0;
+		return;
 
 step5:
 	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
@@ -5273,7 +5256,7 @@ step5:
 
 	tcp_data_snd_check(sk);
 	tcp_ack_snd_check(sk);
-	return 0;
+	return;
 
 csum_error:
 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
@@ -5281,7 +5264,6 @@ csum_error:
 
 discard:
 	__kfree_skb(skb);
-	return 0;
 }
 EXPORT_SYMBOL(tcp_rcv_established);
 
@@ -5376,7 +5358,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 	int saved_clamp = tp->rx_opt.mss_clamp;
 
 	tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
-	if (tp->rx_opt.saw_tstamp)
+	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
 		tp->rx_opt.rcv_tsecr -= tp->tsoffset;
 
 	if (th->ack) {
@@ -5601,6 +5583,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct request_sock *req;
 	int queued = 0;
+	bool acceptable;
 
 	tp->rx_opt.saw_tstamp = 0;
 
@@ -5671,157 +5654,146 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		return 0;
 
 	/* step 5: check the ACK field */
-	if (true) {
-		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
-					 FLAG_UPDATE_TS_RECENT) > 0;
-
-		switch (sk->sk_state) {
-		case TCP_SYN_RECV:
-			if (acceptable) {
-				/* Once we leave TCP_SYN_RECV, we no longer
-				 * need req so release it.
-				 */
-				if (req) {
-					tcp_synack_rtt_meas(sk, req);
-					tp->total_retrans = req->num_retrans;
-
-					reqsk_fastopen_remove(sk, req, false);
-				} else {
-					/* Make sure socket is routed, for
-					 * correct metrics.
-					 */
-					icsk->icsk_af_ops->rebuild_header(sk);
-					tcp_init_congestion_control(sk);
-
-					tcp_mtup_init(sk);
-					tcp_init_buffer_space(sk);
-					tp->copied_seq = tp->rcv_nxt;
-				}
-				smp_mb();
-				tcp_set_state(sk, TCP_ESTABLISHED);
-				sk->sk_state_change(sk);
-
-				/* Note, that this wakeup is only for marginal
-				 * crossed SYN case. Passively open sockets
-				 * are not waked up, because sk->sk_sleep ==
-				 * NULL and sk->sk_socket == NULL.
-				 */
-				if (sk->sk_socket)
-					sk_wake_async(sk,
-						      SOCK_WAKE_IO, POLL_OUT);
-
-				tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
-				tp->snd_wnd = ntohs(th->window) <<
-					      tp->rx_opt.snd_wscale;
-				tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
-
-				if (tp->rx_opt.tstamp_ok)
-					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
-
-				if (req) {
-					/* Re-arm the timer because data may
-					 * have been sent out. This is similar
-					 * to the regular data transmission case
-					 * when new data has just been ack'ed.
-					 *
-					 * (TFO) - we could try to be more
-					 * aggressive and retranmitting any data
-					 * sooner based on when they were sent
-					 * out.
-					 */
-					tcp_rearm_rto(sk);
-				} else
-					tcp_init_metrics(sk);
-
-				/* Prevent spurious tcp_cwnd_restart() on
-				 * first data packet.
-				 */
-				tp->lsndtime = tcp_time_stamp;
-
-				tcp_initialize_rcv_mss(sk);
-				tcp_fast_path_on(tp);
-			} else {
-				return 1;
-			}
-			break;
-
-		case TCP_FIN_WAIT1:
-			/* If we enter the TCP_FIN_WAIT1 state and we are a
-			 * Fast Open socket and this is the first acceptable
-			 * ACK we have received, this would have acknowledged
-			 * our SYNACK so stop the SYNACK timer.
-			 */
-			if (req != NULL) {
-				/* Return RST if ack_seq is invalid.
-				 * Note that RFC793 only says to generate a
-				 * DUPACK for it but for TCP Fast Open it seems
-				 * better to treat this case like TCP_SYN_RECV
-				 * above.
-				 */
-				if (!acceptable)
-					return 1;
-				/* We no longer need the request sock. */
-				reqsk_fastopen_remove(sk, req, false);
-				tcp_rearm_rto(sk);
-			}
-			if (tp->snd_una == tp->write_seq) {
-				struct dst_entry *dst;
-
-				tcp_set_state(sk, TCP_FIN_WAIT2);
-				sk->sk_shutdown |= SEND_SHUTDOWN;
-
-				dst = __sk_dst_get(sk);
-				if (dst)
-					dst_confirm(dst);
-
-				if (!sock_flag(sk, SOCK_DEAD))
-					/* Wake up lingering close() */
-					sk->sk_state_change(sk);
-				else {
-					int tmo;
-
-					if (tp->linger2 < 0 ||
-					    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
-					     after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
-						tcp_done(sk);
-						NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
-						return 1;
-					}
-
-					tmo = tcp_fin_time(sk);
-					if (tmo > TCP_TIMEWAIT_LEN) {
-						inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
-					} else if (th->fin || sock_owned_by_user(sk)) {
-						/* Bad case. We could lose such FIN otherwise.
-						 * It is not a big problem, but it looks confusing
-						 * and not so rare event. We still can lose it now,
-						 * if it spins in bh_lock_sock(), but it is really
-						 * marginal case.
-						 */
-						inet_csk_reset_keepalive_timer(sk, tmo);
-					} else {
-						tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
-						goto discard;
-					}
-				}
-			}
-			break;
-
-		case TCP_CLOSING:
-			if (tp->snd_una == tp->write_seq) {
-				tcp_time_wait(sk, TCP_TIME_WAIT, 0);
-				goto discard;
-			}
-			break;
-
-		case TCP_LAST_ACK:
-			if (tp->snd_una == tp->write_seq) {
-				tcp_update_metrics(sk);
-				tcp_done(sk);
-				goto discard;
-			}
-			break;
-		}
-	}
+	acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
+				      FLAG_UPDATE_TS_RECENT) > 0;
+
+	switch (sk->sk_state) {
+	case TCP_SYN_RECV:
+		if (!acceptable)
+			return 1;
+
+		/* Once we leave TCP_SYN_RECV, we no longer need req
+		 * so release it.
+		 */
+		if (req) {
+			tp->total_retrans = req->num_retrans;
+			reqsk_fastopen_remove(sk, req, false);
+		} else {
+			/* Make sure socket is routed, for correct metrics. */
+			icsk->icsk_af_ops->rebuild_header(sk);
+			tcp_init_congestion_control(sk);
+
+			tcp_mtup_init(sk);
+			tcp_init_buffer_space(sk);
+			tp->copied_seq = tp->rcv_nxt;
+		}
+		smp_mb();
+		tcp_set_state(sk, TCP_ESTABLISHED);
+		sk->sk_state_change(sk);
+
+		/* Note, that this wakeup is only for marginal crossed SYN case.
+		 * Passively open sockets are not waked up, because
+		 * sk->sk_sleep == NULL and sk->sk_socket == NULL.
+		 */
+		if (sk->sk_socket)
+			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
+
+		tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
+		tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
+		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
+		tcp_synack_rtt_meas(sk, req);
+
+		if (tp->rx_opt.tstamp_ok)
+			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
+
+		if (req) {
+			/* Re-arm the timer because data may have been sent out.
+			 * This is similar to the regular data transmission case
+			 * when new data has just been ack'ed.
+			 *
+			 * (TFO) - we could try to be more aggressive and
+			 * retransmitting any data sooner based on when they
+			 * are sent out.
+			 */
+			tcp_rearm_rto(sk);
+		} else
+			tcp_init_metrics(sk);
+
+		/* Prevent spurious tcp_cwnd_restart() on first data packet */
+		tp->lsndtime = tcp_time_stamp;
+
+		tcp_initialize_rcv_mss(sk);
+		tcp_fast_path_on(tp);
+		break;
+
+	case TCP_FIN_WAIT1: {
+		struct dst_entry *dst;
+		int tmo;
+
+		/* If we enter the TCP_FIN_WAIT1 state and we are a
+		 * Fast Open socket and this is the first acceptable
+		 * ACK we have received, this would have acknowledged
+		 * our SYNACK so stop the SYNACK timer.
+		 */
+		if (req != NULL) {
+			/* Return RST if ack_seq is invalid.
+			 * Note that RFC793 only says to generate a
+			 * DUPACK for it but for TCP Fast Open it seems
+			 * better to treat this case like TCP_SYN_RECV
+			 * above.
+			 */
+			if (!acceptable)
+				return 1;
+			/* We no longer need the request sock. */
+			reqsk_fastopen_remove(sk, req, false);
+			tcp_rearm_rto(sk);
+		}
+		if (tp->snd_una != tp->write_seq)
+			break;
+
+		tcp_set_state(sk, TCP_FIN_WAIT2);
+		sk->sk_shutdown |= SEND_SHUTDOWN;
+
+		dst = __sk_dst_get(sk);
+		if (dst)
+			dst_confirm(dst);
+
+		if (!sock_flag(sk, SOCK_DEAD)) {
+			/* Wake up lingering close() */
+			sk->sk_state_change(sk);
+			break;
+		}
+
+		if (tp->linger2 < 0 ||
+		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
+		     after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
+			tcp_done(sk);
+			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
+			return 1;
+		}
+
+		tmo = tcp_fin_time(sk);
+		if (tmo > TCP_TIMEWAIT_LEN) {
+			inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
+		} else if (th->fin || sock_owned_by_user(sk)) {
+			/* Bad case. We could lose such FIN otherwise.
+			 * It is not a big problem, but it looks confusing
+			 * and not so rare event. We still can lose it now,
+			 * if it spins in bh_lock_sock(), but it is really
+			 * marginal case.
+			 */
+			inet_csk_reset_keepalive_timer(sk, tmo);
+		} else {
+			tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
+			goto discard;
+		}
+		break;
+	}
+
+	case TCP_CLOSING:
+		if (tp->snd_una == tp->write_seq) {
+			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
+			goto discard;
+		}
+		break;
+
+	case TCP_LAST_ACK:
+		if (tp->snd_una == tp->write_seq) {
+			tcp_update_metrics(sk);
+			tcp_done(sk);
+			goto discard;
+		}
+		break;
+	}
 
 	/* step 6: check the URG bit */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 719652305a29..b14266bb91eb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -75,6 +75,7 @@
 #include <net/netdma.h>
 #include <net/secure_seq.h>
 #include <net/tcp_memcontrol.h>
+#include <net/busy_poll.h>
 
 #include <linux/inet.h>
 #include <linux/ipv6.h>
@@ -545,8 +546,7 @@ out:
 	sock_put(sk);
 }
 
-static void __tcp_v4_send_check(struct sk_buff *skb,
-				__be32 saddr, __be32 daddr)
+void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 {
 	struct tcphdr *th = tcp_hdr(skb);
 
@@ -571,23 +571,6 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_v4_send_check);
 
-int tcp_v4_gso_send_check(struct sk_buff *skb)
-{
-	const struct iphdr *iph;
-	struct tcphdr *th;
-
-	if (!pskb_may_pull(skb, sizeof(*th)))
-		return -EINVAL;
-
-	iph = ip_hdr(skb);
-	th = tcp_hdr(skb);
-
-	th->check = 0;
-	skb->ip_summed = CHECKSUM_PARTIAL;
-	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
-	return 0;
-}
-
 /*
  *	This routine will send an RST to the other tcp.
  *
@@ -838,8 +821,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 */
 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct request_sock *req,
-			      u16 queue_mapping,
-			      bool nocache)
+			      u16 queue_mapping)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct flowi4 fl4;
@@ -869,7 +851,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 
 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
 {
-	int res = tcp_v4_send_synack(sk, NULL, req, 0, false);
+	int res = tcp_v4_send_synack(sk, NULL, req, 0);
 
 	if (!res)
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
@@ -907,7 +889,7 @@ bool tcp_syn_flood_action(struct sock *sk,
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
 
 	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
-	if (!lopt->synflood_warned) {
+	if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
 		lopt->synflood_warned = 1;
 		pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
 			proto, ntohs(tcp_hdr(skb)->dest), msg);
@@ -1003,7 +985,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_md5sig_info *md5sig;
 
-	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
+	key = tcp_md5_do_lookup(sk, addr, family);
 	if (key) {
 		/* Pre-existing entry - just update that one. */
 		memcpy(key->key, newkey, newkeylen);
@@ -1026,7 +1008,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 	key = sock_kmalloc(sk, sizeof(*key), gfp);
 	if (!key)
 		return -ENOMEM;
-	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
+	if (!tcp_alloc_md5sig_pool()) {
 		sock_kfree_s(sk, key, sizeof(*key));
 		return -ENOMEM;
 	}
@@ -1044,20 +1026,14 @@ EXPORT_SYMBOL(tcp_md5_do_add);
 
 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_md5sig_key *key;
-	struct tcp_md5sig_info *md5sig;
 
-	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
+	key = tcp_md5_do_lookup(sk, addr, family);
 	if (!key)
 		return -ENOENT;
 	hlist_del_rcu(&key->node);
 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
 	kfree_rcu(key, rcu);
-	md5sig = rcu_dereference_protected(tp->md5sig_info,
-					   sock_owned_by_user(sk));
-	if (hlist_empty(&md5sig->head))
-		tcp_free_md5sig_pool();
 	return 0;
 }
 EXPORT_SYMBOL(tcp_md5_do_del);
@@ -1071,8 +1047,6 @@ static void tcp_clear_md5_list(struct sock *sk)
 
 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
 
-	if (!hlist_empty(&md5sig->head))
-		tcp_free_md5sig_pool();
 	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
 		hlist_del_rcu(&key->node);
 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
@@ -1341,9 +1315,11 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
 		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		return true;
 	}
+
 	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
 		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
-			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
+						ip_hdr(skb)->daddr, valid_foc);
 			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
 			    memcmp(&foc->val[0], &valid_foc->val[0],
 				   TCP_FASTOPEN_COOKIE_SIZE) != 0)
@@ -1354,14 +1330,16 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
 		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		return true;
 	} else if (foc->len == 0) { /* Client requesting a cookie */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
+					ip_hdr(skb)->daddr, valid_foc);
 		NET_INC_STATS_BH(sock_net(sk),
 				 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
 	} else {
 		/* Client sent a cookie with wrong size. Treat it
 		 * the same as invalid and return a valid one.
 		 */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
+					ip_hdr(skb)->daddr, valid_foc);
 	}
 	return false;
 }
@@ -1487,7 +1465,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	 * limitations, they conserve resources and peer is
 	 * evidently real one.
 	 */
-	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
+	if ((sysctl_tcp_syncookies == 2 ||
+	     inet_csk_reqsk_queue_is_full(sk)) && !isn) {
 		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
 		if (!want_cookie)
 			goto drop;
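Setting sysctl_tcp_syncookies to 2 makes the listener behave as if the accept queue were always full, so every SYN is answered with a cookie; that also explains the earlier hunk suppressing the flood warning in that mode. A hedged sketch of the trigger condition (booleans stand in for the kernel helpers):

#include <stdbool.h>

static bool want_syn_cookie(int sysctl_tcp_syncookies, bool queue_full,
			    bool isn_already_chosen)
{
	return (sysctl_tcp_syncookies == 2 || queue_full) &&
	       !isn_already_chosen;
}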
@@ -1696,8 +1675,6 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
 
 	tcp_initialize_rcv_mss(newsk);
-	tcp_synack_rtt_meas(newsk, req);
-	newtp->total_retrans = req->num_retrans;
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Copy over the MD5 key from the original socket */
@@ -1822,10 +1799,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 				sk->sk_rx_dst = NULL;
 			}
 		}
-		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
-			rsk = sk;
-			goto reset;
-		}
+		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
 		return 0;
 	}
 
@@ -2019,6 +1993,7 @@ process:
 	if (sk_filter(sk, skb))
 		goto discard_and_relse;
 
+	sk_mark_napi_id(sk, skb);
 	skb->dev = NULL;
 
 	bh_lock_sock_nested(sk);
@@ -2629,7 +2604,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
 	long delta = req->expires - jiffies;
 
 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
-		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
+		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK%n",
 		i,
 		ireq->loc_addr,
 		ntohs(inet_sk(sk)->inet_sport),
@@ -2687,7 +2662,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
 	rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
 
 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
-			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
+			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d%n",
 		i, src, srcp, dest, destp, sk->sk_state,
 		tp->write_seq - tp->snd_una,
 		rx_queue,
@@ -2803,52 +2778,6 @@ void tcp4_proc_exit(void)
 }
 #endif /* CONFIG_PROC_FS */
 
-struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
-{
-	const struct iphdr *iph = skb_gro_network_header(skb);
-	__wsum wsum;
-	__sum16 sum;
-
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
-				  skb->csum)) {
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			break;
-		}
-flush:
-		NAPI_GRO_CB(skb)->flush = 1;
-		return NULL;
-
-	case CHECKSUM_NONE:
-		wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
-					  skb_gro_len(skb), IPPROTO_TCP, 0);
-		sum = csum_fold(skb_checksum(skb,
-					     skb_gro_offset(skb),
-					     skb_gro_len(skb),
-					     wsum));
-		if (sum)
-			goto flush;
-
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		break;
-	}
-
-	return tcp_gro_receive(head, skb);
-}
-
-int tcp4_gro_complete(struct sk_buff *skb)
-{
-	const struct iphdr *iph = ip_hdr(skb);
-	struct tcphdr *th = tcp_hdr(skb);
-
-	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
-				  iph->saddr, iph->daddr, 0);
-	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
-
-	return tcp_gro_complete(skb);
-}
-
 struct proto tcp_prot = {
 	.name			= "TCP",
 	.owner			= THIS_MODULE,
@@ -2872,6 +2801,7 @@ struct proto tcp_prot = {
 	.unhash			= inet_unhash,
 	.get_port		= inet_csk_get_port,
 	.enter_memory_pressure	= tcp_enter_memory_pressure,
+	.stream_memory_free	= tcp_stream_memory_free,
 	.sockets_allocated	= &tcp_sockets_allocated,
 	.orphan_count		= &tcp_orphan_count,
 	.memory_allocated	= &tcp_memory_allocated,
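
The new .stream_memory_free hook pairs with the sysctl_tcp_notsent_lowat knob added in the tcp_output.c hunk further down: a stream socket should only report itself writable while its not-yet-sent backlog stays below the low-water mark. A minimal user-space sketch of such a predicate, assuming the kernel's check compares write_seq - snd_nxt against the threshold (the helper name and field handling here are illustrative, not the kernel's exact code):

	#include <stdbool.h>
	#include <stdio.h>

	/* Sketch of a notsent_lowat-style writability check: the socket
	 * counts as writable only while the bytes queued but not yet sent
	 * stay below the threshold. write_seq/snd_nxt mirror tcp_sock
	 * fields; the comparison itself is an assumption about what
	 * tcp_stream_memory_free() does.
	 */
	static bool stream_memory_free(unsigned int write_seq,
				       unsigned int snd_nxt,
				       unsigned int notsent_lowat)
	{
		return write_seq - snd_nxt < notsent_lowat;
	}

	int main(void)
	{
		/* 64KB queued, 48KB already sent, 128KB mark -> writable */
		printf("%d\n", stream_memory_free(65536, 49152, 131072));
		return 0;
	}

This bounds per-socket write memory for poll()-driven writers instead of letting them queue arbitrarily far ahead of what the stack can send.
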
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index da14436c1735..559d4ae6ebf4 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -87,8 +87,8 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
 	if (!cg_proto)
 		return -EINVAL;

-	if (val > RESOURCE_MAX)
-		val = RESOURCE_MAX;
+	if (val > RES_COUNTER_MAX)
+		val = RES_COUNTER_MAX;

 	tcp = tcp_from_cgproto(cg_proto);

@@ -101,9 +101,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
 		tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
 					     net->ipv4.sysctl_tcp_mem[i]);

-	if (val == RESOURCE_MAX)
+	if (val == RES_COUNTER_MAX)
 		clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
-	else if (val != RESOURCE_MAX) {
+	else if (val != RES_COUNTER_MAX) {
 		/*
 		 * The active bit needs to be written after the static_key
 		 * update. This is what guarantees that the socket activation
@@ -132,10 +132,10 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
 	return 0;
 }

-static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft,
+static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
 			    const char *buffer)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	unsigned long long val;
 	int ret = 0;

@@ -180,14 +180,14 @@ static u64 tcp_read_usage(struct mem_cgroup *memcg)
 	return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
 }

-static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft)
+static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	u64 val;

 	switch (cft->private) {
 	case RES_LIMIT:
-		val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX);
+		val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX);
 		break;
 	case RES_USAGE:
 		val = tcp_read_usage(memcg);
@@ -202,13 +202,13 @@ static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft)
 	return val;
 }

-static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event)
+static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
 {
 	struct mem_cgroup *memcg;
 	struct tcp_memcontrol *tcp;
 	struct cg_proto *cg_proto;

-	memcg = mem_cgroup_from_cont(cont);
+	memcg = mem_cgroup_from_css(css);
 	cg_proto = tcp_prot.proto_cgroup(memcg);
 	if (!cg_proto)
 		return 0;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index f6a005c485a9..52f3c6b971d2 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -443,7 +443,7 @@ void tcp_init_metrics(struct sock *sk)
 	struct dst_entry *dst = __sk_dst_get(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_metrics_block *tm;
-	u32 val;
+	u32 val, crtt = 0; /* cached RTT scaled by 8 */

 	if (dst == NULL)
 		goto reset;
@@ -478,15 +478,19 @@ void tcp_init_metrics(struct sock *sk)
 		tp->reordering = val;
 	}

-	val = tcp_metric_get(tm, TCP_METRIC_RTT);
-	if (val == 0 || tp->srtt == 0) {
-		rcu_read_unlock();
-		goto reset;
-	}
-	/* Initial rtt is determined from SYN,SYN-ACK.
-	 * The segment is small and rtt may appear much
-	 * less than real one. Use per-dst memory
-	 * to make it more realistic.
+	crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
+	rcu_read_unlock();
+reset:
+	/* The initial RTT measurement from the SYN/SYN-ACK is not ideal
+	 * to seed the RTO for later data packets because SYN packets are
+	 * small. Use the per-dst cached values to seed the RTO but keep
+	 * the RTT estimator variables intact (e.g., srtt, mdev, rttvar).
+	 * Later the RTO will be updated immediately upon obtaining the first
+	 * data RTT sample (tcp_rtt_estimator()). Hence the cached RTT only
+	 * influences the first RTO but not later RTT estimation.
+	 *
+	 * But if RTT is not available from the SYN (due to retransmits or
+	 * syn cookies) or the cache, force a conservative 3secs timeout.
 	 *
 	 * A bit of theory. RTT is time passed after "normal" sized packet
 	 * is sent until it is ACKed. In normal circumstances sending small
@@ -497,21 +501,11 @@ void tcp_init_metrics(struct sock *sk)
 	 * to low value, and then abruptly stops to do it and starts to delay
 	 * ACKs, wait for troubles.
 	 */
-	val = msecs_to_jiffies(val);
-	if (val > tp->srtt) {
-		tp->srtt = val;
-		tp->rtt_seq = tp->snd_nxt;
-	}
-	val = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR);
-	if (val > tp->mdev) {
-		tp->mdev = val;
-		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
-	}
-	rcu_read_unlock();
-
-	tcp_set_rto(sk);
-reset:
-	if (tp->srtt == 0) {
+	if (crtt > tp->srtt) {
+		/* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
+		crtt >>= 3;
+		inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
+	} else if (tp->srtt == 0) {
 		/* RFC6298: 5.7 We've failed to get a valid RTT sample from
 		 * 3WHS. This is most likely due to retransmission,
 		 * including spurious one. Reset the RTO back to 3secs
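
The replacement logic above seeds only the RTO from the cached RTT and leaves the srtt/mdev estimator state untouched. A worked example of the seeding formula, with values in milliseconds and the cached RTT stored scaled by 8 as in the metric cache:

	#include <stdio.h>

	/* Worked example of the RTO seeding above: a cached RTT of 200 ms
	 * is stored scaled by 8 (crtt = 1600). With a 200 ms RTO floor this
	 * seeds a 600 ms initial RTO, later replaced by the first real RTT
	 * sample via tcp_rtt_estimator().
	 */
	static unsigned int seed_rto(unsigned int crtt, unsigned int rto_min)
	{
		crtt >>= 3;	/* undo the <<3 scaling */
		return crtt + (2 * crtt > rto_min ? 2 * crtt : rto_min);
	}

	int main(void)
	{
		printf("seeded RTO = %u ms\n", seed_rto(1600, 200)); /* 600 */
		return 0;
	}

Because only icsk_rto is seeded, a stale cache entry can at worst inflate the very first timeout; it cannot skew later RTT estimation.
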
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0f0178827259..58a3e69aef64 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -317,7 +317,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		key = tp->af_specific->md5_lookup(sk, sk);
 		if (key != NULL) {
 			tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
-			if (tcptw->tw_md5_key && tcp_alloc_md5sig_pool(sk) == NULL)
+			if (tcptw->tw_md5_key && !tcp_alloc_md5sig_pool())
 				BUG();
 		}
 	} while (0);
@@ -358,10 +358,8 @@ void tcp_twsk_destructor(struct sock *sk)
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_timewait_sock *twsk = tcp_twsk(sk);

-	if (twsk->tw_md5_key) {
-		tcp_free_md5sig_pool();
+	if (twsk->tw_md5_key)
 		kfree_rcu(twsk->tw_md5_key, rcu);
-	}
 #endif
 }
 EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
@@ -413,6 +411,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 	newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	tcp_enable_early_retrans(newtp);
 	newtp->tlp_high_seq = 0;
+	newtp->lsndtime = treq->snt_synack;
+	newtp->total_retrans = req->num_retrans;

 	/* So many TCP implementations out there (incorrectly) count the
 	 * initial SYN frame in their delayed-ACK and congestion control
@@ -668,12 +668,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (!(flg & TCP_FLAG_ACK))
 		return NULL;

-	/* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */
-	if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
-		tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
-	else if (req->num_retrans) /* don't take RTT sample if retrans && ~TS */
-		tcp_rsk(req)->snt_synack = 0;
-
 	/* For Fast Open no more processing is needed (sk is the
 	 * child socket).
 	 */
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
new file mode 100644
index 000000000000..3a7525e6c086
--- /dev/null
+++ b/net/ipv4/tcp_offload.c
@@ -0,0 +1,332 @@
+/*
+ *	IPV4 GSO/GRO offload support
+ *	Linux INET implementation
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	TCPv4 GSO/GRO support
+ */
+
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+#include <net/protocol.h>
+
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
+				netdev_features_t features)
+{
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	struct tcphdr *th;
+	unsigned int thlen;
+	unsigned int seq;
+	__be32 delta;
+	unsigned int oldlen;
+	unsigned int mss;
+	struct sk_buff *gso_skb = skb;
+	__sum16 newcheck;
+	bool ooo_okay, copy_destructor;
+
+	if (!pskb_may_pull(skb, sizeof(*th)))
+		goto out;
+
+	th = tcp_hdr(skb);
+	thlen = th->doff * 4;
+	if (thlen < sizeof(*th))
+		goto out;
+
+	if (!pskb_may_pull(skb, thlen))
+		goto out;
+
+	oldlen = (u16)~skb->len;
+	__skb_pull(skb, thlen);
+
+	mss = tcp_skb_mss(skb);
+	if (unlikely(skb->len <= mss))
+		goto out;
+
+	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
+		/* Packet is from an untrusted source, reset gso_segs. */
+		int type = skb_shinfo(skb)->gso_type;
+
+		if (unlikely(type &
+			     ~(SKB_GSO_TCPV4 |
+			       SKB_GSO_DODGY |
+			       SKB_GSO_TCP_ECN |
+			       SKB_GSO_TCPV6 |
+			       SKB_GSO_GRE |
+			       SKB_GSO_MPLS |
+			       SKB_GSO_UDP_TUNNEL |
+			       0) ||
+			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
+			goto out;
+
+		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
+
+		segs = NULL;
+		goto out;
+	}
+
+	copy_destructor = gso_skb->destructor == tcp_wfree;
+	ooo_okay = gso_skb->ooo_okay;
+	/* All segments but the first should have ooo_okay cleared */
+	skb->ooo_okay = 0;
+
+	segs = skb_segment(skb, features);
+	if (IS_ERR(segs))
+		goto out;
+
+	/* Only first segment might have ooo_okay set */
+	segs->ooo_okay = ooo_okay;
+
+	delta = htonl(oldlen + (thlen + mss));
+
+	skb = segs;
+	th = tcp_hdr(skb);
+	seq = ntohl(th->seq);
+
+	newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
+					       (__force u32)delta));
+
+	do {
+		th->fin = th->psh = 0;
+		th->check = newcheck;
+
+		if (skb->ip_summed != CHECKSUM_PARTIAL)
+			th->check =
+			     csum_fold(csum_partial(skb_transport_header(skb),
+						    thlen, skb->csum));
+
+		seq += mss;
+		if (copy_destructor) {
+			skb->destructor = gso_skb->destructor;
+			skb->sk = gso_skb->sk;
+			/* {tcp|sock}_wfree() use exact truesize accounting :
+			 * sum(skb->truesize) MUST be exactly be gso_skb->truesize
+			 * So we account mss bytes of 'true size' for each segment.
+			 * The last segment will contain the remaining.
+			 */
+			skb->truesize = mss;
+			gso_skb->truesize -= mss;
+		}
+		skb = skb->next;
+		th = tcp_hdr(skb);
+
+		th->seq = htonl(seq);
+		th->cwr = 0;
+	} while (skb->next);
+
+	/* Following permits TCP Small Queues to work well with GSO :
+	 * The callback to TCP stack will be called at the time last frag
+	 * is freed at TX completion, and not right now when gso_skb
+	 * is freed by GSO engine
+	 */
+	if (copy_destructor) {
+		swap(gso_skb->sk, skb->sk);
+		swap(gso_skb->destructor, skb->destructor);
+		swap(gso_skb->truesize, skb->truesize);
+	}
+
+	delta = htonl(oldlen + (skb_tail_pointer(skb) -
+				skb_transport_header(skb)) +
+		      skb->data_len);
+	th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
+				(__force u32)delta));
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		th->check = csum_fold(csum_partial(skb_transport_header(skb),
+						   thlen, skb->csum));
+out:
+	return segs;
+}
+EXPORT_SYMBOL(tcp_tso_segment);
+
+struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+{
+	struct sk_buff **pp = NULL;
+	struct sk_buff *p;
+	struct tcphdr *th;
+	struct tcphdr *th2;
+	unsigned int len;
+	unsigned int thlen;
+	__be32 flags;
+	unsigned int mss = 1;
+	unsigned int hlen;
+	unsigned int off;
+	int flush = 1;
+	int i;
+
+	off = skb_gro_offset(skb);
+	hlen = off + sizeof(*th);
+	th = skb_gro_header_fast(skb, off);
+	if (skb_gro_header_hard(skb, hlen)) {
+		th = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!th))
+			goto out;
+	}
+
+	thlen = th->doff * 4;
+	if (thlen < sizeof(*th))
+		goto out;
+
+	hlen = off + thlen;
+	if (skb_gro_header_hard(skb, hlen)) {
+		th = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!th))
+			goto out;
+	}
+
+	skb_gro_pull(skb, thlen);
+
+	len = skb_gro_len(skb);
+	flags = tcp_flag_word(th);
+
+	for (; (p = *head); head = &p->next) {
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		th2 = tcp_hdr(p);
+
+		if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+
+		goto found;
+	}
+
+	goto out_check_final;
+
+found:
+	flush = NAPI_GRO_CB(p)->flush;
+	flush |= (__force int)(flags & TCP_FLAG_CWR);
+	flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
+		  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
+	flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
+	for (i = sizeof(*th); i < thlen; i += 4)
+		flush |= *(u32 *)((u8 *)th + i) ^
+			 *(u32 *)((u8 *)th2 + i);
+
+	mss = tcp_skb_mss(p);
+
+	flush |= (len - 1) >= mss;
+	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
+
+	if (flush || skb_gro_receive(head, skb)) {
+		mss = 1;
+		goto out_check_final;
+	}
+
+	p = *head;
+	th2 = tcp_hdr(p);
+	tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
+
+out_check_final:
+	flush = len < mss;
+	flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
+					TCP_FLAG_RST | TCP_FLAG_SYN |
+					TCP_FLAG_FIN));
+
+	if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
+		pp = head;
+
+out:
+	NAPI_GRO_CB(skb)->flush |= flush;
+
+	return pp;
+}
+EXPORT_SYMBOL(tcp_gro_receive);
+
+int tcp_gro_complete(struct sk_buff *skb)
+{
+	struct tcphdr *th = tcp_hdr(skb);
+
+	skb->csum_start = skb_transport_header(skb) - skb->head;
+	skb->csum_offset = offsetof(struct tcphdr, check);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+
+	skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+	if (th->cwr)
+		skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+
+	return 0;
+}
+EXPORT_SYMBOL(tcp_gro_complete);
+
+static int tcp_v4_gso_send_check(struct sk_buff *skb)
+{
+	const struct iphdr *iph;
+	struct tcphdr *th;
+
+	if (!pskb_may_pull(skb, sizeof(*th)))
+		return -EINVAL;
+
+	iph = ip_hdr(skb);
+	th = tcp_hdr(skb);
+
+	th->check = 0;
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
+	return 0;
+}
+
+static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+{
+	const struct iphdr *iph = skb_gro_network_header(skb);
+	__wsum wsum;
+	__sum16 sum;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
+				  skb->csum)) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			break;
+		}
+flush:
+		NAPI_GRO_CB(skb)->flush = 1;
+		return NULL;
+
+	case CHECKSUM_NONE:
+		wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+					  skb_gro_len(skb), IPPROTO_TCP, 0);
+		sum = csum_fold(skb_checksum(skb,
+					     skb_gro_offset(skb),
+					     skb_gro_len(skb),
+					     wsum));
+		if (sum)
+			goto flush;
+
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		break;
+	}
+
+	return tcp_gro_receive(head, skb);
+}
+
+static int tcp4_gro_complete(struct sk_buff *skb)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	struct tcphdr *th = tcp_hdr(skb);
+
+	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
+				  iph->saddr, iph->daddr, 0);
+	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+	return tcp_gro_complete(skb);
+}
+
+static const struct net_offload tcpv4_offload = {
+	.callbacks = {
+		.gso_send_check	=	tcp_v4_gso_send_check,
+		.gso_segment	=	tcp_tso_segment,
+		.gro_receive	=	tcp4_gro_receive,
+		.gro_complete	=	tcp4_gro_complete,
+	},
+};
+
+int __init tcpv4_offload_init(void)
+{
+	return inet_add_offload(&tcpv4_offload, IPPROTO_TCP);
+}
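
tcp_tso_segment() above fixes up each segment's checksum incrementally rather than recomputing it: oldlen is the one's-complement negation of the original length (truncated to 16 bits), so adding delta = oldlen + (thlen + mss) to the existing sum swaps the old pseudo-header length for the per-segment one. A standalone demonstration that the incremental update matches a full recompute (toy header values, not kernel APIs):

	#include <stdint.h>
	#include <stdio.h>

	/* Fold a 32-bit accumulator into 16 bits, Internet-checksum style. */
	static uint32_t fold16(uint32_t s)
	{
		while (s >> 16)
			s = (s & 0xffff) + (s >> 16);
		return s;
	}

	static uint16_t cksum(const uint16_t *w, int n)
	{
		uint32_t s = 0;
		while (n--)
			s += *w++;
		return (uint16_t)~fold16(s);
	}

	int main(void)
	{
		uint16_t hdr[4] = { 0x45a0, 0x1234, 3000, 0x9abc };
		uint16_t old = cksum(hdr, 4);
		/* delta = (u16)~oldlen + newlen, as in tcp_tso_segment() */
		uint32_t delta = (uint16_t)~3000u + 1460u;
		/* update the complemented sum: new = ~fold(~old + delta) */
		uint16_t upd = (uint16_t)~fold16((uint16_t)~old + delta);

		hdr[2] = 1460;	/* the "length" field: 3000 -> 1460 */
		printf("recomputed %#06x incremental %#06x\n",
		       cksum(hdr, 4), upd);	/* both print 0x07bb */
		return 0;
	}
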
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ec335fabd5cc..e6bb8256e59f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,6 +65,9 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;

+unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX;
+EXPORT_SYMBOL(sysctl_tcp_notsent_lowat);
+
 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			   int push_one, gfp_t gfp);

@@ -160,6 +163,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const u32 now = tcp_time_stamp;
+	const struct dst_entry *dst = __sk_dst_get(sk);

 	if (sysctl_tcp_slow_start_after_idle &&
 	    (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
@@ -170,8 +174,9 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
 	/* If it is a reply for ato after last received
 	 * packet, enter pingpong mode.
 	 */
-	if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
-		icsk->icsk_ack.pingpong = 1;
+	if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato &&
+	    (!dst || !dst_metric(dst, RTAX_QUICKACK)))
+		icsk->icsk_ack.pingpong = 1;
 }

 /* Account for an ACK we sent. */
@@ -181,6 +186,21 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 }

+
+u32 tcp_default_init_rwnd(u32 mss)
+{
+	/* Initial receive window should be twice of TCP_INIT_CWND to
+	 * enable proper sending of new unsent data during fast recovery
+	 * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a
+	 * limit when mss is larger than 1460.
+	 */
+	u32 init_rwnd = TCP_INIT_CWND * 2;
+
+	if (mss > 1460)
+		init_rwnd = max((1460 * init_rwnd) / mss, 2U);
+	return init_rwnd;
+}
+
 /* Determine a window scaling and initial window to offer.
  * Based on the assumption that the given amount of space
  * will be offered. Store the results in the tp structure.
@@ -230,22 +250,10 @@ void tcp_select_initial_window(int __space, __u32 mss,
 		}
 	}

-	/* Set initial window to a value enough for senders starting with
-	 * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place
-	 * a limit on the initial window when mss is larger than 1460.
-	 */
 	if (mss > (1 << *rcv_wscale)) {
-		int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
-		if (mss > 1460)
-			init_cwnd =
-			max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
-		/* when initializing use the value from init_rcv_wnd
-		 * rather than the default from above
-		 */
-		if (init_rcv_wnd)
-			*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
-		else
-			*rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
+		if (!init_rcv_wnd) /* Use default unless specified otherwise */
+			init_rcv_wnd = tcp_default_init_rwnd(mss);
+		*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
 	}

 	/* Set the clamp no higher than max representable value */
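
tcp_default_init_rwnd() centralizes the initial-receive-window default that tcp_select_initial_window() previously computed inline. A worked example, assuming TCP_INIT_CWND is 10 as in this tree:

	#include <stdio.h>

	/* Mirror of tcp_default_init_rwnd() above: 20 segments for a
	 * standard MSS, scaled down for jumbo MSS to keep roughly the
	 * same byte count, never below 2 segments.
	 */
	static unsigned int default_init_rwnd(unsigned int mss)
	{
		unsigned int init_rwnd = 10 * 2;	/* TCP_INIT_CWND * 2 */

		if (mss > 1460)
			init_rwnd = (1460 * init_rwnd) / mss;
		return init_rwnd > 2 ? init_rwnd : 2;
	}

	int main(void)
	{
		printf("mss 1460 -> %u segments\n", default_init_rwnd(1460)); /* 20 */
		printf("mss 9000 -> %u segments\n", default_init_rwnd(9000)); /* 3 */
		return 0;
	}
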
@@ -887,8 +895,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,

 	skb_orphan(skb);
 	skb->sk = sk;
-	skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
-			  tcp_wfree : sock_wfree;
+	skb->destructor = tcp_wfree;
 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);

 	/* Build TCP header and checksum it. */
@@ -1623,7 +1630,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)

 	/* If a full-sized TSO skb can be sent, do it. */
 	if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
-			   sk->sk_gso_max_segs * tp->mss_cache))
+			   tp->xmit_size_goal_segs * tp->mss_cache))
 		goto send_now;

 	/* Middle in queue won't get any more data, full sendable already? */
@@ -1832,7 +1839,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;

-
 		tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
 		BUG_ON(!tso_segs);

@@ -1861,13 +1867,20 @@
 			break;
 		}

-		/* TSQ : sk_wmem_alloc accounts skb truesize,
-		 * including skb overhead. But thats OK.
+		/* TCP Small Queues :
+		 * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+		 * This allows for :
+		 *  - better RTT estimation and ACK scheduling
+		 *  - faster recovery
+		 *  - high rates
 		 */
-		if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
+		limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
+
+		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
 			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
 			break;
 		}
+
 		limit = mss_now;
 		if (tso_segs > 1 && !tcp_urg_mode(tp))
 			limit = tcp_mss_split_point(sk, skb, mss_now,
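
The new TSQ limit above caps in-flight qdisc/device bytes at roughly one millisecond of the current pacing rate: sk_pacing_rate is in bytes per second, and a right shift by 10 divides by 1024, close enough to per-millisecond. A quick check of the arithmetic:

	#include <stdio.h>

	int main(void)
	{
		unsigned long pacing_rate = 12500000; /* 100 Mbit/s in B/s */
		unsigned long truesize = 4096;	      /* assumed skb size */
		unsigned long limit = pacing_rate >> 10;

		if (limit < truesize)
			limit = truesize; /* always allow at least one skb */
		printf("TSQ limit: %lu bytes (~1 ms at 100 Mbit/s)\n",
		       limit);		  /* 12207 */
		return 0;
	}

The skb->truesize floor guarantees forward progress even at very low pacing rates.
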
@@ -2402,6 +2415,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		 * see tcp_input.c tcp_sacktag_write_queue().
 		 */
 		TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
+	} else {
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
 	}
 	return err;
 }
@@ -2523,10 +2538,9 @@ begin_fwd:
 		if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
 			continue;

-		if (tcp_retransmit_skb(sk, skb)) {
-			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
+		if (tcp_retransmit_skb(sk, skb))
 			return;
-		}
+
 		NET_INC_STATS_BH(sock_net(sk), mib_idx);

 		if (tcp_in_cwnd_reduction(sk))
@@ -2664,7 +2678,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	int tcp_header_size;
 	int mss;

-	skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC));
+	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
 	if (unlikely(!skb)) {
 		dst_release(dst);
 		return NULL;
@@ -2808,6 +2822,8 @@ void tcp_connect_init(struct sock *sk)

 	if (likely(!tp->repair))
 		tp->rcv_nxt = 0;
+	else
+		tp->rcv_tstamp = tcp_time_stamp;
 	tp->rcv_wup = tp->rcv_nxt;
 	tp->copied_seq = tp->rcv_nxt;

diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index d4943f67aff2..611beab38a00 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -46,6 +46,10 @@ static unsigned int bufsize __read_mostly = 4096;
 MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)");
 module_param(bufsize, uint, 0);

+static unsigned int fwmark __read_mostly = 0;
+MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");
+module_param(fwmark, uint, 0);
+
 static int full __read_mostly;
 MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)");
 module_param(full, int, 0);
@@ -54,12 +58,16 @@ static const char procname[] = "tcpprobe";

 struct tcp_log {
 	ktime_t tstamp;
-	__be32	saddr, daddr;
-	__be16	sport, dport;
+	union {
+		struct sockaddr		raw;
+		struct sockaddr_in	v4;
+		struct sockaddr_in6	v6;
+	} src, dst;
 	u16	length;
 	u32	snd_nxt;
 	u32	snd_una;
 	u32	snd_wnd;
+	u32	rcv_wnd;
 	u32	snd_cwnd;
 	u32	ssthresh;
 	u32	srtt;
@@ -86,19 +94,45 @@ static inline int tcp_probe_avail(void)
 	return bufsize - tcp_probe_used() - 1;
 }

+#define tcp_probe_copy_fl_to_si4(inet, si4, mem)		\
+	do {							\
+		si4.sin_family = AF_INET;			\
+		si4.sin_port = inet->inet_##mem##port;		\
+		si4.sin_addr.s_addr = inet->inet_##mem##addr;	\
+	} while (0)						\
+
+#if IS_ENABLED(CONFIG_IPV6)
+#define tcp_probe_copy_fl_to_si6(inet, si6, mem)		\
+	do {							\
+		struct ipv6_pinfo *pi6 = inet->pinet6;		\
+		si6.sin6_family = AF_INET6;			\
+		si6.sin6_port = inet->inet_##mem##port;		\
+		si6.sin6_addr = pi6->mem##addr;			\
+		si6.sin6_flowinfo = 0; /* No need here. */	\
+		si6.sin6_scope_id = 0; /* No need here. */	\
+	} while (0)
+#else
+#define tcp_probe_copy_fl_to_si6(fl, si6, mem)			\
+	do {							\
+		memset(&si6, 0, sizeof(si6));			\
+	} while (0)
+#endif
+
 /*
  * Hook inserted to be called before each receive packet.
  * Note: arguments must match tcp_rcv_established()!
  */
-static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-				struct tcphdr *th, unsigned int len)
+static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
+				 const struct tcphdr *th, unsigned int len)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_sock *inet = inet_sk(sk);

-	/* Only update if port matches */
-	if ((port == 0 || ntohs(inet->inet_dport) == port ||
-	     ntohs(inet->inet_sport) == port) &&
+	/* Only update if port or skb mark matches */
+	if (((port == 0 && fwmark == 0) ||
+	     ntohs(inet->inet_dport) == port ||
+	     ntohs(inet->inet_sport) == port ||
+	     (fwmark > 0 && skb->mark == fwmark)) &&
 	    (full || tp->snd_cwnd != tcp_probe.lastcwnd)) {

 		spin_lock(&tcp_probe.lock);
@@ -107,15 +141,25 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 		struct tcp_log *p = tcp_probe.log + tcp_probe.head;

 		p->tstamp = ktime_get();
-		p->saddr = inet->inet_saddr;
-		p->sport = inet->inet_sport;
-		p->daddr = inet->inet_daddr;
-		p->dport = inet->inet_dport;
+		switch (sk->sk_family) {
+		case AF_INET:
+			tcp_probe_copy_fl_to_si4(inet, p->src.v4, s);
+			tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d);
+			break;
+		case AF_INET6:
+			tcp_probe_copy_fl_to_si6(inet, p->src.v6, s);
+			tcp_probe_copy_fl_to_si6(inet, p->dst.v6, d);
+			break;
+		default:
+			BUG();
+		}
+
 		p->length = skb->len;
 		p->snd_nxt = tp->snd_nxt;
 		p->snd_una = tp->snd_una;
 		p->snd_cwnd = tp->snd_cwnd;
 		p->snd_wnd = tp->snd_wnd;
+		p->rcv_wnd = tp->rcv_wnd;
 		p->ssthresh = tcp_current_ssthresh(sk);
 		p->srtt = tp->srtt >> 3;

@@ -128,7 +172,6 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	}

 	jprobe_return();
-	return 0;
 }

 static struct jprobe tcp_jprobe = {
@@ -157,13 +200,11 @@ static int tcpprobe_sprint(char *tbuf, int n)
 		= ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start));

 	return scnprintf(tbuf, n,
-			"%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n",
+			"%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n",
 			(unsigned long) tv.tv_sec,
 			(unsigned long) tv.tv_nsec,
-			&p->saddr, ntohs(p->sport),
-			&p->daddr, ntohs(p->dport),
-			p->length, p->snd_nxt, p->snd_una,
-			p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt);
+			&p->src, &p->dst, p->length, p->snd_nxt, p->snd_una,
+			p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd);
 }

 static ssize_t tcpprobe_read(struct file *file, char __user *buf,
@@ -176,7 +217,7 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf,
 		return -EINVAL;

 	while (cnt < len) {
-		char tbuf[164];
+		char tbuf[256];
 		int width;

 		/* Wait for data in buffer */
@@ -223,6 +264,13 @@ static __init int tcpprobe_init(void)
 {
 	int ret = -ENOMEM;

+	/* Warning: if the function signature of tcp_rcv_established,
+	 * has been changed, you also have to change the signature of
+	 * jtcp_rcv_established, otherwise you end up right here!
+	 */
+	BUILD_BUG_ON(__same_type(tcp_rcv_established,
+				 jtcp_rcv_established) == 0);
+
 	init_waitqueue_head(&tcp_probe.wait);
 	spin_lock_init(&tcp_probe.lock);

@@ -241,7 +289,8 @@ static __init int tcpprobe_init(void)
 	if (ret)
 		goto err1;

-	pr_info("probe registered (port=%d) bufsize=%u\n", port, bufsize);
+	pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n",
+		port, fwmark, bufsize);
 	return 0;
  err1:
 	remove_proc_entry(procname, init_net.proc_net);
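
The matching condition in jtcp_rcv_established() is easy to misread because the port and fwmark filters share one expression. A standalone restatement of its precedence (illustrative, not kernel code): with neither filter set every packet matches; otherwise a packet matches on either key.

	#include <stdbool.h>
	#include <stdio.h>

	static bool probe_match(unsigned int port, unsigned int fwmark,
				unsigned int sport, unsigned int dport,
				unsigned int mark)
	{
		bool no_filter = (port == 0 && fwmark == 0); /* trace all */
		bool port_hit  = (dport == port || sport == port);
		bool mark_hit  = (fwmark > 0 && mark == fwmark);

		return no_filter || port_hit || mark_hit;
	}

	int main(void)
	{
		printf("%d %d\n",
		       probe_match(0, 42, 33000, 80, 42), /* 1: mark hit */
		       probe_match(80, 0, 33000, 443, 0)); /* 0: no hit */
		return 0;
	}
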
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0bf5d399a03c..0ca44df51ee9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -109,6 +109,7 @@
 #include <trace/events/udp.h>
 #include <linux/static_key.h>
 #include <trace/events/skb.h>
+#include <net/busy_poll.h>
 #include "udp_impl.h"

 struct udp_table udp_table __read_mostly;
@@ -429,7 +430,7 @@ begin:
 		reuseport = sk->sk_reuseport;
 		if (reuseport) {
 			hash = inet_ehashfn(net, daddr, hnum,
-					    saddr, htons(sport));
+					    saddr, sport);
 			matches = 1;
 		}
 	} else if (score == badness && reuseport) {
@@ -510,7 +511,7 @@ begin:
 		reuseport = sk->sk_reuseport;
 		if (reuseport) {
 			hash = inet_ehashfn(net, daddr, hnum,
-					    saddr, htons(sport));
+					    saddr, sport);
 			matches = 1;
 		}
 	} else if (score == badness && reuseport) {
@@ -657,7 +658,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
 		break;
 	case ICMP_REDIRECT:
 		ipv4_sk_redirect(skb, sk);
-		break;
+		goto out;
 	}

 	/*
@@ -703,7 +704,7 @@ EXPORT_SYMBOL(udp_flush_pending_frames);
  * @src:	source IP address
  * @dst:	destination IP address
  */
-static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
+void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
 {
 	struct udphdr *uh = udp_hdr(skb);
 	struct sk_buff *frags = skb_shinfo(skb)->frag_list;
@@ -739,6 +740,7 @@ static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
 		uh->check = CSUM_MANGLED_0;
 	}
 }
+EXPORT_SYMBOL_GPL(udp4_hwcsum);

 static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
 {
@@ -799,7 +801,7 @@ send:
 /*
  * Push out all pending data as one UDP datagram. Socket is locked.
  */
-static int udp_push_pending_frames(struct sock *sk)
+int udp_push_pending_frames(struct sock *sk)
 {
 	struct udp_sock *up = udp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
@@ -818,6 +820,7 @@ out:
 	up->pending = 0;
 	return err;
 }
+EXPORT_SYMBOL(udp_push_pending_frames);

 int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		size_t len)
@@ -1709,7 +1712,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);

 	if (sk != NULL) {
-		int ret = udp_queue_rcv_skb(sk, skb);
+		int ret;
+
+		sk_mark_napi_id(sk, skb);
+		ret = udp_queue_rcv_skb(sk, skb);
 		sock_put(sk);

 		/* a return value > 0 means to resubmit the input, but
@@ -1967,6 +1973,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	unsigned int mask = datagram_poll(file, sock, wait);
 	struct sock *sk = sock->sk;

+	sock_rps_record_flow(sk);
+
 	/* Check for false positives due to checksum errors */
 	if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
 	    !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk))
@@ -2151,7 +2159,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
 	__u16 srcp	  = ntohs(inet->inet_sport);

 	seq_printf(f, "%5d: %08X:%04X %08X:%04X"
-		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n",
+		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d%n",
 		bucket, src, srcp, dest, destp, sp->sk_state,
 		sk_wmem_alloc_get(sp),
 		sk_rmem_alloc_get(sp),
@@ -2284,29 +2292,8 @@ void __init udp_init(void)
 	sysctl_udp_wmem_min = SK_MEM_QUANTUM;
 }

-int udp4_ufo_send_check(struct sk_buff *skb)
-{
-	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
-		return -EINVAL;
-
-	if (likely(!skb->encapsulation)) {
-		const struct iphdr *iph;
-		struct udphdr *uh;
-
-		iph = ip_hdr(skb);
-		uh = udp_hdr(skb);
-
-		uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
-				IPPROTO_UDP, 0);
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct udphdr, check);
-		skb->ip_summed = CHECKSUM_PARTIAL;
-	}
-	return 0;
-}
-
-static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
-					      netdev_features_t features)
+struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
+				       netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	int mac_len = skb->mac_len;
@@ -2337,6 +2324,9 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 		struct udphdr *uh;
 		int udp_offset = outer_hlen - tnl_hlen;

+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
+
 		skb->mac_len = mac_len;

 		skb_push(skb, outer_hlen);
@@ -2347,7 +2337,7 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 		uh->len = htons(skb->len - udp_offset);

 		/* csum segment if tunnel sets skb with csum. */
-		if (unlikely(uh->check)) {
+		if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) {
 			struct iphdr *iph = ip_hdr(skb);

 			uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
@@ -2358,60 +2348,20 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 			if (uh->check == 0)
 				uh->check = CSUM_MANGLED_0;

+		} else if (protocol == htons(ETH_P_IPV6)) {
+			struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+			u32 len = skb->len - udp_offset;
+
+			uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+						     len, IPPROTO_UDP, 0);
+			uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0));
+			if (uh->check == 0)
+				uh->check = CSUM_MANGLED_0;
+			skb->ip_summed = CHECKSUM_NONE;
 		}
-		skb->ip_summed = CHECKSUM_NONE;
+
 		skb->protocol = protocol;
 	} while ((skb = skb->next));
 out:
 	return segs;
 }
-
-struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
-				  netdev_features_t features)
-{
-	struct sk_buff *segs = ERR_PTR(-EINVAL);
-	unsigned int mss;
-	mss = skb_shinfo(skb)->gso_size;
-	if (unlikely(skb->len <= mss))
-		goto out;
-
-	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
-		/* Packet is from an untrusted source, reset gso_segs. */
-		int type = skb_shinfo(skb)->gso_type;
-
-		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
-				      SKB_GSO_UDP_TUNNEL |
-				      SKB_GSO_GRE) ||
-			     !(type & (SKB_GSO_UDP))))
-			goto out;
-
-		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
-		segs = NULL;
-		goto out;
-	}
-
-	/* Fragment the skb. IP headers of the fragments are updated in
-	 * inet_gso_segment()
-	 */
-	if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
-		segs = skb_udp_tunnel_segment(skb, features);
-	else {
-		int offset;
-		__wsum csum;
-
-		/* Do software UFO. Complete and fill in the UDP checksum as
-		 * HW cannot do checksum of UDP packets sent as multiple
-		 * IP fragments.
-		 */
-		offset = skb_checksum_start_offset(skb);
-		csum = skb_checksum(skb, offset, skb->len - offset, 0);
-		offset += skb->csum_offset;
-		*(__sum16 *)(skb->data + offset) = csum_fold(csum);
-		skb->ip_summed = CHECKSUM_NONE;
-
-		segs = skb_segment(skb, features);
-	}
-out:
-	return segs;
-}
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
new file mode 100644
index 000000000000..f35eccaa855e
--- /dev/null
+++ b/net/ipv4/udp_offload.c
@@ -0,0 +1,100 @@
+/*
+ *	IPV4 GSO/GRO offload support
+ *	Linux INET implementation
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	UDPv4 GSO support
+ */
+
+#include <linux/skbuff.h>
+#include <net/udp.h>
+#include <net/protocol.h>
+
+static int udp4_ufo_send_check(struct sk_buff *skb)
+{
+	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
+		return -EINVAL;
+
+	if (likely(!skb->encapsulation)) {
+		const struct iphdr *iph;
+		struct udphdr *uh;
+
+		iph = ip_hdr(skb);
+		uh = udp_hdr(skb);
+
+		uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
+				IPPROTO_UDP, 0);
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		skb->ip_summed = CHECKSUM_PARTIAL;
+	}
+
+	return 0;
+}
+
+static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
+					 netdev_features_t features)
+{
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	unsigned int mss;
+
+	mss = skb_shinfo(skb)->gso_size;
+	if (unlikely(skb->len <= mss))
+		goto out;
+
+	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
+		/* Packet is from an untrusted source, reset gso_segs. */
+		int type = skb_shinfo(skb)->gso_type;
+
+		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+				      SKB_GSO_UDP_TUNNEL |
+				      SKB_GSO_GRE | SKB_GSO_MPLS) ||
+			     !(type & (SKB_GSO_UDP))))
+			goto out;
+
+		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
+
+		segs = NULL;
+		goto out;
+	}
+
+	/* Fragment the skb. IP headers of the fragments are updated in
+	 * inet_gso_segment()
+	 */
+	if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
+		segs = skb_udp_tunnel_segment(skb, features);
+	else {
+		int offset;
+		__wsum csum;
+
+		/* Do software UFO. Complete and fill in the UDP checksum as
+		 * HW cannot do checksum of UDP packets sent as multiple
+		 * IP fragments.
+		 */
+		offset = skb_checksum_start_offset(skb);
+		csum = skb_checksum(skb, offset, skb->len - offset, 0);
+		offset += skb->csum_offset;
+		*(__sum16 *)(skb->data + offset) = csum_fold(csum);
+		skb->ip_summed = CHECKSUM_NONE;
+
+		segs = skb_segment(skb, features);
+	}
+out:
+	return segs;
+}
+
+static const struct net_offload udpv4_offload = {
+	.callbacks = {
+		.gso_send_check = udp4_ufo_send_check,
+		.gso_segment = udp4_ufo_fragment,
+	},
+};
+
+int __init udpv4_offload_init(void)
+{
+	return inet_add_offload(&udpv4_offload, IPPROTO_UDP);
+}
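
udp4_ufo_fragment() gates on gso_type with the usual bitmask idiom: any bit outside the allowed set rejects the packet, and the UDP bit itself must be present. A minimal demonstration with illustrative bit values (not the kernel's SKB_GSO_* constants):

	#include <stdio.h>

	#define GSO_UDP		(1 << 0)
	#define GSO_DODGY	(1 << 1)
	#define GSO_UDP_TUNNEL	(1 << 2)	/* illustrative values */

	/* Accept only known bits, and require the UDP bit itself. */
	static int type_ok(unsigned int type)
	{
		unsigned int allowed = GSO_UDP | GSO_DODGY | GSO_UDP_TUNNEL;

		return !(type & ~allowed) && (type & GSO_UDP);
	}

	int main(void)
	{
		printf("%d %d\n", type_ok(GSO_UDP | GSO_DODGY), /* 1 */
		       type_ok(1 << 7));	/* 0: unknown bit set */
		return 0;
	}
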
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index eb1dd4d643f2..b5663c37f089 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -117,7 +117,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)

 	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
 		0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
-	ip_select_ident(top_iph, dst->child, NULL);
+	ip_select_ident(skb, dst->child, NULL);

 	top_iph->ttl = ip4_dst_hoplimit(dst->child);

diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 327a617d594c..baa0f63731fd 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -21,7 +21,6 @@
 static int xfrm4_tunnel_check_size(struct sk_buff *skb)
 {
 	int mtu, ret = 0;
-	struct dst_entry *dst;

 	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
 		goto out;
@@ -29,12 +28,10 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
 	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df)
 		goto out;

-	dst = skb_dst(skb);
-	mtu = dst_mtu(dst);
+	mtu = dst_mtu(skb_dst(skb));
 	if (skb->len > mtu) {
 		if (skb->sk)
-			ip_local_error(skb->sk, EMSGSIZE, ip_hdr(skb)->daddr,
-				       inet_sk(skb->sk)->inet_dport, mtu);
+			xfrm_local_error(skb, mtu);
 		else
 			icmp_send(skb, ICMP_DEST_UNREACH,
 				  ICMP_FRAG_NEEDED, htonl(mtu));
@@ -99,3 +96,12 @@ int xfrm4_output(struct sk_buff *skb)
 			    x->outer_mode->afinfo->output_finish,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
+
+void xfrm4_local_error(struct sk_buff *skb, u32 mtu)
+{
+	struct iphdr *hdr;
+
+	hdr = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
+	ip_local_error(skb->sk, EMSGSIZE, hdr->daddr,
+		       inet_sk(skb->sk)->inet_dport, mtu);
+}
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 9258e751baba..0b2a0641526a 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -83,6 +83,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.extract_input		= xfrm4_extract_input,
 	.extract_output		= xfrm4_extract_output,
 	.transport_finish	= xfrm4_transport_finish,
+	.local_error		= xfrm4_local_error,
 };

 void __init xfrm4_state_init(void)
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 05a5df2febc9..06347dbd32c1 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -63,7 +63,7 @@ static int xfrm_tunnel_err(struct sk_buff *skb, u32 info)
 static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = {
 	.handler	=	xfrm_tunnel_rcv,
 	.err_handler	=	xfrm_tunnel_err,
-	.priority	=	2,
+	.priority	=	3,
 };

 #if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 9af088d2cdaa..470a9c008e9b 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_IPV6) += ipv6.o
 ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
 		addrlabel.o \
 		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
-		raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
+		raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
 		exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o

 ipv6-offload :=	ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1bbf744c2cc3..cd3fb301da38 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -99,9 +99,9 @@
 #define ACONF_DEBUG 2

 #if ACONF_DEBUG >= 3
-#define ADBG(x) printk x
+#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
 #else
-#define ADBG(x)
+#define ADBG(fmt, ...) do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
 #endif

 #define INFINITY_LIFE_TIME	0xFFFFFFFF
@@ -177,6 +177,8 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.accept_redirects	= 1,
 	.autoconf		= 1,
 	.force_mld_version	= 0,
+	.mldv1_unsolicited_report_interval = 10 * HZ,
+	.mldv2_unsolicited_report_interval = HZ,
 	.dad_transmits		= 1,
 	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
 	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
@@ -202,6 +204,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
 	.disable_ipv6		= 0,
 	.accept_dad		= 1,
+	.suppress_frag_ndisc	= 1,
 };

 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -211,6 +214,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.accept_ra		= 1,
 	.accept_redirects	= 1,
 	.autoconf		= 1,
+	.force_mld_version	= 0,
+	.mldv1_unsolicited_report_interval = 10 * HZ,
+	.mldv2_unsolicited_report_interval = HZ,
 	.dad_transmits		= 1,
 	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
 	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
@@ -236,54 +242,41 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
 	.disable_ipv6		= 0,
 	.accept_dad		= 1,
+	.suppress_frag_ndisc	= 1,
 };

-/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
-const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
-const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
-const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
-const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
-const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
-const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
-const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
-
 /* Check if a valid qdisc is available */
 static inline bool addrconf_qdisc_ok(const struct net_device *dev)
 {
 	return !qdisc_tx_is_noop(dev);
 }

-static void addrconf_del_timer(struct inet6_ifaddr *ifp)
+static void addrconf_del_rs_timer(struct inet6_dev *idev)
+{
+	if (del_timer(&idev->rs_timer))
+		__in6_dev_put(idev);
+}
+
+static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp)
 {
-	if (del_timer(&ifp->timer))
+	if (del_timer(&ifp->dad_timer))
 		__in6_ifa_put(ifp);
 }

-enum addrconf_timer_t {
-	AC_NONE,
-	AC_DAD,
-	AC_RS,
-};
+static void addrconf_mod_rs_timer(struct inet6_dev *idev,
+				  unsigned long when)
+{
+	if (!timer_pending(&idev->rs_timer))
+		in6_dev_hold(idev);
+	mod_timer(&idev->rs_timer, jiffies + when);
+}

-static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
-			       enum addrconf_timer_t what,
-			       unsigned long when)
+static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp,
+				   unsigned long when)
 {
-	if (!del_timer(&ifp->timer))
+	if (!timer_pending(&ifp->dad_timer))
 		in6_ifa_hold(ifp);
-
-	switch (what) {
-	case AC_DAD:
-		ifp->timer.function = addrconf_dad_timer;
-		break;
-	case AC_RS:
-		ifp->timer.function = addrconf_rs_timer;
-		break;
-	default:
-		break;
-	}
-	ifp->timer.expires = jiffies + when;
-	add_timer(&ifp->timer);
+	mod_timer(&ifp->dad_timer, jiffies + when);
 }

 static int snmp6_alloc_dev(struct inet6_dev *idev)
@@ -311,35 +304,6 @@ err_ip:
 	return -ENOMEM;
 }

-static void snmp6_free_dev(struct inet6_dev *idev)
-{
-	kfree(idev->stats.icmpv6msgdev);
-	kfree(idev->stats.icmpv6dev);
-	snmp_mib_free((void __percpu **)idev->stats.ipv6);
-}
-
-/* Nobody refers to this device, we may destroy it. */
-
-void in6_dev_finish_destroy(struct inet6_dev *idev)
-{
-	struct net_device *dev = idev->dev;
-
-	WARN_ON(!list_empty(&idev->addr_list));
-	WARN_ON(idev->mc_list != NULL);
-
-#ifdef NET_REFCNT_DEBUG
-	pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL");
-#endif
-	dev_put(dev);
-	if (!idev->dead) {
-		pr_warn("Freeing alive inet6 device %p\n", idev);
-		return;
-	}
-	snmp6_free_dev(idev);
-	kfree_rcu(idev, rcu);
-}
-EXPORT_SYMBOL(in6_dev_finish_destroy);
-
 static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 {
 	struct inet6_dev *ndev;
@@ -357,7 +321,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
357 rwlock_init(&ndev->lock); 321 rwlock_init(&ndev->lock);
358 ndev->dev = dev; 322 ndev->dev = dev;
359 INIT_LIST_HEAD(&ndev->addr_list); 323 INIT_LIST_HEAD(&ndev->addr_list);
360 324 setup_timer(&ndev->rs_timer, addrconf_rs_timer,
325 (unsigned long)ndev);
361 memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); 326 memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
362 ndev->cnf.mtu6 = dev->mtu; 327 ndev->cnf.mtu6 = dev->mtu;
363 ndev->cnf.sysctl = NULL; 328 ndev->cnf.sysctl = NULL;
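The open-coded init_timer() plus manual .function/.data assignment that addrconf_mod_timer() used is replaced with setup_timer() here and again in ipv6_add_addr() below. The two forms are equivalent; a sketch, kernel context assumed, with my_handler as a stand-in name:

    #include <linux/timer.h>

    static void my_handler(unsigned long data);

    static void arm_old_style(struct timer_list *t, void *obj)
    {
            init_timer(t);
            t->function = my_handler;
            t->data = (unsigned long)obj;
    }

    static void arm_new_style(struct timer_list *t, void *obj)
    {
            setup_timer(t, my_handler, (unsigned long)obj);
    }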
@@ -372,9 +337,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
372 dev_hold(dev); 337 dev_hold(dev);
373 338
374 if (snmp6_alloc_dev(ndev) < 0) { 339 if (snmp6_alloc_dev(ndev) < 0) {
375 ADBG((KERN_WARNING 340 ADBG(KERN_WARNING
376 "%s: cannot allocate memory for statistics; dev=%s.\n", 341 "%s: cannot allocate memory for statistics; dev=%s.\n",
377 __func__, dev->name)); 342 __func__, dev->name);
378 neigh_parms_release(&nd_tbl, ndev->nd_parms); 343 neigh_parms_release(&nd_tbl, ndev->nd_parms);
379 dev_put(dev); 344 dev_put(dev);
380 kfree(ndev); 345 kfree(ndev);
@@ -382,9 +347,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
382 } 347 }
383 348
384 if (snmp6_register_dev(ndev) < 0) { 349 if (snmp6_register_dev(ndev) < 0) {
385 ADBG((KERN_WARNING 350 ADBG(KERN_WARNING
386 "%s: cannot create /proc/net/dev_snmp6/%s\n", 351 "%s: cannot create /proc/net/dev_snmp6/%s\n",
387 __func__, dev->name)); 352 __func__, dev->name);
388 neigh_parms_release(&nd_tbl, ndev->nd_parms); 353 neigh_parms_release(&nd_tbl, ndev->nd_parms);
389 ndev->dead = 1; 354 ndev->dead = 1;
390 in6_dev_finish_destroy(ndev); 355 in6_dev_finish_destroy(ndev);
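The ADBG call sites throughout this file lose one pair of parentheses. The old macro took a single parameter, so callers had to wrap the whole printk argument list; the new form is a C99 variadic macro. The definition itself is updated elsewhere in this commit, so the before/after below is a plausible reconstruction, not the actual hunk:

    #include <linux/printk.h>

    #define ADBG_OLD(x) printk x
    /* usage: ADBG_OLD((KERN_WARNING "dev=%s\n", name)); */

    #define ADBG_NEW(fmt, ...) printk(fmt, ##__VA_ARGS__)
    /* usage: ADBG_NEW(KERN_WARNING "dev=%s\n", name); */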
@@ -776,7 +741,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
776 741
777 in6_dev_put(ifp->idev); 742 in6_dev_put(ifp->idev);
778 743
779 if (del_timer(&ifp->timer)) 744 if (del_timer(&ifp->dad_timer))
780 pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); 745 pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
781 746
782 if (ifp->state != INET6_IFADDR_STATE_DEAD) { 747 if (ifp->state != INET6_IFADDR_STATE_DEAD) {
@@ -816,8 +781,9 @@ static u32 inet6_addr_hash(const struct in6_addr *addr)
816/* On success it returns ifp with increased reference count */ 781/* On success it returns ifp with increased reference count */
817 782
818static struct inet6_ifaddr * 783static struct inet6_ifaddr *
819ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, 784ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
820 int scope, u32 flags) 785 const struct in6_addr *peer_addr, int pfxlen,
786 int scope, u32 flags, u32 valid_lft, u32 prefered_lft)
821{ 787{
822 struct inet6_ifaddr *ifa = NULL; 788 struct inet6_ifaddr *ifa = NULL;
823 struct rt6_info *rt; 789 struct rt6_info *rt;
@@ -846,7 +812,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
846 812
847 /* Ignore adding duplicate addresses on an interface */ 813 /* Ignore adding duplicate addresses on an interface */
848 if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) { 814 if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
849 ADBG(("ipv6_add_addr: already assigned\n")); 815 ADBG("ipv6_add_addr: already assigned\n");
850 err = -EEXIST; 816 err = -EEXIST;
851 goto out; 817 goto out;
852 } 818 }
@@ -854,7 +820,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
854 ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); 820 ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
855 821
856 if (ifa == NULL) { 822 if (ifa == NULL) {
857 ADBG(("ipv6_add_addr: malloc failed\n")); 823 ADBG("ipv6_add_addr: malloc failed\n");
858 err = -ENOBUFS; 824 err = -ENOBUFS;
859 goto out; 825 goto out;
860 } 826 }
@@ -866,15 +832,19 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
866 } 832 }
867 833
868 ifa->addr = *addr; 834 ifa->addr = *addr;
835 if (peer_addr)
836 ifa->peer_addr = *peer_addr;
869 837
870 spin_lock_init(&ifa->lock); 838 spin_lock_init(&ifa->lock);
871 spin_lock_init(&ifa->state_lock); 839 spin_lock_init(&ifa->state_lock);
872 init_timer(&ifa->timer); 840 setup_timer(&ifa->dad_timer, addrconf_dad_timer,
841 (unsigned long)ifa);
873 INIT_HLIST_NODE(&ifa->addr_lst); 842 INIT_HLIST_NODE(&ifa->addr_lst);
874 ifa->timer.data = (unsigned long) ifa;
875 ifa->scope = scope; 843 ifa->scope = scope;
876 ifa->prefix_len = pfxlen; 844 ifa->prefix_len = pfxlen;
877 ifa->flags = flags | IFA_F_TENTATIVE; 845 ifa->flags = flags | IFA_F_TENTATIVE;
846 ifa->valid_lft = valid_lft;
847 ifa->prefered_lft = prefered_lft;
878 ifa->cstamp = ifa->tstamp = jiffies; 848 ifa->cstamp = ifa->tstamp = jiffies;
879 ifa->tokenized = false; 849 ifa->tokenized = false;
880 850
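With peer_addr and both lifetimes passed in here, every field of the new inet6_ifaddr is final before it is hashed and published; callers no longer patch valid_lft/prefered_lft under ifp->lock after the fact (compare the ipv6_create_tempaddr and inet6_addr_add hunks below, which drop exactly that code). The resulting shape, with the new parameters marked:

    static struct inet6_ifaddr *
    ipv6_add_addr(struct inet6_dev *idev,
                  const struct in6_addr *addr,
                  const struct in6_addr *peer_addr, /* new: NULL if no peer */
                  int pfxlen, int scope, u32 flags,
                  u32 valid_lft, u32 prefered_lft); /* new: set at creation */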
@@ -994,7 +964,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
994 } 964 }
995 write_unlock_bh(&idev->lock); 965 write_unlock_bh(&idev->lock);
996 966
997 addrconf_del_timer(ifp); 967 addrconf_del_dad_timer(ifp);
998 968
999 ipv6_ifa_notify(RTM_DELADDR, ifp); 969 ipv6_ifa_notify(RTM_DELADDR, ifp);
1000 970
@@ -1052,7 +1022,6 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
1052 unsigned long regen_advance; 1022 unsigned long regen_advance;
1053 int tmp_plen; 1023 int tmp_plen;
1054 int ret = 0; 1024 int ret = 0;
1055 int max_addresses;
1056 u32 addr_flags; 1025 u32 addr_flags;
1057 unsigned long now = jiffies; 1026 unsigned long now = jiffies;
1058 1027
@@ -1098,7 +1067,6 @@ retry:
1098 idev->cnf.temp_prefered_lft + age - 1067 idev->cnf.temp_prefered_lft + age -
1099 idev->cnf.max_desync_factor); 1068 idev->cnf.max_desync_factor);
1100 tmp_plen = ifp->prefix_len; 1069 tmp_plen = ifp->prefix_len;
1101 max_addresses = idev->cnf.max_addresses;
1102 tmp_tstamp = ifp->tstamp; 1070 tmp_tstamp = ifp->tstamp;
1103 spin_unlock_bh(&ifp->lock); 1071 spin_unlock_bh(&ifp->lock);
1104 1072
@@ -1124,12 +1092,10 @@ retry:
1124 if (ifp->flags & IFA_F_OPTIMISTIC) 1092 if (ifp->flags & IFA_F_OPTIMISTIC)
1125 addr_flags |= IFA_F_OPTIMISTIC; 1093 addr_flags |= IFA_F_OPTIMISTIC;
1126 1094
1127 ift = !max_addresses || 1095 ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
1128 ipv6_count_addresses(idev) < max_addresses ? 1096 ipv6_addr_scope(&addr), addr_flags,
1129 ipv6_add_addr(idev, &addr, tmp_plen, 1097 tmp_valid_lft, tmp_prefered_lft);
1130 ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, 1098 if (IS_ERR(ift)) {
1131 addr_flags) : NULL;
1132 if (IS_ERR_OR_NULL(ift)) {
1133 in6_ifa_put(ifp); 1099 in6_ifa_put(ifp);
1134 in6_dev_put(idev); 1100 in6_dev_put(idev);
1135 pr_info("%s: retry temporary address regeneration\n", __func__); 1101 pr_info("%s: retry temporary address regeneration\n", __func__);
@@ -1140,8 +1106,6 @@ retry:
1140 1106
1141 spin_lock_bh(&ift->lock); 1107 spin_lock_bh(&ift->lock);
1142 ift->ifpub = ifp; 1108 ift->ifpub = ifp;
1143 ift->valid_lft = tmp_valid_lft;
1144 ift->prefered_lft = tmp_prefered_lft;
1145 ift->cstamp = now; 1109 ift->cstamp = now;
1146 ift->tstamp = tmp_tstamp; 1110 ift->tstamp = tmp_tstamp;
1147 spin_unlock_bh(&ift->lock); 1111 spin_unlock_bh(&ift->lock);
@@ -1448,6 +1412,23 @@ try_nextdev:
1448} 1412}
1449EXPORT_SYMBOL(ipv6_dev_get_saddr); 1413EXPORT_SYMBOL(ipv6_dev_get_saddr);
1450 1414
1415int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
1416 unsigned char banned_flags)
1417{
1418 struct inet6_ifaddr *ifp;
1419 int err = -EADDRNOTAVAIL;
1420
1421 list_for_each_entry(ifp, &idev->addr_list, if_list) {
1422 if (ifp->scope == IFA_LINK &&
1423 !(ifp->flags & banned_flags)) {
1424 *addr = ifp->addr;
1425 err = 0;
1426 break;
1427 }
1428 }
1429 return err;
1430}
1431
1451int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, 1432int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
1452 unsigned char banned_flags) 1433 unsigned char banned_flags)
1453{ 1434{
@@ -1457,17 +1438,8 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
1457 rcu_read_lock(); 1438 rcu_read_lock();
1458 idev = __in6_dev_get(dev); 1439 idev = __in6_dev_get(dev);
1459 if (idev) { 1440 if (idev) {
1460 struct inet6_ifaddr *ifp;
1461
1462 read_lock_bh(&idev->lock); 1441 read_lock_bh(&idev->lock);
1463 list_for_each_entry(ifp, &idev->addr_list, if_list) { 1442 err = __ipv6_get_lladdr(idev, addr, banned_flags);
1464 if (ifp->scope == IFA_LINK &&
1465 !(ifp->flags & banned_flags)) {
1466 *addr = ifp->addr;
1467 err = 0;
1468 break;
1469 }
1470 }
1471 read_unlock_bh(&idev->lock); 1443 read_unlock_bh(&idev->lock);
1472 } 1444 }
1473 rcu_read_unlock(); 1445 rcu_read_unlock();
@@ -1527,6 +1499,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
1527 return false; 1499 return false;
1528} 1500}
1529 1501
1502/* Compares an address/prefix_len with addresses on device @dev.
1503 * If one is found it returns true.
1504 */
1505bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
1506 const unsigned int prefix_len, struct net_device *dev)
1507{
1508 struct inet6_dev *idev;
1509 struct inet6_ifaddr *ifa;
1510 bool ret = false;
1511
1512 rcu_read_lock();
1513 idev = __in6_dev_get(dev);
1514 if (idev) {
1515 read_lock_bh(&idev->lock);
1516 list_for_each_entry(ifa, &idev->addr_list, if_list) {
1517 ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
1518 if (ret)
1519 break;
1520 }
1521 read_unlock_bh(&idev->lock);
1522 }
1523 rcu_read_unlock();
1524
1525 return ret;
1526}
1527EXPORT_SYMBOL(ipv6_chk_custom_prefix);
1528
1530int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) 1529int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
1531{ 1530{
1532 struct inet6_dev *idev; 1531 struct inet6_dev *idev;
@@ -1581,7 +1580,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
1581{ 1580{
1582 if (ifp->flags&IFA_F_PERMANENT) { 1581 if (ifp->flags&IFA_F_PERMANENT) {
1583 spin_lock_bh(&ifp->lock); 1582 spin_lock_bh(&ifp->lock);
1584 addrconf_del_timer(ifp); 1583 addrconf_del_dad_timer(ifp);
1585 ifp->flags |= IFA_F_TENTATIVE; 1584 ifp->flags |= IFA_F_TENTATIVE;
1586 if (dad_failed) 1585 if (dad_failed)
1587 ifp->flags |= IFA_F_DADFAILED; 1586 ifp->flags |= IFA_F_DADFAILED;
@@ -1801,6 +1800,16 @@ static int addrconf_ifid_gre(u8 *eui, struct net_device *dev)
1801 return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr); 1800 return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr);
1802} 1801}
1803 1802
1803static int addrconf_ifid_ip6tnl(u8 *eui, struct net_device *dev)
1804{
1805 memcpy(eui, dev->perm_addr, 3);
1806 memcpy(eui + 5, dev->perm_addr + 3, 3);
1807 eui[3] = 0xFF;
1808 eui[4] = 0xFE;
1809 eui[0] ^= 2;
1810 return 0;
1811}
1812
1804static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) 1813static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
1805{ 1814{
1806 switch (dev->type) { 1815 switch (dev->type) {
@@ -1819,6 +1828,8 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
1819 return addrconf_ifid_eui64(eui, dev); 1828 return addrconf_ifid_eui64(eui, dev);
1820 case ARPHRD_IEEE1394: 1829 case ARPHRD_IEEE1394:
1821 return addrconf_ifid_ieee1394(eui, dev); 1830 return addrconf_ifid_ieee1394(eui, dev);
1831 case ARPHRD_TUNNEL6:
1832 return addrconf_ifid_ip6tnl(eui, dev);
1822 } 1833 }
1823 return -1; 1834 return -1;
1824} 1835}
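addrconf_ifid_ip6tnl() derives a modified-EUI-64-style interface id from dev->perm_addr: the first three bytes, then ff:fe, then the last three, with the universal/local bit inverted. A standalone sketch of the same byte layout, using a MAC-style six-byte address for illustration:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* xx:xx:xx:yy:yy:yy -> (xx^2):xx:xx:ff:fe:yy:yy:yy */
    static void mac_to_modified_eui64(uint8_t eui[8], const uint8_t mac[6])
    {
            memcpy(eui, mac, 3);
            eui[3] = 0xFF;
            eui[4] = 0xFE;
            memcpy(eui + 5, mac + 3, 3);
            eui[0] ^= 2;    /* invert the universal/local bit */
    }

    int main(void)
    {
            const uint8_t mac[6] = { 0x00, 0x16, 0x3e, 0x12, 0x34, 0x56 };
            uint8_t eui[8];
            int i;

            mac_to_modified_eui64(eui, mac);
            for (i = 0; i < 8; i++)
                    printf("%02x%c", eui[i], i < 7 ? ':' : '\n');
            return 0;       /* prints 02:16:3e:ff:fe:12:34:56 */
    }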
@@ -2044,7 +2055,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
2044 pinfo = (struct prefix_info *) opt; 2055 pinfo = (struct prefix_info *) opt;
2045 2056
2046 if (len < sizeof(struct prefix_info)) { 2057 if (len < sizeof(struct prefix_info)) {
2047 ADBG(("addrconf: prefix option too short\n")); 2058 ADBG("addrconf: prefix option too short\n");
2048 return; 2059 return;
2049 } 2060 }
2050 2061
@@ -2175,16 +2186,19 @@ ok:
2175 */ 2186 */
2176 if (!max_addresses || 2187 if (!max_addresses ||
2177 ipv6_count_addresses(in6_dev) < max_addresses) 2188 ipv6_count_addresses(in6_dev) < max_addresses)
2178 ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len, 2189 ifp = ipv6_add_addr(in6_dev, &addr, NULL,
2190 pinfo->prefix_len,
2179 addr_type&IPV6_ADDR_SCOPE_MASK, 2191 addr_type&IPV6_ADDR_SCOPE_MASK,
2180 addr_flags); 2192 addr_flags, valid_lft,
2193 prefered_lft);
2181 2194
2182 if (IS_ERR_OR_NULL(ifp)) { 2195 if (IS_ERR_OR_NULL(ifp)) {
2183 in6_dev_put(in6_dev); 2196 in6_dev_put(in6_dev);
2184 return; 2197 return;
2185 } 2198 }
2186 2199
2187 update_lft = create = 1; 2200 update_lft = 0;
2201 create = 1;
2188 ifp->cstamp = jiffies; 2202 ifp->cstamp = jiffies;
2189 ifp->tokenized = tokenized; 2203 ifp->tokenized = tokenized;
2190 addrconf_dad_start(ifp); 2204 addrconf_dad_start(ifp);
@@ -2205,44 +2219,22 @@ ok:
2205 stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; 2219 stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
2206 else 2220 else
2207 stored_lft = 0; 2221 stored_lft = 0;
2208 if (!update_lft && stored_lft) { 2222 if (!update_lft && !create && stored_lft) {
2209 if (valid_lft > MIN_VALID_LIFETIME || 2223 const u32 minimum_lft = min(
2210 valid_lft > stored_lft) 2224 stored_lft, (u32)MIN_VALID_LIFETIME);
2211 update_lft = 1; 2225 valid_lft = max(valid_lft, minimum_lft);
2212 else if (stored_lft <= MIN_VALID_LIFETIME) { 2226
2213 /* valid_lft <= stored_lft is always true */ 2227 /* RFC4862 Section 5.5.3e:
2214 /* 2228 * "Note that the preferred lifetime of the
2215 * RFC 4862 Section 5.5.3e: 2229 * corresponding address is always reset to
2216 * "Note that the preferred lifetime of 2230 * the Preferred Lifetime in the received
2217 * the corresponding address is always 2231 * Prefix Information option, regardless of
2218 * reset to the Preferred Lifetime in 2232 * whether the valid lifetime is also reset or
2219 * the received Prefix Information 2233 * ignored."
2220 * option, regardless of whether the 2234 *
2221 * valid lifetime is also reset or 2235 * So we should always update prefered_lft here.
2222 * ignored." 2236 */
2223 * 2237 update_lft = 1;
2224 * So if the preferred lifetime in
2225 * this advertisement is different
2226 * than what we have stored, but the
2227 * valid lifetime is invalid, just
2228 * reset prefered_lft.
2229 *
2230 * We must set the valid lifetime
2231 * to the stored lifetime since we'll
2232 * be updating the timestamp below,
2233 * else we'll set it back to the
2234 * minimum.
2235 */
2236 if (prefered_lft != ifp->prefered_lft) {
2237 valid_lft = stored_lft;
2238 update_lft = 1;
2239 }
2240 } else {
2241 valid_lft = MIN_VALID_LIFETIME;
2242 if (valid_lft < prefered_lft)
2243 prefered_lft = valid_lft;
2244 update_lft = 1;
2245 }
2246 } 2238 }
2247 2239
2248 if (update_lft) { 2240 if (update_lft) {
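The rewritten branch above reduces the RFC 4862 two-hour rule to a single clamp: an unauthenticated RA can never push the remaining valid lifetime below min(stored_lft, MIN_VALID_LIFETIME), so a spoofed advertisement cannot expire an address early, while genuinely short-lived addresses still run down. MIN_VALID_LIFETIME is 2*3600 in this file; worked values:

    #include <stdio.h>

    #define MIN_VALID_LIFETIME (2 * 3600)   /* two hours, RFC 4862 5.5.3e */

    static unsigned int clamp_valid_lft(unsigned int advertised,
                                        unsigned int stored)
    {
            unsigned int minimum = stored < MIN_VALID_LIFETIME ?
                                   stored : MIN_VALID_LIFETIME;
            return advertised > minimum ? advertised : minimum;
    }

    int main(void)
    {
            /* a spoofed RA advertising 60s cannot shrink 9000s below 2h */
            printf("%u\n", clamp_valid_lft(60, 9000));      /* 7200 */
            /* an address with only 600s left just keeps its 600s */
            printf("%u\n", clamp_valid_lft(60, 600));       /* 600 */
            return 0;
    }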
@@ -2402,6 +2394,7 @@ err_exit:
2402 * Manual configuration of address on an interface 2394 * Manual configuration of address on an interface
2403 */ 2395 */
2404static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx, 2396static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx,
2397 const struct in6_addr *peer_pfx,
2405 unsigned int plen, __u8 ifa_flags, __u32 prefered_lft, 2398 unsigned int plen, __u8 ifa_flags, __u32 prefered_lft,
2406 __u32 valid_lft) 2399 __u32 valid_lft)
2407{ 2400{
@@ -2450,15 +2443,10 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p
2450 prefered_lft = timeout; 2443 prefered_lft = timeout;
2451 } 2444 }
2452 2445
2453 ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); 2446 ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags,
2447 valid_lft, prefered_lft);
2454 2448
2455 if (!IS_ERR(ifp)) { 2449 if (!IS_ERR(ifp)) {
2456 spin_lock_bh(&ifp->lock);
2457 ifp->valid_lft = valid_lft;
2458 ifp->prefered_lft = prefered_lft;
2459 ifp->tstamp = jiffies;
2460 spin_unlock_bh(&ifp->lock);
2461
2462 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, 2450 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
2463 expires, flags); 2451 expires, flags);
2464 /* 2452 /*
@@ -2500,12 +2488,6 @@ static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *p
2500 read_unlock_bh(&idev->lock); 2488 read_unlock_bh(&idev->lock);
2501 2489
2502 ipv6_del_addr(ifp); 2490 ipv6_del_addr(ifp);
2503
2504 /* If the last address is deleted administratively,
2505 disable IPv6 on this interface.
2506 */
2507 if (list_empty(&idev->addr_list))
2508 addrconf_ifdown(idev->dev, 1);
2509 return 0; 2491 return 0;
2510 } 2492 }
2511 } 2493 }
@@ -2526,7 +2508,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
2526 return -EFAULT; 2508 return -EFAULT;
2527 2509
2528 rtnl_lock(); 2510 rtnl_lock();
2529 err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, 2511 err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL,
2530 ireq.ifr6_prefixlen, IFA_F_PERMANENT, 2512 ireq.ifr6_prefixlen, IFA_F_PERMANENT,
2531 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); 2513 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
2532 rtnl_unlock(); 2514 rtnl_unlock();
@@ -2556,7 +2538,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
2556{ 2538{
2557 struct inet6_ifaddr *ifp; 2539 struct inet6_ifaddr *ifp;
2558 2540
2559 ifp = ipv6_add_addr(idev, addr, plen, scope, IFA_F_PERMANENT); 2541 ifp = ipv6_add_addr(idev, addr, NULL, plen,
2542 scope, IFA_F_PERMANENT, 0, 0);
2560 if (!IS_ERR(ifp)) { 2543 if (!IS_ERR(ifp)) {
2561 spin_lock_bh(&ifp->lock); 2544 spin_lock_bh(&ifp->lock);
2562 ifp->flags &= ~IFA_F_TENTATIVE; 2545 ifp->flags &= ~IFA_F_TENTATIVE;
@@ -2655,6 +2638,9 @@ static void init_loopback(struct net_device *dev)
2655 if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE)) 2638 if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))
2656 continue; 2639 continue;
2657 2640
2641 if (sp_ifa->rt)
2642 continue;
2643
2658 sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0); 2644 sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0);
2659 2645
2660 /* Failure cases are ignored */ 2646 /* Failure cases are ignored */
@@ -2679,7 +2665,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr
2679#endif 2665#endif
2680 2666
2681 2667
2682 ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags); 2668 ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, 0, 0);
2683 if (!IS_ERR(ifp)) { 2669 if (!IS_ERR(ifp)) {
2684 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); 2670 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
2685 addrconf_dad_start(ifp); 2671 addrconf_dad_start(ifp);
@@ -2699,7 +2685,8 @@ static void addrconf_dev_config(struct net_device *dev)
2699 (dev->type != ARPHRD_ARCNET) && 2685 (dev->type != ARPHRD_ARCNET) &&
2700 (dev->type != ARPHRD_INFINIBAND) && 2686 (dev->type != ARPHRD_INFINIBAND) &&
2701 (dev->type != ARPHRD_IEEE802154) && 2687 (dev->type != ARPHRD_IEEE802154) &&
2702 (dev->type != ARPHRD_IEEE1394)) { 2688 (dev->type != ARPHRD_IEEE1394) &&
2689 (dev->type != ARPHRD_TUNNEL6)) {
2703 /* Alas, we support only Ethernet autoconfiguration. */ 2690 /* Alas, we support only Ethernet autoconfiguration. */
2704 return; 2691 return;
2705 } 2692 }
@@ -2758,8 +2745,6 @@ static void addrconf_gre_config(struct net_device *dev)
2758 struct inet6_dev *idev; 2745 struct inet6_dev *idev;
2759 struct in6_addr addr; 2746 struct in6_addr addr;
2760 2747
2761 pr_info("%s(%s)\n", __func__, dev->name);
2762
2763 ASSERT_RTNL(); 2748 ASSERT_RTNL();
2764 2749
2765 if ((idev = ipv6_find_idev(dev)) == NULL) { 2750 if ((idev = ipv6_find_idev(dev)) == NULL) {
@@ -2787,48 +2772,10 @@ ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
2787 return -1; 2772 return -1;
2788} 2773}
2789 2774
2790static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
2791{
2792 struct net_device *link_dev;
2793 struct net *net = dev_net(idev->dev);
2794
2795 /* first try to inherit the link-local address from the link device */
2796 if (idev->dev->iflink &&
2797 (link_dev = __dev_get_by_index(net, idev->dev->iflink))) {
2798 if (!ipv6_inherit_linklocal(idev, link_dev))
2799 return;
2800 }
2801 /* then try to inherit it from any device */
2802 for_each_netdev(net, link_dev) {
2803 if (!ipv6_inherit_linklocal(idev, link_dev))
2804 return;
2805 }
2806 pr_debug("init ip6-ip6: add_linklocal failed\n");
2807}
2808
2809/*
2810 * Autoconfigure tunnel with a link-local address so routing protocols,
2811 * DHCPv6, MLD etc. can be run over the virtual link
2812 */
2813
2814static void addrconf_ip6_tnl_config(struct net_device *dev)
2815{
2816 struct inet6_dev *idev;
2817
2818 ASSERT_RTNL();
2819
2820 idev = addrconf_add_dev(dev);
2821 if (IS_ERR(idev)) {
2822 pr_debug("init ip6-ip6: add_dev failed\n");
2823 return;
2824 }
2825 ip6_tnl_add_linklocal(idev);
2826}
2827
2828static int addrconf_notify(struct notifier_block *this, unsigned long event, 2775static int addrconf_notify(struct notifier_block *this, unsigned long event,
2829 void *data) 2776 void *ptr)
2830{ 2777{
2831 struct net_device *dev = (struct net_device *) data; 2778 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2832 struct inet6_dev *idev = __in6_dev_get(dev); 2779 struct inet6_dev *idev = __in6_dev_get(dev);
2833 int run_pending = 0; 2780 int run_pending = 0;
2834 int err; 2781 int err;
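This hunk is part of the tree-wide notifier conversion: netdev notifiers now receive a struct netdev_notifier_info * rather than the net_device itself, and the device is recovered with netdev_notifier_info_to_dev(); direct callers must wrap the device first, as the dev_disable_change() hunk further down shows. A minimal receiver sketch, kernel context assumed:

    #include <linux/netdevice.h>

    static int my_notify(struct notifier_block *nb, unsigned long event,
                         void *ptr)
    {
            struct net_device *dev = netdev_notifier_info_to_dev(ptr);

            switch (event) {
            case NETDEV_UP:
                    /* react to dev coming up */
                    break;
            }
            return NOTIFY_DONE;
    }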
@@ -2892,9 +2839,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2892 addrconf_gre_config(dev); 2839 addrconf_gre_config(dev);
2893 break; 2840 break;
2894#endif 2841#endif
2895 case ARPHRD_TUNNEL6:
2896 addrconf_ip6_tnl_config(dev);
2897 break;
2898 case ARPHRD_LOOPBACK: 2842 case ARPHRD_LOOPBACK:
2899 init_loopback(dev); 2843 init_loopback(dev);
2900 break; 2844 break;
@@ -3036,7 +2980,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
3036 hlist_for_each_entry_rcu(ifa, h, addr_lst) { 2980 hlist_for_each_entry_rcu(ifa, h, addr_lst) {
3037 if (ifa->idev == idev) { 2981 if (ifa->idev == idev) {
3038 hlist_del_init_rcu(&ifa->addr_lst); 2982 hlist_del_init_rcu(&ifa->addr_lst);
3039 addrconf_del_timer(ifa); 2983 addrconf_del_dad_timer(ifa);
3040 goto restart; 2984 goto restart;
3041 } 2985 }
3042 } 2986 }
@@ -3045,6 +2989,8 @@ static int addrconf_ifdown(struct net_device *dev, int how)
3045 2989
3046 write_lock_bh(&idev->lock); 2990 write_lock_bh(&idev->lock);
3047 2991
2992 addrconf_del_rs_timer(idev);
2993
3048 /* Step 2: clear flags for stateless addrconf */ 2994 /* Step 2: clear flags for stateless addrconf */
3049 if (!how) 2995 if (!how)
3050 idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); 2996 idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
@@ -3074,7 +3020,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
3074 while (!list_empty(&idev->addr_list)) { 3020 while (!list_empty(&idev->addr_list)) {
3075 ifa = list_first_entry(&idev->addr_list, 3021 ifa = list_first_entry(&idev->addr_list,
3076 struct inet6_ifaddr, if_list); 3022 struct inet6_ifaddr, if_list);
3077 addrconf_del_timer(ifa); 3023 addrconf_del_dad_timer(ifa);
3078 3024
3079 list_del(&ifa->if_list); 3025 list_del(&ifa->if_list);
3080 3026
@@ -3116,10 +3062,11 @@ static int addrconf_ifdown(struct net_device *dev, int how)
3116 3062
3117static void addrconf_rs_timer(unsigned long data) 3063static void addrconf_rs_timer(unsigned long data)
3118{ 3064{
3119 struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; 3065 struct inet6_dev *idev = (struct inet6_dev *)data;
3120 struct inet6_dev *idev = ifp->idev; 3066 struct net_device *dev = idev->dev;
3067 struct in6_addr lladdr;
3121 3068
3122 read_lock(&idev->lock); 3069 write_lock(&idev->lock);
3123 if (idev->dead || !(idev->if_flags & IF_READY)) 3070 if (idev->dead || !(idev->if_flags & IF_READY))
3124 goto out; 3071 goto out;
3125 3072
@@ -3130,18 +3077,21 @@ static void addrconf_rs_timer(unsigned long data)
3130 if (idev->if_flags & IF_RA_RCVD) 3077 if (idev->if_flags & IF_RA_RCVD)
3131 goto out; 3078 goto out;
3132 3079
3133 spin_lock(&ifp->lock); 3080 if (idev->rs_probes++ < idev->cnf.rtr_solicits) {
3134 if (ifp->probes++ < idev->cnf.rtr_solicits) { 3081 write_unlock(&idev->lock);
3135 /* The wait after the last probe can be shorter */ 3082 if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
3136 addrconf_mod_timer(ifp, AC_RS, 3083 ndisc_send_rs(dev, &lladdr,
3137 (ifp->probes == idev->cnf.rtr_solicits) ? 3084 &in6addr_linklocal_allrouters);
3138 idev->cnf.rtr_solicit_delay : 3085 else
3139 idev->cnf.rtr_solicit_interval); 3086 goto put;
3140 spin_unlock(&ifp->lock);
3141 3087
3142 ndisc_send_rs(idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); 3088 write_lock(&idev->lock);
3089 /* The wait after the last probe can be shorter */
3090 addrconf_mod_rs_timer(idev, (idev->rs_probes ==
3091 idev->cnf.rtr_solicits) ?
3092 idev->cnf.rtr_solicit_delay :
3093 idev->cnf.rtr_solicit_interval);
3143 } else { 3094 } else {
3144 spin_unlock(&ifp->lock);
3145 /* 3095 /*
3146 * Note: we do not support deprecated "all on-link" 3096 * Note: we do not support deprecated "all on-link"
3147 * assumption any longer. 3097 * assumption any longer.
@@ -3150,8 +3100,9 @@ static void addrconf_rs_timer(unsigned long data)
3150 } 3100 }
3151 3101
3152out: 3102out:
3153 read_unlock(&idev->lock); 3103 write_unlock(&idev->lock);
3154 in6_ifa_put(ifp); 3104put:
3105 in6_dev_put(idev);
3155} 3106}
3156 3107
3157/* 3108/*
@@ -3167,8 +3118,8 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
3167 else 3118 else
3168 rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1); 3119 rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
3169 3120
3170 ifp->probes = idev->cnf.dad_transmits; 3121 ifp->dad_probes = idev->cnf.dad_transmits;
3171 addrconf_mod_timer(ifp, AC_DAD, rand_num); 3122 addrconf_mod_dad_timer(ifp, rand_num);
3172} 3123}
3173 3124
3174static void addrconf_dad_start(struct inet6_ifaddr *ifp) 3125static void addrconf_dad_start(struct inet6_ifaddr *ifp)
@@ -3229,40 +3180,40 @@ static void addrconf_dad_timer(unsigned long data)
3229 struct inet6_dev *idev = ifp->idev; 3180 struct inet6_dev *idev = ifp->idev;
3230 struct in6_addr mcaddr; 3181 struct in6_addr mcaddr;
3231 3182
3232 if (!ifp->probes && addrconf_dad_end(ifp)) 3183 if (!ifp->dad_probes && addrconf_dad_end(ifp))
3233 goto out; 3184 goto out;
3234 3185
3235 read_lock(&idev->lock); 3186 write_lock(&idev->lock);
3236 if (idev->dead || !(idev->if_flags & IF_READY)) { 3187 if (idev->dead || !(idev->if_flags & IF_READY)) {
3237 read_unlock(&idev->lock); 3188 write_unlock(&idev->lock);
3238 goto out; 3189 goto out;
3239 } 3190 }
3240 3191
3241 spin_lock(&ifp->lock); 3192 spin_lock(&ifp->lock);
3242 if (ifp->state == INET6_IFADDR_STATE_DEAD) { 3193 if (ifp->state == INET6_IFADDR_STATE_DEAD) {
3243 spin_unlock(&ifp->lock); 3194 spin_unlock(&ifp->lock);
3244 read_unlock(&idev->lock); 3195 write_unlock(&idev->lock);
3245 goto out; 3196 goto out;
3246 } 3197 }
3247 3198
3248 if (ifp->probes == 0) { 3199 if (ifp->dad_probes == 0) {
3249 /* 3200 /*
3250 * DAD was successful 3201 * DAD was successful
3251 */ 3202 */
3252 3203
3253 ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); 3204 ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
3254 spin_unlock(&ifp->lock); 3205 spin_unlock(&ifp->lock);
3255 read_unlock(&idev->lock); 3206 write_unlock(&idev->lock);
3256 3207
3257 addrconf_dad_completed(ifp); 3208 addrconf_dad_completed(ifp);
3258 3209
3259 goto out; 3210 goto out;
3260 } 3211 }
3261 3212
3262 ifp->probes--; 3213 ifp->dad_probes--;
3263 addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time); 3214 addrconf_mod_dad_timer(ifp, ifp->idev->nd_parms->retrans_time);
3264 spin_unlock(&ifp->lock); 3215 spin_unlock(&ifp->lock);
3265 read_unlock(&idev->lock); 3216 write_unlock(&idev->lock);
3266 3217
3267 /* send a neighbour solicitation for our addr */ 3218 /* send a neighbour solicitation for our addr */
3268 addrconf_addr_solict_mult(&ifp->addr, &mcaddr); 3219 addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
@@ -3274,6 +3225,10 @@ out:
3274static void addrconf_dad_completed(struct inet6_ifaddr *ifp) 3225static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
3275{ 3226{
3276 struct net_device *dev = ifp->idev->dev; 3227 struct net_device *dev = ifp->idev->dev;
3228 struct in6_addr lladdr;
3229 bool send_rs, send_mld;
3230
3231 addrconf_del_dad_timer(ifp);
3277 3232
3278 /* 3233 /*
3279 * Configure the address for reception. Now it is valid. 3234 * Configure the address for reception. Now it is valid.
@@ -3285,22 +3240,41 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
3285 router advertisements, start sending router solicitations. 3240 router advertisements, start sending router solicitations.
3286 */ 3241 */
3287 3242
3288 if (ipv6_accept_ra(ifp->idev) && 3243 read_lock_bh(&ifp->idev->lock);
3289 ifp->idev->cnf.rtr_solicits > 0 && 3244 spin_lock(&ifp->lock);
3290 (dev->flags&IFF_LOOPBACK) == 0 && 3245 send_mld = ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL &&
3291 (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { 3246 ifp->idev->valid_ll_addr_cnt == 1;
3247 send_rs = send_mld &&
3248 ipv6_accept_ra(ifp->idev) &&
3249 ifp->idev->cnf.rtr_solicits > 0 &&
3250 (dev->flags&IFF_LOOPBACK) == 0;
3251 spin_unlock(&ifp->lock);
3252 read_unlock_bh(&ifp->idev->lock);
3253
3254 /* While dad is in progress mld report's source address is in6_addrany.
3255 * Resend with proper ll now.
3256 */
3257 if (send_mld)
3258 ipv6_mc_dad_complete(ifp->idev);
3259
3260 if (send_rs) {
3292 /* 3261 /*
3293 * If a host has already performed a random delay 3262 * If a host has already performed a random delay

3294 * [...] as part of DAD [...] there is no need 3263 * [...] as part of DAD [...] there is no need
3295 * to delay again before sending the first RS 3264 * to delay again before sending the first RS
3296 */ 3265 */
3297 ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); 3266 if (ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
3267 return;
3268 ndisc_send_rs(dev, &lladdr, &in6addr_linklocal_allrouters);
3298 3269
3299 spin_lock_bh(&ifp->lock); 3270 write_lock_bh(&ifp->idev->lock);
3300 ifp->probes = 1; 3271 spin_lock(&ifp->lock);
3272 ifp->idev->rs_probes = 1;
3301 ifp->idev->if_flags |= IF_RS_SENT; 3273 ifp->idev->if_flags |= IF_RS_SENT;
3302 addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval); 3274 addrconf_mod_rs_timer(ifp->idev,
3303 spin_unlock_bh(&ifp->lock); 3275 ifp->idev->cnf.rtr_solicit_interval);
3276 spin_unlock(&ifp->lock);
3277 write_unlock_bh(&ifp->idev->lock);
3304 } 3278 }
3305} 3279}
3306 3280
@@ -3603,8 +3577,8 @@ restart:
3603 if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX)) 3577 if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
3604 next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX; 3578 next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
3605 3579
3606 ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", 3580 ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
3607 now, next, next_sec, next_sched)); 3581 now, next, next_sec, next_sched);
3608 3582
3609 addr_chk_timer.expires = next_sched; 3583 addr_chk_timer.expires = next_sched;
3610 add_timer(&addr_chk_timer); 3584 add_timer(&addr_chk_timer);
@@ -3612,18 +3586,20 @@ restart:
3612 rcu_read_unlock_bh(); 3586 rcu_read_unlock_bh();
3613} 3587}
3614 3588
3615static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) 3589static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,
3590 struct in6_addr **peer_pfx)
3616{ 3591{
3617 struct in6_addr *pfx = NULL; 3592 struct in6_addr *pfx = NULL;
3618 3593
3594 *peer_pfx = NULL;
3595
3619 if (addr) 3596 if (addr)
3620 pfx = nla_data(addr); 3597 pfx = nla_data(addr);
3621 3598
3622 if (local) { 3599 if (local) {
3623 if (pfx && nla_memcmp(local, pfx, sizeof(*pfx))) 3600 if (pfx && nla_memcmp(local, pfx, sizeof(*pfx)))
3624 pfx = NULL; 3601 *peer_pfx = pfx;
3625 else 3602 pfx = nla_data(local);
3626 pfx = nla_data(local);
3627 } 3603 }
3628 3604
3629 return pfx; 3605 return pfx;
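extract_addr() now follows the IPv4 pointopoint convention: with both attributes present and different, IFA_LOCAL is the local address and IFA_ADDRESS the peer; with a single attribute, it names the local address and *peer_pfx stays NULL. An illustrative caller, kernel context assumed, mirroring the deladdr/newaddr hunks below:

    static int use_addrs(struct nlattr *tb[])
    {
            struct in6_addr *pfx, *peer_pfx;

            pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx);
            if (pfx == NULL)
                    return -EINVAL; /* neither attribute was supplied */
            /* peer_pfx is non-NULL only when IFA_LOCAL and IFA_ADDRESS
               both appeared and differ */
            return 0;
    }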
@@ -3641,7 +3617,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
3641 struct net *net = sock_net(skb->sk); 3617 struct net *net = sock_net(skb->sk);
3642 struct ifaddrmsg *ifm; 3618 struct ifaddrmsg *ifm;
3643 struct nlattr *tb[IFA_MAX+1]; 3619 struct nlattr *tb[IFA_MAX+1];
3644 struct in6_addr *pfx; 3620 struct in6_addr *pfx, *peer_pfx;
3645 int err; 3621 int err;
3646 3622
3647 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); 3623 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
@@ -3649,7 +3625,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
3649 return err; 3625 return err;
3650 3626
3651 ifm = nlmsg_data(nlh); 3627 ifm = nlmsg_data(nlh);
3652 pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); 3628 pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx);
3653 if (pfx == NULL) 3629 if (pfx == NULL)
3654 return -EINVAL; 3630 return -EINVAL;
3655 3631
@@ -3707,7 +3683,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
3707 struct net *net = sock_net(skb->sk); 3683 struct net *net = sock_net(skb->sk);
3708 struct ifaddrmsg *ifm; 3684 struct ifaddrmsg *ifm;
3709 struct nlattr *tb[IFA_MAX+1]; 3685 struct nlattr *tb[IFA_MAX+1];
3710 struct in6_addr *pfx; 3686 struct in6_addr *pfx, *peer_pfx;
3711 struct inet6_ifaddr *ifa; 3687 struct inet6_ifaddr *ifa;
3712 struct net_device *dev; 3688 struct net_device *dev;
3713 u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; 3689 u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
@@ -3719,7 +3695,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
3719 return err; 3695 return err;
3720 3696
3721 ifm = nlmsg_data(nlh); 3697 ifm = nlmsg_data(nlh);
3722 pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); 3698 pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx);
3723 if (pfx == NULL) 3699 if (pfx == NULL)
3724 return -EINVAL; 3700 return -EINVAL;
3725 3701
@@ -3747,7 +3723,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
3747 * It would be best to check for !NLM_F_CREATE here but 3723 * It would be best to check for !NLM_F_CREATE here but
3748 * userspace already relies on not having to provide this. 3724 * userspace already relies on not having to provide this.
3749 */ 3725 */
3750 return inet6_addr_add(net, ifm->ifa_index, pfx, 3726 return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx,
3751 ifm->ifa_prefixlen, ifa_flags, 3727 ifm->ifa_prefixlen, ifa_flags,
3752 preferred_lft, valid_lft); 3728 preferred_lft, valid_lft);
3753 } 3729 }
@@ -3804,6 +3780,7 @@ static inline int rt_scope(int ifa_scope)
3804static inline int inet6_ifaddr_msgsize(void) 3780static inline int inet6_ifaddr_msgsize(void)
3805{ 3781{
3806 return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) 3782 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
3783 + nla_total_size(16) /* IFA_LOCAL */
3807 + nla_total_size(16) /* IFA_ADDRESS */ 3784 + nla_total_size(16) /* IFA_ADDRESS */
3808 + nla_total_size(sizeof(struct ifa_cacheinfo)); 3785 + nla_total_size(sizeof(struct ifa_cacheinfo));
3809} 3786}
@@ -3842,13 +3819,22 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
3842 valid = INFINITY_LIFE_TIME; 3819 valid = INFINITY_LIFE_TIME;
3843 } 3820 }
3844 3821
3845 if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 || 3822 if (!ipv6_addr_any(&ifa->peer_addr)) {
3846 put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) { 3823 if (nla_put(skb, IFA_LOCAL, 16, &ifa->addr) < 0 ||
3847 nlmsg_cancel(skb, nlh); 3824 nla_put(skb, IFA_ADDRESS, 16, &ifa->peer_addr) < 0)
3848 return -EMSGSIZE; 3825 goto error;
3849 } 3826 } else
3827 if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0)
3828 goto error;
3829
3830 if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
3831 goto error;
3850 3832
3851 return nlmsg_end(skb, nlh); 3833 return nlmsg_end(skb, nlh);
3834
3835error:
3836 nlmsg_cancel(skb, nlh);
3837 return -EMSGSIZE;
3852} 3838}
3853 3839
3854static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, 3840static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
@@ -4048,7 +4034,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)
4048 struct net *net = sock_net(in_skb->sk); 4034 struct net *net = sock_net(in_skb->sk);
4049 struct ifaddrmsg *ifm; 4035 struct ifaddrmsg *ifm;
4050 struct nlattr *tb[IFA_MAX+1]; 4036 struct nlattr *tb[IFA_MAX+1];
4051 struct in6_addr *addr = NULL; 4037 struct in6_addr *addr = NULL, *peer;
4052 struct net_device *dev = NULL; 4038 struct net_device *dev = NULL;
4053 struct inet6_ifaddr *ifa; 4039 struct inet6_ifaddr *ifa;
4054 struct sk_buff *skb; 4040 struct sk_buff *skb;
@@ -4058,7 +4044,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)
4058 if (err < 0) 4044 if (err < 0)
4059 goto errout; 4045 goto errout;
4060 4046
4061 addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); 4047 addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
4062 if (addr == NULL) { 4048 if (addr == NULL) {
4063 err = -EINVAL; 4049 err = -EINVAL;
4064 goto errout; 4050 goto errout;
@@ -4138,6 +4124,10 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
4138 array[DEVCONF_RTR_SOLICIT_DELAY] = 4124 array[DEVCONF_RTR_SOLICIT_DELAY] =
4139 jiffies_to_msecs(cnf->rtr_solicit_delay); 4125 jiffies_to_msecs(cnf->rtr_solicit_delay);
4140 array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; 4126 array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
4127 array[DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] =
4128 jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval);
4129 array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] =
4130 jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval);
4141#ifdef CONFIG_IPV6_PRIVACY 4131#ifdef CONFIG_IPV6_PRIVACY
4142 array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr; 4132 array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
4143 array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft; 4133 array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
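The two new DEVCONF slots are reported in milliseconds while the kernel stores jiffies; the matching sysctl entries added near the end of this file use proc_dointvec_ms_jiffies, which applies the same conversion on the write path. The conversion pair, kernel context assumed:

    #include <linux/jiffies.h>

    static void mld_interval_example(void)
    {
            /* store: 10000 ms from userspace becomes jiffies */
            unsigned long interval = msecs_to_jiffies(10000);
            /* report: back to milliseconds, as ipv6_store_devconf() does */
            unsigned int ms = jiffies_to_msecs(interval);

            (void)ms;
    }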
@@ -4168,6 +4158,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
4168 array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; 4158 array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
4169 array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; 4159 array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
4170 array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; 4160 array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
4161 array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
4171} 4162}
4172 4163
4173static inline size_t inet6_ifla6_size(void) 4164static inline size_t inet6_ifla6_size(void)
@@ -4303,6 +4294,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
4303 struct inet6_ifaddr *ifp; 4294 struct inet6_ifaddr *ifp;
4304 struct net_device *dev = idev->dev; 4295 struct net_device *dev = idev->dev;
4305 bool update_rs = false; 4296 bool update_rs = false;
4297 struct in6_addr ll_addr;
4306 4298
4307 if (token == NULL) 4299 if (token == NULL)
4308 return -EINVAL; 4300 return -EINVAL;
@@ -4322,11 +4314,9 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
4322 4314
4323 write_unlock_bh(&idev->lock); 4315 write_unlock_bh(&idev->lock);
4324 4316
4325 if (!idev->dead && (idev->if_flags & IF_READY)) { 4317 if (!idev->dead && (idev->if_flags & IF_READY) &&
4326 struct in6_addr ll_addr; 4318 !ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE |
4327 4319 IFA_F_OPTIMISTIC)) {
4328 ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE |
4329 IFA_F_OPTIMISTIC);
4330 4320
4331 /* If we're not ready, then normal ifup will take care 4321 /* If we're not ready, then normal ifup will take care
4332 * of this. Otherwise, we need to request our rs here. 4322 * of this. Otherwise, we need to request our rs here.
@@ -4337,8 +4327,11 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
4337 4327
4338 write_lock_bh(&idev->lock); 4328 write_lock_bh(&idev->lock);
4339 4329
4340 if (update_rs) 4330 if (update_rs) {
4341 idev->if_flags |= IF_RS_SENT; 4331 idev->if_flags |= IF_RS_SENT;
4332 idev->rs_probes = 1;
4333 addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval);
4334 }
4342 4335
4343 /* Well, that's kinda nasty ... */ 4336 /* Well, that's kinda nasty ... */
4344 list_for_each_entry(ifp, &idev->addr_list, if_list) { 4337 list_for_each_entry(ifp, &idev->addr_list, if_list) {
@@ -4351,6 +4344,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
4351 } 4344 }
4352 4345
4353 write_unlock_bh(&idev->lock); 4346 write_unlock_bh(&idev->lock);
4347 addrconf_verify(0);
4354 return 0; 4348 return 0;
4355} 4349}
4356 4350
@@ -4548,6 +4542,19 @@ errout:
4548 rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); 4542 rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
4549} 4543}
4550 4544
4545static void update_valid_ll_addr_cnt(struct inet6_ifaddr *ifp, int count)
4546{
4547 write_lock_bh(&ifp->idev->lock);
4548 spin_lock(&ifp->lock);
4549 if (((ifp->flags & (IFA_F_PERMANENT|IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|
4550 IFA_F_DADFAILED)) == IFA_F_PERMANENT) &&
4551 (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL))
4552 ifp->idev->valid_ll_addr_cnt += count;
4553 WARN_ON(ifp->idev->valid_ll_addr_cnt < 0);
4554 spin_unlock(&ifp->lock);
4555 write_unlock_bh(&ifp->idev->lock);
4556}
4557
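update_valid_ll_addr_cnt() counts only addresses that are permanent and not tentative, optimistic, or DAD-failed, using one mask-and-compare over all four flags. A standalone sketch of the idiom; the flag values are copied from include/uapi/linux/if_addr.h:

    #include <stdio.h>

    #define IFA_F_OPTIMISTIC 0x04
    #define IFA_F_DADFAILED  0x08
    #define IFA_F_TENTATIVE  0x40
    #define IFA_F_PERMANENT  0x80

    /* permanent must be set, the other three must be clear */
    static int is_valid(unsigned int flags)
    {
            return (flags & (IFA_F_PERMANENT | IFA_F_TENTATIVE |
                             IFA_F_OPTIMISTIC | IFA_F_DADFAILED))
                    == IFA_F_PERMANENT;
    }

    int main(void)
    {
            printf("%d\n", is_valid(IFA_F_PERMANENT));                    /* 1 */
            printf("%d\n", is_valid(IFA_F_PERMANENT | IFA_F_TENTATIVE)); /* 0 */
            return 0;
    }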
4551static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) 4558static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
4552{ 4559{
4553 struct net *net = dev_net(ifp->idev->dev); 4560 struct net *net = dev_net(ifp->idev->dev);
@@ -4556,6 +4563,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
4556 4563
4557 switch (event) { 4564 switch (event) {
4558 case RTM_NEWADDR: 4565 case RTM_NEWADDR:
4566 update_valid_ll_addr_cnt(ifp, 1);
4567
4559 /* 4568 /*
4560 * If the address was optimistic 4569 * If the address was optimistic
4561 * we inserted the route at the start of 4570 * we inserted the route at the start of
@@ -4566,11 +4575,28 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
4566 ip6_ins_rt(ifp->rt); 4575 ip6_ins_rt(ifp->rt);
4567 if (ifp->idev->cnf.forwarding) 4576 if (ifp->idev->cnf.forwarding)
4568 addrconf_join_anycast(ifp); 4577 addrconf_join_anycast(ifp);
4578 if (!ipv6_addr_any(&ifp->peer_addr))
4579 addrconf_prefix_route(&ifp->peer_addr, 128,
4580 ifp->idev->dev, 0, 0);
4569 break; 4581 break;
4570 case RTM_DELADDR: 4582 case RTM_DELADDR:
4583 update_valid_ll_addr_cnt(ifp, -1);
4584
4571 if (ifp->idev->cnf.forwarding) 4585 if (ifp->idev->cnf.forwarding)
4572 addrconf_leave_anycast(ifp); 4586 addrconf_leave_anycast(ifp);
4573 addrconf_leave_solict(ifp->idev, &ifp->addr); 4587 addrconf_leave_solict(ifp->idev, &ifp->addr);
4588 if (!ipv6_addr_any(&ifp->peer_addr)) {
4589 struct rt6_info *rt;
4590 struct net_device *dev = ifp->idev->dev;
4591
4592 rt = rt6_lookup(dev_net(dev), &ifp->peer_addr, NULL,
4593 dev->ifindex, 1);
4594 if (rt) {
4595 dst_hold(&rt->dst);
4596 if (ip6_del_rt(rt))
4597 dst_free(&rt->dst);
4598 }
4599 }
4574 dst_hold(&ifp->rt->dst); 4600 dst_hold(&ifp->rt->dst);
4575 4601
4576 if (ip6_del_rt(ifp->rt)) 4602 if (ip6_del_rt(ifp->rt))
@@ -4578,6 +4604,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
4578 break; 4604 break;
4579 } 4605 }
4580 atomic_inc(&net->ipv6.dev_addr_genid); 4606 atomic_inc(&net->ipv6.dev_addr_genid);
4607 rt_genid_bump_ipv6(net);
4581} 4608}
4582 4609
4583static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) 4610static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -4591,13 +4618,13 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
4591#ifdef CONFIG_SYSCTL 4618#ifdef CONFIG_SYSCTL
4592 4619
4593static 4620static
4594int addrconf_sysctl_forward(ctl_table *ctl, int write, 4621int addrconf_sysctl_forward(struct ctl_table *ctl, int write,
4595 void __user *buffer, size_t *lenp, loff_t *ppos) 4622 void __user *buffer, size_t *lenp, loff_t *ppos)
4596{ 4623{
4597 int *valp = ctl->data; 4624 int *valp = ctl->data;
4598 int val = *valp; 4625 int val = *valp;
4599 loff_t pos = *ppos; 4626 loff_t pos = *ppos;
4600 ctl_table lctl; 4627 struct ctl_table lctl;
4601 int ret; 4628 int ret;
4602 4629
4603 /* 4630 /*
@@ -4618,13 +4645,16 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write,
4618 4645
4619static void dev_disable_change(struct inet6_dev *idev) 4646static void dev_disable_change(struct inet6_dev *idev)
4620{ 4647{
4648 struct netdev_notifier_info info;
4649
4621 if (!idev || !idev->dev) 4650 if (!idev || !idev->dev)
4622 return; 4651 return;
4623 4652
4653 netdev_notifier_info_init(&info, idev->dev);
4624 if (idev->cnf.disable_ipv6) 4654 if (idev->cnf.disable_ipv6)
4625 addrconf_notify(NULL, NETDEV_DOWN, idev->dev); 4655 addrconf_notify(NULL, NETDEV_DOWN, &info);
4626 else 4656 else
4627 addrconf_notify(NULL, NETDEV_UP, idev->dev); 4657 addrconf_notify(NULL, NETDEV_UP, &info);
4628} 4658}
4629 4659
4630static void addrconf_disable_change(struct net *net, __s32 newf) 4660static void addrconf_disable_change(struct net *net, __s32 newf)
@@ -4673,13 +4703,13 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf)
4673} 4703}
4674 4704
4675static 4705static
4676int addrconf_sysctl_disable(ctl_table *ctl, int write, 4706int addrconf_sysctl_disable(struct ctl_table *ctl, int write,
4677 void __user *buffer, size_t *lenp, loff_t *ppos) 4707 void __user *buffer, size_t *lenp, loff_t *ppos)
4678{ 4708{
4679 int *valp = ctl->data; 4709 int *valp = ctl->data;
4680 int val = *valp; 4710 int val = *valp;
4681 loff_t pos = *ppos; 4711 loff_t pos = *ppos;
4682 ctl_table lctl; 4712 struct ctl_table lctl;
4683 int ret; 4713 int ret;
4684 4714
4685 /* 4715 /*
@@ -4701,7 +4731,7 @@ int addrconf_sysctl_disable(ctl_table *ctl, int write,
4701static struct addrconf_sysctl_table 4731static struct addrconf_sysctl_table
4702{ 4732{
4703 struct ctl_table_header *sysctl_header; 4733 struct ctl_table_header *sysctl_header;
4704 ctl_table addrconf_vars[DEVCONF_MAX+1]; 4734 struct ctl_table addrconf_vars[DEVCONF_MAX+1];
4705} addrconf_sysctl __read_mostly = { 4735} addrconf_sysctl __read_mostly = {
4706 .sysctl_header = NULL, 4736 .sysctl_header = NULL,
4707 .addrconf_vars = { 4737 .addrconf_vars = {
@@ -4782,6 +4812,22 @@ static struct addrconf_sysctl_table
4782 .mode = 0644, 4812 .mode = 0644,
4783 .proc_handler = proc_dointvec, 4813 .proc_handler = proc_dointvec,
4784 }, 4814 },
4815 {
4816 .procname = "mldv1_unsolicited_report_interval",
4817 .data =
4818 &ipv6_devconf.mldv1_unsolicited_report_interval,
4819 .maxlen = sizeof(int),
4820 .mode = 0644,
4821 .proc_handler = proc_dointvec_ms_jiffies,
4822 },
4823 {
4824 .procname = "mldv2_unsolicited_report_interval",
4825 .data =
4826 &ipv6_devconf.mldv2_unsolicited_report_interval,
4827 .maxlen = sizeof(int),
4828 .mode = 0644,
4829 .proc_handler = proc_dointvec_ms_jiffies,
4830 },
4785#ifdef CONFIG_IPV6_PRIVACY 4831#ifdef CONFIG_IPV6_PRIVACY
4786 { 4832 {
4787 .procname = "use_tempaddr", 4833 .procname = "use_tempaddr",
@@ -4927,6 +4973,13 @@ static struct addrconf_sysctl_table
4927 .proc_handler = proc_dointvec 4973 .proc_handler = proc_dointvec
4928 }, 4974 },
4929 { 4975 {
4976 .procname = "suppress_frag_ndisc",
4977 .data = &ipv6_devconf.suppress_frag_ndisc,
4978 .maxlen = sizeof(int),
4979 .mode = 0644,
4980 .proc_handler = proc_dointvec
4981 },
4982 {
4930 /* sentinel */ 4983 /* sentinel */
4931 } 4984 }
4932 }, 4985 },
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 72104562c864..4c11cbcf8308 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -5,6 +5,8 @@
5 5
6#include <linux/export.h> 6#include <linux/export.h>
7#include <net/ipv6.h> 7#include <net/ipv6.h>
8#include <net/addrconf.h>
9#include <net/ip.h>
8 10
9#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16) 11#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16)
10 12
@@ -97,3 +99,52 @@ int inet6addr_notifier_call_chain(unsigned long val, void *v)
97 return atomic_notifier_call_chain(&inet6addr_chain, val, v); 99 return atomic_notifier_call_chain(&inet6addr_chain, val, v);
98} 100}
99EXPORT_SYMBOL(inet6addr_notifier_call_chain); 101EXPORT_SYMBOL(inet6addr_notifier_call_chain);
102
103const struct ipv6_stub *ipv6_stub __read_mostly;
104EXPORT_SYMBOL_GPL(ipv6_stub);
105
106/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
107const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
108EXPORT_SYMBOL(in6addr_loopback);
109const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
110EXPORT_SYMBOL(in6addr_any);
111const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
112EXPORT_SYMBOL(in6addr_linklocal_allnodes);
113const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
114EXPORT_SYMBOL(in6addr_linklocal_allrouters);
115const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
116EXPORT_SYMBOL(in6addr_interfacelocal_allnodes);
117const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
118EXPORT_SYMBOL(in6addr_interfacelocal_allrouters);
119const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
120EXPORT_SYMBOL(in6addr_sitelocal_allrouters);
121
122static void snmp6_free_dev(struct inet6_dev *idev)
123{
124 kfree(idev->stats.icmpv6msgdev);
125 kfree(idev->stats.icmpv6dev);
126 snmp_mib_free((void __percpu **)idev->stats.ipv6);
127}
128
129/* Nobody refers to this device, we may destroy it. */
130
131void in6_dev_finish_destroy(struct inet6_dev *idev)
132{
133 struct net_device *dev = idev->dev;
134
135 WARN_ON(!list_empty(&idev->addr_list));
136 WARN_ON(idev->mc_list != NULL);
137 WARN_ON(timer_pending(&idev->rs_timer));
138
139#ifdef NET_REFCNT_DEBUG
140 pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL");
141#endif
142 dev_put(dev);
143 if (!idev->dead) {
144 pr_warn("Freeing alive inet6 device %p\n", idev);
145 return;
146 }
147 snmp6_free_dev(idev);
148 kfree_rcu(idev, rcu);
149}
150EXPORT_SYMBOL(in6_dev_finish_destroy);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index f083a583a05c..b30ad3741b46 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -251,38 +251,36 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
251/* add a label */ 251/* add a label */
252static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) 252static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
253{ 253{
254 struct hlist_node *n;
255 struct ip6addrlbl_entry *last = NULL, *p = NULL;
254 int ret = 0; 256 int ret = 0;
255 257
256 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", 258 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
257 __func__, 259 replace);
258 newp, replace);
259 260
260 if (hlist_empty(&ip6addrlbl_table.head)) { 261 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
261 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 262 if (p->prefixlen == newp->prefixlen &&
262 } else { 263 net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
263 struct hlist_node *n; 264 p->ifindex == newp->ifindex &&
264 struct ip6addrlbl_entry *p = NULL; 265 ipv6_addr_equal(&p->prefix, &newp->prefix)) {
265 hlist_for_each_entry_safe(p, n, 266 if (!replace) {
266 &ip6addrlbl_table.head, list) { 267 ret = -EEXIST;
267 if (p->prefixlen == newp->prefixlen &&
268 net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
269 p->ifindex == newp->ifindex &&
270 ipv6_addr_equal(&p->prefix, &newp->prefix)) {
271 if (!replace) {
272 ret = -EEXIST;
273 goto out;
274 }
275 hlist_replace_rcu(&p->list, &newp->list);
276 ip6addrlbl_put(p);
277 goto out;
278 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
279 (p->prefixlen < newp->prefixlen)) {
280 hlist_add_before_rcu(&newp->list, &p->list);
281 goto out; 268 goto out;
282 } 269 }
270 hlist_replace_rcu(&p->list, &newp->list);
271 ip6addrlbl_put(p);
272 goto out;
273 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
274 (p->prefixlen < newp->prefixlen)) {
275 hlist_add_before_rcu(&newp->list, &p->list);
276 goto out;
283 } 277 }
284 hlist_add_after_rcu(&p->list, &newp->list); 278 last = p;
285 } 279 }
280 if (last)
281 hlist_add_after_rcu(&last->list, &newp->list);
282 else
283 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
286out: 284out:
287 if (!ret) 285 if (!ret)
288 ip6addrlbl_table.seq++; 286 ip6addrlbl_table.seq++;
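The rewrite folds the old empty-list special case into the main walk: a trailing "last" pointer follows the loop, the new entry is inserted before the first node that sorts after it, and otherwise lands after "last", or at the head when the list was empty. The same shape with plain singly linked nodes, simplified to the prefix-length key (the ifindex tiebreak is dropped for brevity):

    #include <stddef.h>

    struct lbl {
            int prefixlen;
            struct lbl *next;
    };

    /* keep entries sorted by decreasing prefix length */
    static void insert_sorted(struct lbl **head, struct lbl *newp)
    {
            struct lbl *p, *last = NULL;

            newp->next = NULL;
            for (p = *head; p; p = p->next) {
                    if (p->prefixlen < newp->prefixlen) {
                            newp->next = p; /* hlist_add_before_rcu() */
                            if (last)
                                    last->next = newp;
                            else
                                    *head = newp;
                            return;
                    }
                    last = p;
            }
            if (last)
                    last->next = newp;      /* hlist_add_after_rcu() */
            else
                    *head = newp;           /* hlist_add_head_rcu() */
    }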
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ab5c7ad482cd..7c96100b021e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -49,12 +49,14 @@
49#include <net/udp.h> 49#include <net/udp.h>
50#include <net/udplite.h> 50#include <net/udplite.h>
51#include <net/tcp.h> 51#include <net/tcp.h>
52#include <net/ping.h>
52#include <net/protocol.h> 53#include <net/protocol.h>
53#include <net/inet_common.h> 54#include <net/inet_common.h>
54#include <net/route.h> 55#include <net/route.h>
55#include <net/transp_v6.h> 56#include <net/transp_v6.h>
56#include <net/ip6_route.h> 57#include <net/ip6_route.h>
57#include <net/addrconf.h> 58#include <net/addrconf.h>
59#include <net/ndisc.h>
58#ifdef CONFIG_IPV6_TUNNEL 60#ifdef CONFIG_IPV6_TUNNEL
59#include <net/ip6_tunnel.h> 61#include <net/ip6_tunnel.h>
60#endif 62#endif
@@ -765,6 +767,7 @@ static int __net_init inet6_net_init(struct net *net)
765 767
766 net->ipv6.sysctl.bindv6only = 0; 768 net->ipv6.sysctl.bindv6only = 0;
767 net->ipv6.sysctl.icmpv6_time = 1*HZ; 769 net->ipv6.sysctl.icmpv6_time = 1*HZ;
770 atomic_set(&net->ipv6.rt_genid, 0);
768 771
769 err = ipv6_init_mibs(net); 772 err = ipv6_init_mibs(net);
770 if (err) 773 if (err)
@@ -808,6 +811,15 @@ static struct pernet_operations inet6_net_ops = {
808 .exit = inet6_net_exit, 811 .exit = inet6_net_exit,
809}; 812};
810 813
814static const struct ipv6_stub ipv6_stub_impl = {
815 .ipv6_sock_mc_join = ipv6_sock_mc_join,
816 .ipv6_sock_mc_drop = ipv6_sock_mc_drop,
817 .ipv6_dst_lookup = ip6_dst_lookup,
818 .udpv6_encap_enable = udpv6_encap_enable,
819 .ndisc_send_na = ndisc_send_na,
820 .nd_tbl = &nd_tbl,
821};
822
811static int __init inet6_init(void) 823static int __init inet6_init(void)
812{ 824{
813 struct list_head *r; 825 struct list_head *r;
@@ -840,6 +852,9 @@ static int __init inet6_init(void)
840 if (err) 852 if (err)
841 goto out_unregister_udplite_proto; 853 goto out_unregister_udplite_proto;
842 854
855 err = proto_register(&pingv6_prot, 1);
856 if (err)
857 goto out_unregister_ping_proto;
843 858
844 /* We MUST register RAW sockets before we create the ICMP6, 859 /* We MUST register RAW sockets before we create the ICMP6,
845 * IGMP6, or NDISC control sockets. 860 * IGMP6, or NDISC control sockets.
@@ -879,6 +894,9 @@ static int __init inet6_init(void)
879 err = igmp6_init(); 894 err = igmp6_init();
880 if (err) 895 if (err)
881 goto igmp_fail; 896 goto igmp_fail;
897
898 ipv6_stub = &ipv6_stub_impl;
899
882 err = ipv6_netfilter_init(); 900 err = ipv6_netfilter_init();
883 if (err) 901 if (err)
884 goto netfilter_fail; 902 goto netfilter_fail;
@@ -897,6 +915,9 @@ static int __init inet6_init(void)
897 err = ip6_route_init(); 915 err = ip6_route_init();
898 if (err) 916 if (err)
899 goto ip6_route_fail; 917 goto ip6_route_fail;
918 err = ndisc_late_init();
919 if (err)
920 goto ndisc_late_fail;
900 err = ip6_flowlabel_init(); 921 err = ip6_flowlabel_init();
901 if (err) 922 if (err)
902 goto ip6_flowlabel_fail; 923 goto ip6_flowlabel_fail;
@@ -930,6 +951,10 @@ static int __init inet6_init(void)
930 if (err) 951 if (err)
931 goto ipv6_packet_fail; 952 goto ipv6_packet_fail;
932 953
954 err = pingv6_init();
955 if (err)
956 goto pingv6_fail;
957
933#ifdef CONFIG_SYSCTL 958#ifdef CONFIG_SYSCTL
934 err = ipv6_sysctl_register(); 959 err = ipv6_sysctl_register();
935 if (err) 960 if (err)
@@ -942,6 +967,8 @@ out:
942sysctl_fail: 967sysctl_fail:
943 ipv6_packet_cleanup(); 968 ipv6_packet_cleanup();
944#endif 969#endif
970pingv6_fail:
971 pingv6_exit();
945ipv6_packet_fail: 972ipv6_packet_fail:
946 tcpv6_exit(); 973 tcpv6_exit();
947tcpv6_fail: 974tcpv6_fail:
@@ -957,6 +984,8 @@ ipv6_exthdrs_fail:
957addrconf_fail: 984addrconf_fail:
958 ip6_flowlabel_cleanup(); 985 ip6_flowlabel_cleanup();
959ip6_flowlabel_fail: 986ip6_flowlabel_fail:
987 ndisc_late_cleanup();
988ndisc_late_fail:
960 ip6_route_cleanup(); 989 ip6_route_cleanup();
961ip6_route_fail: 990ip6_route_fail:
962#ifdef CONFIG_PROC_FS 991#ifdef CONFIG_PROC_FS
@@ -985,6 +1014,8 @@ register_pernet_fail:
985 rtnl_unregister_all(PF_INET6); 1014 rtnl_unregister_all(PF_INET6);
986out_sock_register_fail: 1015out_sock_register_fail:
987 rawv6_exit(); 1016 rawv6_exit();
1017out_unregister_ping_proto:
1018 proto_unregister(&pingv6_prot);
988out_unregister_raw_proto: 1019out_unregister_raw_proto:
989 proto_unregister(&rawv6_prot); 1020 proto_unregister(&rawv6_prot);
990out_unregister_udplite_proto: 1021out_unregister_udplite_proto:
@@ -1017,6 +1048,7 @@ static void __exit inet6_exit(void)
1017 ipv6_exthdrs_exit(); 1048 ipv6_exthdrs_exit();
1018 addrconf_cleanup(); 1049 addrconf_cleanup();
1019 ip6_flowlabel_cleanup(); 1050 ip6_flowlabel_cleanup();
1051 ndisc_late_cleanup();
1020 ip6_route_cleanup(); 1052 ip6_route_cleanup();
1021#ifdef CONFIG_PROC_FS 1053#ifdef CONFIG_PROC_FS
1022 1054
@@ -1027,6 +1059,7 @@ static void __exit inet6_exit(void)
1027 raw6_proc_exit(); 1059 raw6_proc_exit();
1028#endif 1060#endif
1029 ipv6_netfilter_fini(); 1061 ipv6_netfilter_fini();
1062 ipv6_stub = NULL;
1030 igmp6_cleanup(); 1063 igmp6_cleanup();
1031 ndisc_cleanup(); 1064 ndisc_cleanup();
1032 ip6_mr_cleanup(); 1065 ip6_mr_cleanup();
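
A note on the pingv6 registration hunk above: each goto in inet6_init() must unwind only the steps that already succeeded, so a failed proto_register(&pingv6_prot, 1) jumps to out_unregister_raw_proto, never to the label that would unregister pingv6 itself. Below is a minimal userspace C sketch of that unwind discipline; step() and undo() are made-up stand-ins for proto_register()/proto_unregister(), not kernel APIs.

#include <stdio.h>

static int step(const char *name, int fail)
{
	printf("register %s\n", name);
	return fail ? -1 : 0;
}

static void undo(const char *name)
{
	printf("unregister %s\n", name);
}

static int demo_init(int fail_ping)
{
	int err;

	err = step("rawv6", 0);
	if (err)
		goto out;

	err = step("pingv6", fail_ping);
	if (err)
		goto out_unregister_raw;	/* roll back raw only; ping was never registered */

	return 0;

out_unregister_raw:
	undo("rawv6");
out:
	return err;
}

int main(void)
{
	/* fail_ping=1: only "unregister rawv6" prints, mirroring the corrected label */
	return demo_init(1) ? 1 : 0;
}
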
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index bb02e176cb70..73784c3d4642 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -628,7 +628,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
628 return; 628 return;
629 629
630 if (type == NDISC_REDIRECT) 630 if (type == NDISC_REDIRECT)
631 ip6_redirect(skb, net, 0, 0); 631 ip6_redirect(skb, net, skb->dev->ifindex, 0);
632 else 632 else
633 ip6_update_pmtu(skb, net, info, 0, 0); 633 ip6_update_pmtu(skb, net, info, 0, 0);
634 xfrm_state_put(x); 634 xfrm_state_put(x);
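
The same one-line change recurs in ah6_err(), esp6_err(), ipcomp6_err() and icmpv6_err() below: ip6_redirect() now receives skb->dev->ifindex instead of 0, so the redirect route lookup is scoped to the interface the packet arrived on, which matters for link-local next hops. A trivial userspace analogue of resolving that ifindex, purely for illustration:

#include <stdio.h>
#include <net/if.h>

int main(void)
{
	/* skb->dev->ifindex in the kernel is the receiving interface;
	 * if_nametoindex() is the closest userspace equivalent. */
	unsigned int idx = if_nametoindex("lo");

	printf("lo ifindex = %u (0 would mean \"unspecified\")\n", idx);
	return 0;
}
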
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 4b56cbbc7890..48b6bd2a9a14 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -879,3 +879,30 @@ exit_f:
879 return err; 879 return err;
880} 880}
881EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl); 881EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl);
882
883void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
884 __u16 srcp, __u16 destp, int bucket)
885{
886 struct ipv6_pinfo *np = inet6_sk(sp);
887 const struct in6_addr *dest, *src;
888
889 dest = &np->daddr;
890 src = &np->rcv_saddr;
891 seq_printf(seq,
892 "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
893 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n",
894 bucket,
895 src->s6_addr32[0], src->s6_addr32[1],
896 src->s6_addr32[2], src->s6_addr32[3], srcp,
897 dest->s6_addr32[0], dest->s6_addr32[1],
898 dest->s6_addr32[2], dest->s6_addr32[3], destp,
899 sp->sk_state,
900 sk_wmem_alloc_get(sp),
901 sk_rmem_alloc_get(sp),
902 0, 0L, 0,
903 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
904 0,
905 sock_i_ino(sp),
906 atomic_read(&sp->sk_refcnt), sp,
907 atomic_read(&sp->sk_drops));
908}
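
The new ip6_dgram_sock_seq_show() helper emits the /proc/net/udp6-style row format: each IPv6 address is printed as four raw 32-bit words in hex, exactly as they sit in memory (the kernel's s6_addr32 view), followed by the port. A small self-contained C program reproducing that convention, assuming the same host for reading and writing:

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

int main(void)
{
	struct in6_addr a;
	unsigned int w[4];

	inet_pton(AF_INET6, "::1", &a);
	memcpy(w, &a, sizeof(w));
	/* Mirrors the "%08X%08X%08X%08X:%04X" format used by the helper above. */
	printf("%08X%08X%08X%08X:%04X\n", w[0], w[1], w[2], w[3], 53);
	return 0;
}
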
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 40ffd72243a4..d3618a78fcac 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -425,7 +425,7 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
425 net_adj = 0; 425 net_adj = 0;
426 426
427 return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - 427 return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) -
428 net_adj) & ~(align - 1)) + (net_adj - 2); 428 net_adj) & ~(align - 1)) + net_adj - 2;
429} 429}
430 430
431static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 431static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -447,7 +447,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
447 return; 447 return;
448 448
449 if (type == NDISC_REDIRECT) 449 if (type == NDISC_REDIRECT)
450 ip6_redirect(skb, net, 0, 0); 450 ip6_redirect(skb, net, skb->dev->ifindex, 0);
451 else 451 else
452 ip6_update_pmtu(skb, net, info, 0, 0); 452 ip6_update_pmtu(skb, net, info, 0, 0);
453 xfrm_state_put(x); 453 xfrm_state_put(x);
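
For reference, the esp6_get_mtu() arithmetic above rounds the usable payload down to the cipher block size, then re-adds net_adj - 2 for the ESP trailer bookkeeping. esp_mtu() below is a standalone re-implementation for illustration only, with made-up numbers, not the kernel function:

#include <stdio.h>

static unsigned int esp_mtu(unsigned int mtu, unsigned int hdr,
			    unsigned int auth, unsigned int net_adj,
			    unsigned int align)
{
	/* round down to a block boundary, then restore the trailer adjustment */
	return ((mtu - hdr - auth - net_adj) & ~(align - 1)) + net_adj - 2;
}

int main(void)
{
	/* 1500-byte link MTU, 24-byte ESP header, 12-byte ICV, 16-byte blocks */
	printf("payload mtu = %u\n", esp_mtu(1500, 24, 12, 0, 16)); /* 1454 */
	return 0;
}
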
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 07a7d65a7cb6..8d67900aa003 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -162,12 +162,6 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
162 off += optlen; 162 off += optlen;
163 len -= optlen; 163 len -= optlen;
164 } 164 }
165 /* This case will not be caught by above check since its padding
166 * length is smaller than 7:
167 * 1 byte NH + 1 byte Length + 6 bytes Padding
168 */
169 if ((padlen == 6) && ((off - skb_network_header_len(skb)) == 8))
170 goto bad;
171 165
172 if (len == 0) 166 if (len == 0)
173 return true; 167 return true;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index c5e83fae4df4..140748debc4a 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -115,7 +115,7 @@ EXPORT_SYMBOL(ipv6_skip_exthdr);
115int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) 115int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
116{ 116{
117 const unsigned char *nh = skb_network_header(skb); 117 const unsigned char *nh = skb_network_header(skb);
118 int packet_len = skb->tail - skb->network_header; 118 int packet_len = skb_tail_pointer(skb) - skb_network_header(skb);
119 struct ipv6_opt_hdr *hdr; 119 struct ipv6_opt_hdr *hdr;
120 int len; 120 int len;
121 121
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 2e1a432867c0..e27591635f92 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -55,26 +55,33 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
55 struct fib6_table *table; 55 struct fib6_table *table;
56 struct net *net = rule->fr_net; 56 struct net *net = rule->fr_net;
57 pol_lookup_t lookup = arg->lookup_ptr; 57 pol_lookup_t lookup = arg->lookup_ptr;
58 int err = 0;
58 59
59 switch (rule->action) { 60 switch (rule->action) {
60 case FR_ACT_TO_TBL: 61 case FR_ACT_TO_TBL:
61 break; 62 break;
62 case FR_ACT_UNREACHABLE: 63 case FR_ACT_UNREACHABLE:
64 err = -ENETUNREACH;
63 rt = net->ipv6.ip6_null_entry; 65 rt = net->ipv6.ip6_null_entry;
64 goto discard_pkt; 66 goto discard_pkt;
65 default: 67 default:
66 case FR_ACT_BLACKHOLE: 68 case FR_ACT_BLACKHOLE:
69 err = -EINVAL;
67 rt = net->ipv6.ip6_blk_hole_entry; 70 rt = net->ipv6.ip6_blk_hole_entry;
68 goto discard_pkt; 71 goto discard_pkt;
69 case FR_ACT_PROHIBIT: 72 case FR_ACT_PROHIBIT:
73 err = -EACCES;
70 rt = net->ipv6.ip6_prohibit_entry; 74 rt = net->ipv6.ip6_prohibit_entry;
71 goto discard_pkt; 75 goto discard_pkt;
72 } 76 }
73 77
74 table = fib6_get_table(net, rule->table); 78 table = fib6_get_table(net, rule->table);
75 if (table) 79 if (!table) {
76 rt = lookup(net, table, flp6, flags); 80 err = -EAGAIN;
81 goto out;
82 }
77 83
84 rt = lookup(net, table, flp6, flags);
78 if (rt != net->ipv6.ip6_null_entry) { 85 if (rt != net->ipv6.ip6_null_entry) {
79 struct fib6_rule *r = (struct fib6_rule *)rule; 86 struct fib6_rule *r = (struct fib6_rule *)rule;
80 87
@@ -101,6 +108,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
101 } 108 }
102again: 109again:
103 ip6_rt_put(rt); 110 ip6_rt_put(rt);
111 err = -EAGAIN;
104 rt = NULL; 112 rt = NULL;
105 goto out; 113 goto out;
106 114
@@ -108,9 +116,31 @@ discard_pkt:
108 dst_hold(&rt->dst); 116 dst_hold(&rt->dst);
109out: 117out:
110 arg->result = rt; 118 arg->result = rt;
111 return rt == NULL ? -EAGAIN : 0; 119 return err;
112} 120}
113 121
122static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
123{
124 struct rt6_info *rt = (struct rt6_info *) arg->result;
125 struct net_device *dev = rt->rt6i_idev->dev;
126 /* do not accept result if the route does
127 * not meet the required prefix length
128 */
129 if (rt->rt6i_dst.plen <= rule->suppress_prefixlen)
130 goto suppress_route;
131
132 /* do not accept result if the route uses a device
133 * belonging to a forbidden interface group
134 */
135 if (rule->suppress_ifgroup != -1 && dev && dev->group == rule->suppress_ifgroup)
136 goto suppress_route;
137
138 return false;
139
140suppress_route:
141 ip6_rt_put(rt);
142 return true;
143}
114 144
115static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 145static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
116{ 146{
@@ -244,6 +274,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
244 .addr_size = sizeof(struct in6_addr), 274 .addr_size = sizeof(struct in6_addr),
245 .action = fib6_rule_action, 275 .action = fib6_rule_action,
246 .match = fib6_rule_match, 276 .match = fib6_rule_match,
277 .suppress = fib6_rule_suppress,
247 .configure = fib6_rule_configure, 278 .configure = fib6_rule_configure,
248 .compare = fib6_rule_compare, 279 .compare = fib6_rule_compare,
249 .fill = fib6_rule_fill, 280 .fill = fib6_rule_fill,
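
The fib6_rule_suppress() callback added above rejects an otherwise-matching lookup result when the route's prefix is too short or its device sits in a forbidden interface group. The standalone sketch below mirrors that decision with simplified stand-in types (struct route is not the kernel's rt6_info):

#include <stdbool.h>
#include <stdio.h>

struct route { int plen; int dev_group; };

static bool suppress(const struct route *rt, int suppress_prefixlen,
		     int suppress_ifgroup)
{
	if (rt->plen <= suppress_prefixlen)
		return true;	/* not specific enough */
	if (suppress_ifgroup != -1 && rt->dev_group == suppress_ifgroup)
		return true;	/* forbidden interface group */
	return false;
}

int main(void)
{
	struct route def = { .plen = 0, .dev_group = 0 };
	struct route host = { .plen = 128, .dev_group = 0 };

	printf("default route suppressed: %d\n", suppress(&def, 0, -1));  /* 1 */
	printf("host route suppressed:    %d\n", suppress(&host, 0, -1)); /* 0 */
	return 0;
}
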
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index b4ff0a42b8c7..eef8d945b362 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -57,6 +57,7 @@
57 57
58#include <net/ipv6.h> 58#include <net/ipv6.h>
59#include <net/ip6_checksum.h> 59#include <net/ip6_checksum.h>
60#include <net/ping.h>
60#include <net/protocol.h> 61#include <net/protocol.h>
61#include <net/raw.h> 62#include <net/raw.h>
62#include <net/rawv6.h> 63#include <net/rawv6.h>
@@ -84,12 +85,18 @@ static inline struct sock *icmpv6_sk(struct net *net)
84static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 85static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
85 u8 type, u8 code, int offset, __be32 info) 86 u8 type, u8 code, int offset, __be32 info)
86{ 87{
88 /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
89 struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
87 struct net *net = dev_net(skb->dev); 90 struct net *net = dev_net(skb->dev);
88 91
89 if (type == ICMPV6_PKT_TOOBIG) 92 if (type == ICMPV6_PKT_TOOBIG)
90 ip6_update_pmtu(skb, net, info, 0, 0); 93 ip6_update_pmtu(skb, net, info, 0, 0);
91 else if (type == NDISC_REDIRECT) 94 else if (type == NDISC_REDIRECT)
92 ip6_redirect(skb, net, 0, 0); 95 ip6_redirect(skb, net, skb->dev->ifindex, 0);
96
97 if (!(type & ICMPV6_INFOMSG_MASK))
98 if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
99 ping_err(skb, offset, info);
93} 100}
94 101
95static int icmpv6_rcv(struct sk_buff *skb); 102static int icmpv6_rcv(struct sk_buff *skb);
@@ -224,7 +231,8 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset)
224 return (*op & 0xC0) == 0x80; 231 return (*op & 0xC0) == 0x80;
225} 232}
226 233
227static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len) 234int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
235 struct icmp6hdr *thdr, int len)
228{ 236{
229 struct sk_buff *skb; 237 struct sk_buff *skb;
230 struct icmp6hdr *icmp6h; 238 struct icmp6hdr *icmp6h;
@@ -307,8 +315,8 @@ static void mip6_addr_swap(struct sk_buff *skb)
307static inline void mip6_addr_swap(struct sk_buff *skb) {} 315static inline void mip6_addr_swap(struct sk_buff *skb) {}
308#endif 316#endif
309 317
310static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, 318struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
311 struct sock *sk, struct flowi6 *fl6) 319 struct sock *sk, struct flowi6 *fl6)
312{ 320{
313 struct dst_entry *dst, *dst2; 321 struct dst_entry *dst, *dst2;
314 struct flowi6 fl2; 322 struct flowi6 fl2;
@@ -391,7 +399,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
391 int err = 0; 399 int err = 0;
392 400
393 if ((u8 *)hdr < skb->head || 401 if ((u8 *)hdr < skb->head ||
394 (skb->network_header + sizeof(*hdr)) > skb->tail) 402 (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
395 return; 403 return;
396 404
397 /* 405 /*
@@ -697,7 +705,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
697 skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, 705 skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
698 IPPROTO_ICMPV6, 0)); 706 IPPROTO_ICMPV6, 0));
699 if (__skb_checksum_complete(skb)) { 707 if (__skb_checksum_complete(skb)) {
700 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n", 708 LIMIT_NETDEBUG(KERN_DEBUG
709 "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
701 saddr, daddr); 710 saddr, daddr);
702 goto csum_error; 711 goto csum_error;
703 } 712 }
@@ -718,7 +727,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
718 break; 727 break;
719 728
720 case ICMPV6_ECHO_REPLY: 729 case ICMPV6_ECHO_REPLY:
721 /* we couldn't care less */ 730 ping_rcv(skb);
722 break; 731 break;
723 732
724 case ICMPV6_PKT_TOOBIG: 733 case ICMPV6_PKT_TOOBIG:
@@ -931,6 +940,14 @@ static const struct icmp6_err {
931 .err = ECONNREFUSED, 940 .err = ECONNREFUSED,
932 .fatal = 1, 941 .fatal = 1,
933 }, 942 },
943 { /* POLICY_FAIL */
944 .err = EACCES,
945 .fatal = 1,
946 },
947 { /* REJECT_ROUTE */
948 .err = EACCES,
949 .fatal = 1,
950 },
934}; 951};
935 952
936int icmpv6_err_convert(u8 type, u8 code, int *err) 953int icmpv6_err_convert(u8 type, u8 code, int *err)
@@ -942,7 +959,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err)
942 switch (type) { 959 switch (type) {
943 case ICMPV6_DEST_UNREACH: 960 case ICMPV6_DEST_UNREACH:
944 fatal = 1; 961 fatal = 1;
945 if (code <= ICMPV6_PORT_UNREACH) { 962 if (code < ARRAY_SIZE(tab_unreach)) {
946 *err = tab_unreach[code].err; 963 *err = tab_unreach[code].err;
947 fatal = tab_unreach[code].fatal; 964 fatal = tab_unreach[code].fatal;
948 } 965 }
@@ -967,7 +984,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err)
967EXPORT_SYMBOL(icmpv6_err_convert); 984EXPORT_SYMBOL(icmpv6_err_convert);
968 985
969#ifdef CONFIG_SYSCTL 986#ifdef CONFIG_SYSCTL
970ctl_table ipv6_icmp_table_template[] = { 987struct ctl_table ipv6_icmp_table_template[] = {
971 { 988 {
972 .procname = "ratelimit", 989 .procname = "ratelimit",
973 .data = &init_net.ipv6.sysctl.icmpv6_time, 990 .data = &init_net.ipv6.sysctl.icmpv6_time,
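
The icmpv6_err_convert() change above is worth spelling out: replacing the hard-coded "code <= ICMPV6_PORT_UNREACH" bound with ARRAY_SIZE(tab_unreach) means the newly appended POLICY_FAIL and REJECT_ROUTE entries become reachable without touching the bounds check again. A minimal illustration of the pattern (table contents here are arbitrary):

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const struct { int err; int fatal; } tab[] = {
	{ 101, 1 }, { 102, 0 }, { 103, 1 },
};

int main(void)
{
	unsigned int code = 2;

	if (code < ARRAY_SIZE(tab))	/* bound grows with the table */
		printf("err=%d fatal=%d\n", tab[code].err, tab[code].fatal);
	return 0;
}
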
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 192dd1a0e188..5bec666aba61 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -425,8 +425,8 @@ out:
425 * node. 425 * node.
426 */ 426 */
427 427
428static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, 428static struct fib6_node *fib6_add_1(struct fib6_node *root,
429 int addrlen, int plen, 429 struct in6_addr *addr, int plen,
430 int offset, int allow_create, 430 int offset, int allow_create,
431 int replace_required) 431 int replace_required)
432{ 432{
@@ -543,7 +543,7 @@ insert_above:
543 but if it is >= plen, the value is ignored in any case. 543 but if it is >= plen, the value is ignored in any case.
544 */ 544 */
545 545
546 bit = __ipv6_addr_diff(addr, &key->addr, addrlen); 546 bit = __ipv6_addr_diff(addr, &key->addr, sizeof(*addr));
547 547
548 /* 548 /*
549 * (intermediate)[in] 549 * (intermediate)[in]
@@ -632,6 +632,12 @@ insert_above:
632 return ln; 632 return ln;
633} 633}
634 634
635static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt)
636{
637 return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
638 RTF_GATEWAY;
639}
640
635/* 641/*
636 * Insert routing information in a node. 642 * Insert routing information in a node.
637 */ 643 */
@@ -646,6 +652,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
646 int add = (!info->nlh || 652 int add = (!info->nlh ||
647 (info->nlh->nlmsg_flags & NLM_F_CREATE)); 653 (info->nlh->nlmsg_flags & NLM_F_CREATE));
648 int found = 0; 654 int found = 0;
655 bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
649 656
650 ins = &fn->leaf; 657 ins = &fn->leaf;
651 658
@@ -691,9 +698,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
691 * To avoid long list, we only had siblings if the 698 * To avoid long list, we only had siblings if the
692 * route have a gateway. 699 * route have a gateway.
693 */ 700 */
694 if (rt->rt6i_flags & RTF_GATEWAY && 701 if (rt_can_ecmp &&
695 !(rt->rt6i_flags & RTF_EXPIRES) && 702 rt6_qualify_for_ecmp(iter))
696 !(iter->rt6i_flags & RTF_EXPIRES))
697 rt->rt6i_nsiblings++; 703 rt->rt6i_nsiblings++;
698 } 704 }
699 705
@@ -715,7 +721,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
715 /* Find the first route that have the same metric */ 721 /* Find the first route that have the same metric */
716 sibling = fn->leaf; 722 sibling = fn->leaf;
717 while (sibling) { 723 while (sibling) {
718 if (sibling->rt6i_metric == rt->rt6i_metric) { 724 if (sibling->rt6i_metric == rt->rt6i_metric &&
725 rt6_qualify_for_ecmp(sibling)) {
719 list_add_tail(&rt->rt6i_siblings, 726 list_add_tail(&rt->rt6i_siblings,
720 &sibling->rt6i_siblings); 727 &sibling->rt6i_siblings);
721 break; 728 break;
@@ -815,12 +822,12 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
815 if (!allow_create && !replace_required) 822 if (!allow_create && !replace_required)
816 pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); 823 pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
817 824
818 fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), 825 fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
819 rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst), 826 offsetof(struct rt6_info, rt6i_dst), allow_create,
820 allow_create, replace_required); 827 replace_required);
821
822 if (IS_ERR(fn)) { 828 if (IS_ERR(fn)) {
823 err = PTR_ERR(fn); 829 err = PTR_ERR(fn);
830 fn = NULL;
824 goto out; 831 goto out;
825 } 832 }
826 833
@@ -856,7 +863,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
856 /* Now add the first leaf node to new subtree */ 863 /* Now add the first leaf node to new subtree */
857 864
858 sn = fib6_add_1(sfn, &rt->rt6i_src.addr, 865 sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
859 sizeof(struct in6_addr), rt->rt6i_src.plen, 866 rt->rt6i_src.plen,
860 offsetof(struct rt6_info, rt6i_src), 867 offsetof(struct rt6_info, rt6i_src),
861 allow_create, replace_required); 868 allow_create, replace_required);
862 869
@@ -875,7 +882,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
875 fn->subtree = sfn; 882 fn->subtree = sfn;
876 } else { 883 } else {
877 sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, 884 sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
878 sizeof(struct in6_addr), rt->rt6i_src.plen, 885 rt->rt6i_src.plen,
879 offsetof(struct rt6_info, rt6i_src), 886 offsetof(struct rt6_info, rt6i_src),
880 allow_create, replace_required); 887 allow_create, replace_required);
881 888
@@ -986,14 +993,22 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
986 993
987 if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { 994 if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
988#ifdef CONFIG_IPV6_SUBTREES 995#ifdef CONFIG_IPV6_SUBTREES
989 if (fn->subtree) 996 if (fn->subtree) {
990 fn = fib6_lookup_1(fn->subtree, args + 1); 997 struct fib6_node *sfn;
998 sfn = fib6_lookup_1(fn->subtree,
999 args + 1);
1000 if (!sfn)
1001 goto backtrack;
1002 fn = sfn;
1003 }
991#endif 1004#endif
992 if (!fn || fn->fn_flags & RTN_RTINFO) 1005 if (fn->fn_flags & RTN_RTINFO)
993 return fn; 1006 return fn;
994 } 1007 }
995 } 1008 }
996 1009#ifdef CONFIG_IPV6_SUBTREES
1010backtrack:
1011#endif
997 if (fn->fn_flags & RTN_ROOT) 1012 if (fn->fn_flags & RTN_ROOT)
998 break; 1013 break;
999 1014
@@ -1625,27 +1640,28 @@ static int fib6_age(struct rt6_info *rt, void *arg)
1625 1640
1626static DEFINE_SPINLOCK(fib6_gc_lock); 1641static DEFINE_SPINLOCK(fib6_gc_lock);
1627 1642
1628void fib6_run_gc(unsigned long expires, struct net *net) 1643void fib6_run_gc(unsigned long expires, struct net *net, bool force)
1629{ 1644{
1630 if (expires != ~0UL) { 1645 unsigned long now;
1646
1647 if (force) {
1631 spin_lock_bh(&fib6_gc_lock); 1648 spin_lock_bh(&fib6_gc_lock);
1632 gc_args.timeout = expires ? (int)expires : 1649 } else if (!spin_trylock_bh(&fib6_gc_lock)) {
1633 net->ipv6.sysctl.ip6_rt_gc_interval; 1650 mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
1634 } else { 1651 return;
1635 if (!spin_trylock_bh(&fib6_gc_lock)) {
1636 mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
1637 return;
1638 }
1639 gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval;
1640 } 1652 }
1653 gc_args.timeout = expires ? (int)expires :
1654 net->ipv6.sysctl.ip6_rt_gc_interval;
1641 1655
1642 gc_args.more = icmp6_dst_gc(); 1656 gc_args.more = icmp6_dst_gc();
1643 1657
1644 fib6_clean_all(net, fib6_age, 0, NULL); 1658 fib6_clean_all(net, fib6_age, 0, NULL);
1659 now = jiffies;
1660 net->ipv6.ip6_rt_last_gc = now;
1645 1661
1646 if (gc_args.more) 1662 if (gc_args.more)
1647 mod_timer(&net->ipv6.ip6_fib_timer, 1663 mod_timer(&net->ipv6.ip6_fib_timer,
1648 round_jiffies(jiffies 1664 round_jiffies(now
1649 + net->ipv6.sysctl.ip6_rt_gc_interval)); 1665 + net->ipv6.sysctl.ip6_rt_gc_interval));
1650 else 1666 else
1651 del_timer(&net->ipv6.ip6_fib_timer); 1667 del_timer(&net->ipv6.ip6_fib_timer);
@@ -1654,7 +1670,7 @@ void fib6_run_gc(unsigned long expires, struct net *net)
1654 1670
1655static void fib6_gc_timer_cb(unsigned long arg) 1671static void fib6_gc_timer_cb(unsigned long arg)
1656{ 1672{
1657 fib6_run_gc(0, (struct net *)arg); 1673 fib6_run_gc(0, (struct net *)arg, true);
1658} 1674}
1659 1675
1660static int __net_init fib6_net_init(struct net *net) 1676static int __net_init fib6_net_init(struct net *net)
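
The fib6_run_gc() rework above splits callers into two classes: forced callers block on the lock, opportunistic callers back off and re-arm the timer rather than contend. The pthread sketch below stands in for the kernel spinlock, and the "defer" branch stands in for re-arming ip6_fib_timer; it is a shape-of-the-logic demo, not kernel code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t gc_lock = PTHREAD_MUTEX_INITIALIZER;

static void run_gc(bool force)
{
	if (force) {
		pthread_mutex_lock(&gc_lock);	/* forced: always wait */
	} else if (pthread_mutex_trylock(&gc_lock) != 0) {
		printf("busy: defer GC instead of contending\n");
		return;
	}
	printf("running GC (force=%d)\n", force);
	pthread_mutex_unlock(&gc_lock);
}

int main(void)
{
	run_gc(false);	/* opportunistic: may defer */
	run_gc(true);	/* forced: always runs */
	return 0;
}
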
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index ecd60733e5e2..7bb5446b9d73 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -335,6 +335,7 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
335 dev->rtnl_link_ops = &ip6gre_link_ops; 335 dev->rtnl_link_ops = &ip6gre_link_ops;
336 336
337 nt->dev = dev; 337 nt->dev = dev;
338 nt->net = dev_net(dev);
338 ip6gre_tnl_link_config(nt, 1); 339 ip6gre_tnl_link_config(nt, 1);
339 340
340 if (register_netdevice(dev) < 0) 341 if (register_netdevice(dev) < 0)
@@ -508,8 +509,6 @@ static int ip6gre_rcv(struct sk_buff *skb)
508 goto drop; 509 goto drop;
509 } 510 }
510 511
511 secpath_reset(skb);
512
513 skb->protocol = gre_proto; 512 skb->protocol = gre_proto;
514 /* WCCP version 1 and 2 protocol decoding. 513 /* WCCP version 1 and 2 protocol decoding.
515 * - Change protocol to IP 514 * - Change protocol to IP
@@ -524,7 +523,6 @@ static int ip6gre_rcv(struct sk_buff *skb)
524 skb->mac_header = skb->network_header; 523 skb->mac_header = skb->network_header;
525 __pskb_pull(skb, offset); 524 __pskb_pull(skb, offset);
526 skb_postpull_rcsum(skb, skb_transport_header(skb), offset); 525 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
527 skb->pkt_type = PACKET_HOST;
528 526
529 if (((flags&GRE_CSUM) && csum) || 527 if (((flags&GRE_CSUM) && csum) ||
530 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { 528 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
@@ -556,7 +554,7 @@ static int ip6gre_rcv(struct sk_buff *skb)
556 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 554 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
557 } 555 }
558 556
559 __skb_tunnel_rx(skb, tunnel->dev); 557 __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
560 558
561 skb_reset_network_header(skb); 559 skb_reset_network_header(skb);
562 560
@@ -620,7 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
620 struct ip6_tnl *tunnel = netdev_priv(dev); 618 struct ip6_tnl *tunnel = netdev_priv(dev);
621 struct net_device *tdev; /* Device to other host */ 619 struct net_device *tdev; /* Device to other host */
622 struct ipv6hdr *ipv6h; /* Our new IP header */ 620 struct ipv6hdr *ipv6h; /* Our new IP header */
623 unsigned int max_headroom; /* The extra header space needed */ 621 unsigned int max_headroom = 0; /* The extra header space needed */
624 int gre_hlen; 622 int gre_hlen;
625 struct ipv6_tel_txoption opt; 623 struct ipv6_tel_txoption opt;
626 int mtu; 624 int mtu;
@@ -693,7 +691,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
693 tunnel->err_count = 0; 691 tunnel->err_count = 0;
694 } 692 }
695 693
696 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; 694 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
695
696 max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
697 697
698 if (skb_headroom(skb) < max_headroom || skb_shared(skb) || 698 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
699 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { 699 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
@@ -709,8 +709,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
709 skb = new_skb; 709 skb = new_skb;
710 } 710 }
711 711
712 skb_dst_drop(skb);
713
714 if (fl6->flowi6_mark) { 712 if (fl6->flowi6_mark) {
715 skb_dst_set(skb, dst); 713 skb_dst_set(skb, dst);
716 ndst = NULL; 714 ndst = NULL;
@@ -724,6 +722,11 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
724 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); 722 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
725 } 723 }
726 724
725 if (likely(!skb->encapsulation)) {
726 skb_reset_inner_headers(skb);
727 skb->encapsulation = 1;
728 }
729
727 skb_push(skb, gre_hlen); 730 skb_push(skb, gre_hlen);
728 skb_reset_network_header(skb); 731 skb_reset_network_header(skb);
729 skb_set_transport_header(skb, sizeof(*ipv6h)); 732 skb_set_transport_header(skb, sizeof(*ipv6h));
@@ -1255,6 +1258,7 @@ static int ip6gre_tunnel_init(struct net_device *dev)
1255 tunnel = netdev_priv(dev); 1258 tunnel = netdev_priv(dev);
1256 1259
1257 tunnel->dev = dev; 1260 tunnel->dev = dev;
1261 tunnel->net = dev_net(dev);
1258 strcpy(tunnel->parms.name, dev->name); 1262 strcpy(tunnel->parms.name, dev->name);
1259 1263
1260 memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); 1264 memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
@@ -1275,6 +1279,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
1275 struct ip6_tnl *tunnel = netdev_priv(dev); 1279 struct ip6_tnl *tunnel = netdev_priv(dev);
1276 1280
1277 tunnel->dev = dev; 1281 tunnel->dev = dev;
1282 tunnel->net = dev_net(dev);
1278 strcpy(tunnel->parms.name, dev->name); 1283 strcpy(tunnel->parms.name, dev->name);
1279 1284
1280 tunnel->hlen = sizeof(struct ipv6hdr) + 4; 1285 tunnel->hlen = sizeof(struct ipv6hdr) + 4;
@@ -1450,6 +1455,7 @@ static int ip6gre_tap_init(struct net_device *dev)
1450 tunnel = netdev_priv(dev); 1455 tunnel = netdev_priv(dev);
1451 1456
1452 tunnel->dev = dev; 1457 tunnel->dev = dev;
1458 tunnel->net = dev_net(dev);
1453 strcpy(tunnel->parms.name, dev->name); 1459 strcpy(tunnel->parms.name, dev->name);
1454 1460
1455 ip6gre_tnl_link_config(tunnel, 1); 1461 ip6gre_tnl_link_config(tunnel, 1);
@@ -1501,6 +1507,7 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
1501 eth_hw_addr_random(dev); 1507 eth_hw_addr_random(dev);
1502 1508
1503 nt->dev = dev; 1509 nt->dev = dev;
1510 nt->net = dev_net(dev);
1504 ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); 1511 ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
1505 1512
1506 /* Can use a lockless transmit, unless we generate output sequences */ 1513 /* Can use a lockless transmit, unless we generate output sequences */
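
One detail in the ip6gre xmit path above: skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))) scrubs per-namespace packet state only when the tunnel actually crosses network namespaces, which is also why the open-coded secpath_reset()/pkt_type assignments could be deleted from the rcv path. A minimal stand-in for that decision (struct netns and scrub_packet are simplified fakes):

#include <stdbool.h>
#include <stdio.h>

struct netns { int id; };

static bool net_eq(const struct netns *a, const struct netns *b)
{
	return a->id == b->id;
}

static void scrub_packet(bool xnet)
{
	if (xnet)
		printf("crossing netns: drop mark, secpath, per-ns state\n");
	else
		printf("same netns: keep per-ns state\n");
}

int main(void)
{
	struct netns init_net = { 1 }, container = { 2 };

	scrub_packet(!net_eq(&init_net, &init_net));	/* local case */
	scrub_packet(!net_eq(&init_net, &container));	/* cross-netns tunnel */
	return 0;
}
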
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 2bab2aa59745..302d6fb1ff2b 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -44,7 +44,7 @@
44#include <net/ip6_route.h> 44#include <net/ip6_route.h>
45#include <net/addrconf.h> 45#include <net/addrconf.h>
46#include <net/xfrm.h> 46#include <net/xfrm.h>
47 47#include <net/inet_ecn.h>
48 48
49 49
50int ip6_rcv_finish(struct sk_buff *skb) 50int ip6_rcv_finish(struct sk_buff *skb)
@@ -109,6 +109,10 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
109 if (hdr->version != 6) 109 if (hdr->version != 6)
110 goto err; 110 goto err;
111 111
112 IP6_ADD_STATS_BH(dev_net(dev), idev,
113 IPSTATS_MIB_NOECTPKTS +
114 (ipv6_get_dsfield(hdr) & INET_ECN_MASK),
115 max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
112 /* 116 /*
113 * RFC4291 2.5.3 117 * RFC4291 2.5.3
114 * A packet received on an interface with a destination address 118 * A packet received on an interface with a destination address
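
The IP6_ADD_STATS_BH() call added above selects one of four adjacent MIB counters by adding the packet's two ECN bits (the low bits of the traffic class) to the NoECT base counter. The toy program below shows that indexing; the enum order is assumed to match the kernel's IPSTATS_MIB_NOECTPKTS/ECT1PKTS/ECT0PKTS/CEPKTS layout:

#include <stdio.h>

enum { MIB_NOECT, MIB_ECT1, MIB_ECT0, MIB_CE };	/* base + ECN bits */
#define ECN_MASK 0x03

int main(void)
{
	static const char *name[] = { "NoECT", "ECT(1)", "ECT(0)", "CE" };
	unsigned char dsfield = 0x02;	/* traffic class with ECT(0) set */

	printf("counter = %s\n", name[MIB_NOECT + (dsfield & ECN_MASK)]);
	return 0;
}
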
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 71b766ee821d..d82de7228100 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -91,6 +91,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
91 unsigned int unfrag_ip6hlen; 91 unsigned int unfrag_ip6hlen;
92 u8 *prevhdr; 92 u8 *prevhdr;
93 int offset = 0; 93 int offset = 0;
94 bool tunnel;
94 95
95 if (unlikely(skb_shinfo(skb)->gso_type & 96 if (unlikely(skb_shinfo(skb)->gso_type &
96 ~(SKB_GSO_UDP | 97 ~(SKB_GSO_UDP |
@@ -98,6 +99,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
98 SKB_GSO_TCP_ECN | 99 SKB_GSO_TCP_ECN |
99 SKB_GSO_GRE | 100 SKB_GSO_GRE |
100 SKB_GSO_UDP_TUNNEL | 101 SKB_GSO_UDP_TUNNEL |
102 SKB_GSO_MPLS |
101 SKB_GSO_TCPV6 | 103 SKB_GSO_TCPV6 |
102 0))) 104 0)))
103 goto out; 105 goto out;
@@ -105,6 +107,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
105 if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) 107 if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
106 goto out; 108 goto out;
107 109
110 tunnel = skb->encapsulation;
108 ipv6h = ipv6_hdr(skb); 111 ipv6h = ipv6_hdr(skb);
109 __skb_pull(skb, sizeof(*ipv6h)); 112 __skb_pull(skb, sizeof(*ipv6h));
110 segs = ERR_PTR(-EPROTONOSUPPORT); 113 segs = ERR_PTR(-EPROTONOSUPPORT);
@@ -125,7 +128,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
125 ipv6h = ipv6_hdr(skb); 128 ipv6h = ipv6_hdr(skb);
126 ipv6h->payload_len = htons(skb->len - skb->mac_len - 129 ipv6h->payload_len = htons(skb->len - skb->mac_len -
127 sizeof(*ipv6h)); 130 sizeof(*ipv6h));
128 if (proto == IPPROTO_UDP) { 131 if (!tunnel && proto == IPPROTO_UDP) {
129 unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); 132 unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
130 fptr = (struct frag_hdr *)(skb_network_header(skb) + 133 fptr = (struct frag_hdr *)(skb_network_header(skb) +
131 unfrag_ip6hlen); 134 unfrag_ip6hlen);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index dae1949019d7..a54c45ce4a48 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -56,31 +56,6 @@
56#include <net/checksum.h> 56#include <net/checksum.h>
57#include <linux/mroute6.h> 57#include <linux/mroute6.h>
58 58
59int __ip6_local_out(struct sk_buff *skb)
60{
61 int len;
62
63 len = skb->len - sizeof(struct ipv6hdr);
64 if (len > IPV6_MAXPLEN)
65 len = 0;
66 ipv6_hdr(skb)->payload_len = htons(len);
67
68 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
69 skb_dst(skb)->dev, dst_output);
70}
71
72int ip6_local_out(struct sk_buff *skb)
73{
74 int err;
75
76 err = __ip6_local_out(skb);
77 if (likely(err == 1))
78 err = dst_output(skb);
79
80 return err;
81}
82EXPORT_SYMBOL_GPL(ip6_local_out);
83
84static int ip6_finish_output2(struct sk_buff *skb) 59static int ip6_finish_output2(struct sk_buff *skb)
85{ 60{
86 struct dst_entry *dst = skb_dst(skb); 61 struct dst_entry *dst = skb_dst(skb);
@@ -238,6 +213,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
238 hdr->saddr = fl6->saddr; 213 hdr->saddr = fl6->saddr;
239 hdr->daddr = *first_hop; 214 hdr->daddr = *first_hop;
240 215
216 skb->protocol = htons(ETH_P_IPV6);
241 skb->priority = sk->sk_priority; 217 skb->priority = sk->sk_priority;
242 skb->mark = sk->sk_mark; 218 skb->mark = sk->sk_mark;
243 219
@@ -381,9 +357,8 @@ int ip6_forward(struct sk_buff *skb)
381 * cannot be fragmented, because there is no warranty 357 * cannot be fragmented, because there is no warranty
382 * that different fragments will go along one path. --ANK 358 * that different fragments will go along one path. --ANK
383 */ 359 */
384 if (opt->ra) { 360 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
385 u8 *ptr = skb_network_header(skb) + opt->ra; 361 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
386 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
387 return 0; 362 return 0;
388 } 363 }
389 364
@@ -822,11 +797,17 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
822 const struct flowi6 *fl6) 797 const struct flowi6 *fl6)
823{ 798{
824 struct ipv6_pinfo *np = inet6_sk(sk); 799 struct ipv6_pinfo *np = inet6_sk(sk);
825 struct rt6_info *rt = (struct rt6_info *)dst; 800 struct rt6_info *rt;
826 801
827 if (!dst) 802 if (!dst)
828 goto out; 803 goto out;
829 804
805 if (dst->ops->family != AF_INET6) {
806 dst_release(dst);
807 return NULL;
808 }
809
810 rt = (struct rt6_info *)dst;
830 /* Yes, checking route validity in not connected 811 /* Yes, checking route validity in not connected
831 * case is not very simple. Take into account, 812 * case is not very simple. Take into account,
832 * that we do not support routing by source, TOS, 813 * that we do not support routing by source, TOS,
@@ -1034,6 +1015,8 @@ static inline int ip6_ufo_append_data(struct sock *sk,
1034 * udp datagram 1015 * udp datagram
1035 */ 1016 */
1036 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { 1017 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1018 struct frag_hdr fhdr;
1019
1037 skb = sock_alloc_send_skb(sk, 1020 skb = sock_alloc_send_skb(sk,
1038 hh_len + fragheaderlen + transhdrlen + 20, 1021 hh_len + fragheaderlen + transhdrlen + 20,
1039 (flags & MSG_DONTWAIT), &err); 1022 (flags & MSG_DONTWAIT), &err);
@@ -1052,14 +1035,9 @@ static inline int ip6_ufo_append_data(struct sock *sk,
1052 /* initialize protocol header pointer */ 1035 /* initialize protocol header pointer */
1053 skb->transport_header = skb->network_header + fragheaderlen; 1036 skb->transport_header = skb->network_header + fragheaderlen;
1054 1037
1038 skb->protocol = htons(ETH_P_IPV6);
1055 skb->ip_summed = CHECKSUM_PARTIAL; 1039 skb->ip_summed = CHECKSUM_PARTIAL;
1056 skb->csum = 0; 1040 skb->csum = 0;
1057 }
1058
1059 err = skb_append_datato_frags(sk,skb, getfrag, from,
1060 (length - transhdrlen));
1061 if (!err) {
1062 struct frag_hdr fhdr;
1063 1041
1064 /* Specify the length of each IPv6 datagram fragment. 1042 /* Specify the length of each IPv6 datagram fragment.
1065 * It has to be a multiple of 8. 1043 * It has to be a multiple of 8.
@@ -1070,15 +1048,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
1070 ipv6_select_ident(&fhdr, rt); 1048 ipv6_select_ident(&fhdr, rt);
1071 skb_shinfo(skb)->ip6_frag_id = fhdr.identification; 1049 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1072 __skb_queue_tail(&sk->sk_write_queue, skb); 1050 __skb_queue_tail(&sk->sk_write_queue, skb);
1073
1074 return 0;
1075 } 1051 }
1076 /* There is not enough support do UPD LSO,
1077 * so follow normal path
1078 */
1079 kfree_skb(skb);
1080 1052
1081 return err; 1053 return skb_append_datato_frags(sk, skb, getfrag, from,
1054 (length - transhdrlen));
1082} 1055}
1083 1056
1084static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 1057static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
@@ -1093,11 +1066,12 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1093 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1066 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1094} 1067}
1095 1068
1096static void ip6_append_data_mtu(int *mtu, 1069static void ip6_append_data_mtu(unsigned int *mtu,
1097 int *maxfraglen, 1070 int *maxfraglen,
1098 unsigned int fragheaderlen, 1071 unsigned int fragheaderlen,
1099 struct sk_buff *skb, 1072 struct sk_buff *skb,
1100 struct rt6_info *rt) 1073 struct rt6_info *rt,
1074 bool pmtuprobe)
1101{ 1075{
1102 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { 1076 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1103 if (skb == NULL) { 1077 if (skb == NULL) {
@@ -1109,7 +1083,9 @@ static void ip6_append_data_mtu(int *mtu,
1109 * this fragment is not first, the headers 1083 * this fragment is not first, the headers
1110 * space is regarded as data space. 1084 * space is regarded as data space.
1111 */ 1085 */
1112 *mtu = dst_mtu(rt->dst.path); 1086 *mtu = min(*mtu, pmtuprobe ?
1087 rt->dst.dev->mtu :
1088 dst_mtu(rt->dst.path));
1113 } 1089 }
1114 *maxfraglen = ((*mtu - fragheaderlen) & ~7) 1090 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1115 + fragheaderlen - sizeof(struct frag_hdr); 1091 + fragheaderlen - sizeof(struct frag_hdr);
@@ -1126,11 +1102,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1126 struct ipv6_pinfo *np = inet6_sk(sk); 1102 struct ipv6_pinfo *np = inet6_sk(sk);
1127 struct inet_cork *cork; 1103 struct inet_cork *cork;
1128 struct sk_buff *skb, *skb_prev = NULL; 1104 struct sk_buff *skb, *skb_prev = NULL;
1129 unsigned int maxfraglen, fragheaderlen; 1105 unsigned int maxfraglen, fragheaderlen, mtu;
1130 int exthdrlen; 1106 int exthdrlen;
1131 int dst_exthdrlen; 1107 int dst_exthdrlen;
1132 int hh_len; 1108 int hh_len;
1133 int mtu;
1134 int copy; 1109 int copy;
1135 int err; 1110 int err;
1136 int offset = 0; 1111 int offset = 0;
@@ -1243,27 +1218,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1243 * --yoshfuji 1218 * --yoshfuji
1244 */ 1219 */
1245 1220
1246 cork->length += length; 1221 if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
1247 if (length > mtu) { 1222 sk->sk_protocol == IPPROTO_RAW)) {
1248 int proto = sk->sk_protocol; 1223 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
1249 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ 1224 return -EMSGSIZE;
1250 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); 1225 }
1251 return -EMSGSIZE;
1252 }
1253
1254 if (proto == IPPROTO_UDP &&
1255 (rt->dst.dev->features & NETIF_F_UFO)) {
1256 1226
1257 err = ip6_ufo_append_data(sk, getfrag, from, length, 1227 skb = skb_peek_tail(&sk->sk_write_queue);
1258 hh_len, fragheaderlen, 1228 cork->length += length;
1259 transhdrlen, mtu, flags, rt); 1229 if (((length > mtu) ||
1260 if (err) 1230 (skb && skb_is_gso(skb))) &&
1261 goto error; 1231 (sk->sk_protocol == IPPROTO_UDP) &&
1262 return 0; 1232 (rt->dst.dev->features & NETIF_F_UFO)) {
1263 } 1233 err = ip6_ufo_append_data(sk, getfrag, from, length,
1234 hh_len, fragheaderlen,
1235 transhdrlen, mtu, flags, rt);
1236 if (err)
1237 goto error;
1238 return 0;
1264 } 1239 }
1265 1240
1266 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 1241 if (!skb)
1267 goto alloc_new_skb; 1242 goto alloc_new_skb;
1268 1243
1269 while (length > 0) { 1244 while (length > 0) {
@@ -1287,7 +1262,9 @@ alloc_new_skb:
1287 /* update mtu and maxfraglen if necessary */ 1262 /* update mtu and maxfraglen if necessary */
1288 if (skb == NULL || skb_prev == NULL) 1263 if (skb == NULL || skb_prev == NULL)
1289 ip6_append_data_mtu(&mtu, &maxfraglen, 1264 ip6_append_data_mtu(&mtu, &maxfraglen,
1290 fragheaderlen, skb, rt); 1265 fragheaderlen, skb, rt,
1266 np->pmtudisc ==
1267 IPV6_PMTUDISC_PROBE);
1291 1268
1292 skb_prev = skb; 1269 skb_prev = skb;
1293 1270
@@ -1350,6 +1327,7 @@ alloc_new_skb:
1350 /* 1327 /*
1351 * Fill in the control structures 1328 * Fill in the control structures
1352 */ 1329 */
1330 skb->protocol = htons(ETH_P_IPV6);
1353 skb->ip_summed = CHECKSUM_NONE; 1331 skb->ip_summed = CHECKSUM_NONE;
1354 skb->csum = 0; 1332 skb->csum = 0;
1355 /* reserve for fragmentation and ipsec header */ 1333 /* reserve for fragmentation and ipsec header */
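
The ip6_append_data_mtu() fix above is easy to misread: the new min() caps the working MTU rather than overwriting it, so a caller-supplied frag_size can shrink but never grow, and IPV6_PMTUDISC_PROBE bounds fragments by the device MTU instead of the path MTU. A worked illustration with invented numbers:

#include <stdio.h>

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned int mtu = 1280;	/* caller-supplied frag_size */
	unsigned int dev_mtu = 1500, path_mtu = 1400;
	int pmtuprobe = 0;

	mtu = min_u(mtu, pmtuprobe ? dev_mtu : path_mtu);
	printf("effective mtu = %u\n", mtu);	/* stays 1280, never grows */
	return 0;
}
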
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 1e55866cead7..a791552e0422 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -41,6 +41,7 @@
41#include <linux/netfilter_ipv6.h> 41#include <linux/netfilter_ipv6.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/hash.h> 43#include <linux/hash.h>
44#include <linux/etherdevice.h>
44 45
45#include <asm/uaccess.h> 46#include <asm/uaccess.h>
46#include <linux/atomic.h> 47#include <linux/atomic.h>
@@ -315,6 +316,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
315 316
316 t = netdev_priv(dev); 317 t = netdev_priv(dev);
317 t->parms = *p; 318 t->parms = *p;
319 t->net = dev_net(dev);
318 err = ip6_tnl_create2(dev); 320 err = ip6_tnl_create2(dev);
319 if (err < 0) 321 if (err < 0)
320 goto failed_free; 322 goto failed_free;
@@ -374,7 +376,7 @@ static void
374ip6_tnl_dev_uninit(struct net_device *dev) 376ip6_tnl_dev_uninit(struct net_device *dev)
375{ 377{
376 struct ip6_tnl *t = netdev_priv(dev); 378 struct ip6_tnl *t = netdev_priv(dev);
377 struct net *net = dev_net(dev); 379 struct net *net = t->net;
378 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 380 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
379 381
380 if (dev == ip6n->fb_tnl_dev) 382 if (dev == ip6n->fb_tnl_dev)
@@ -741,7 +743,7 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
741{ 743{
742 struct __ip6_tnl_parm *p = &t->parms; 744 struct __ip6_tnl_parm *p = &t->parms;
743 int ret = 0; 745 int ret = 0;
744 struct net *net = dev_net(t->dev); 746 struct net *net = t->net;
745 747
746 if ((p->flags & IP6_TNL_F_CAP_RCV) || 748 if ((p->flags & IP6_TNL_F_CAP_RCV) ||
747 ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && 749 ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
@@ -800,14 +802,12 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
800 rcu_read_unlock(); 802 rcu_read_unlock();
801 goto discard; 803 goto discard;
802 } 804 }
803 secpath_reset(skb);
804 skb->mac_header = skb->network_header; 805 skb->mac_header = skb->network_header;
805 skb_reset_network_header(skb); 806 skb_reset_network_header(skb);
806 skb->protocol = htons(protocol); 807 skb->protocol = htons(protocol);
807 skb->pkt_type = PACKET_HOST;
808 memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); 808 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
809 809
810 __skb_tunnel_rx(skb, t->dev); 810 __skb_tunnel_rx(skb, t->dev, t->net);
811 811
812 err = dscp_ecn_decapsulate(t, ipv6h, skb); 812 err = dscp_ecn_decapsulate(t, ipv6h, skb);
813 if (unlikely(err)) { 813 if (unlikely(err)) {
@@ -895,7 +895,7 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
895{ 895{
896 struct __ip6_tnl_parm *p = &t->parms; 896 struct __ip6_tnl_parm *p = &t->parms;
897 int ret = 0; 897 int ret = 0;
898 struct net *net = dev_net(t->dev); 898 struct net *net = t->net;
899 899
900 if (p->flags & IP6_TNL_F_CAP_XMIT) { 900 if (p->flags & IP6_TNL_F_CAP_XMIT) {
901 struct net_device *ldev = NULL; 901 struct net_device *ldev = NULL;
@@ -945,8 +945,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
945 int encap_limit, 945 int encap_limit,
946 __u32 *pmtu) 946 __u32 *pmtu)
947{ 947{
948 struct net *net = dev_net(dev);
949 struct ip6_tnl *t = netdev_priv(dev); 948 struct ip6_tnl *t = netdev_priv(dev);
949 struct net *net = t->net;
950 struct net_device_stats *stats = &t->dev->stats; 950 struct net_device_stats *stats = &t->dev->stats;
951 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 951 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
952 struct ipv6_tel_txoption opt; 952 struct ipv6_tel_txoption opt;
@@ -996,6 +996,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
996 goto tx_err_dst_release; 996 goto tx_err_dst_release;
997 } 997 }
998 998
999 skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
1000
999 /* 1001 /*
1000 * Okay, now see if we can stuff it in the buffer as-is. 1002 * Okay, now see if we can stuff it in the buffer as-is.
1001 */ 1003 */
@@ -1013,7 +1015,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
1013 consume_skb(skb); 1015 consume_skb(skb);
1014 skb = new_skb; 1016 skb = new_skb;
1015 } 1017 }
1016 skb_dst_drop(skb);
1017 if (fl6->flowi6_mark) { 1018 if (fl6->flowi6_mark) {
1018 skb_dst_set(skb, dst); 1019 skb_dst_set(skb, dst);
1019 ndst = NULL; 1020 ndst = NULL;
@@ -1027,6 +1028,12 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
1027 init_tel_txopt(&opt, encap_limit); 1028 init_tel_txopt(&opt, encap_limit);
1028 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); 1029 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
1029 } 1030 }
1031
1032 if (likely(!skb->encapsulation)) {
1033 skb_reset_inner_headers(skb);
1034 skb->encapsulation = 1;
1035 }
1036
1030 skb_push(skb, sizeof(struct ipv6hdr)); 1037 skb_push(skb, sizeof(struct ipv6hdr));
1031 skb_reset_network_header(skb); 1038 skb_reset_network_header(skb);
1032 ipv6h = ipv6_hdr(skb); 1039 ipv6h = ipv6_hdr(skb);
@@ -1202,7 +1209,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
1202 int strict = (ipv6_addr_type(&p->raddr) & 1209 int strict = (ipv6_addr_type(&p->raddr) &
1203 (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); 1210 (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
1204 1211
1205 struct rt6_info *rt = rt6_lookup(dev_net(dev), 1212 struct rt6_info *rt = rt6_lookup(t->net,
1206 &p->raddr, &p->laddr, 1213 &p->raddr, &p->laddr,
1207 p->link, strict); 1214 p->link, strict);
1208 1215
@@ -1251,7 +1258,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
1251 1258
1252static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) 1259static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
1253{ 1260{
1254 struct net *net = dev_net(t->dev); 1261 struct net *net = t->net;
1255 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1262 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1256 int err; 1263 int err;
1257 1264
@@ -1463,8 +1470,10 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
1463 dev->mtu-=8; 1470 dev->mtu-=8;
1464 dev->flags |= IFF_NOARP; 1471 dev->flags |= IFF_NOARP;
1465 dev->addr_len = sizeof(struct in6_addr); 1472 dev->addr_len = sizeof(struct in6_addr);
1466 dev->features |= NETIF_F_NETNS_LOCAL;
1467 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 1473 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1474 /* This perm addr will be used as interface identifier by IPv6 */
1475 dev->addr_assign_type = NET_ADDR_RANDOM;
1476 eth_random_addr(dev->perm_addr);
1468} 1477}
1469 1478
1470 1479
@@ -1479,6 +1488,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
1479 struct ip6_tnl *t = netdev_priv(dev); 1488 struct ip6_tnl *t = netdev_priv(dev);
1480 1489
1481 t->dev = dev; 1490 t->dev = dev;
1491 t->net = dev_net(dev);
1482 dev->tstats = alloc_percpu(struct pcpu_tstats); 1492 dev->tstats = alloc_percpu(struct pcpu_tstats);
1483 if (!dev->tstats) 1493 if (!dev->tstats)
1484 return -ENOMEM; 1494 return -ENOMEM;
@@ -1596,9 +1606,9 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
1596static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], 1606static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
1597 struct nlattr *data[]) 1607 struct nlattr *data[])
1598{ 1608{
1599 struct ip6_tnl *t; 1609 struct ip6_tnl *t = netdev_priv(dev);
1600 struct __ip6_tnl_parm p; 1610 struct __ip6_tnl_parm p;
1601 struct net *net = dev_net(dev); 1611 struct net *net = t->net;
1602 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1612 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1603 1613
1604 if (dev == ip6n->fb_tnl_dev) 1614 if (dev == ip6n->fb_tnl_dev)
@@ -1646,9 +1656,9 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
1646 1656
1647 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || 1657 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
1648 nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr), 1658 nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr),
1649 &parm->raddr) ||
1650 nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
1651 &parm->laddr) || 1659 &parm->laddr) ||
1660 nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
1661 &parm->raddr) ||
1652 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || 1662 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
1653 nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || 1663 nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
1654 nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || 1664 nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
@@ -1699,20 +1709,28 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
1699 1709
1700static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) 1710static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
1701{ 1711{
1712 struct net *net = dev_net(ip6n->fb_tnl_dev);
1713 struct net_device *dev, *aux;
1702 int h; 1714 int h;
1703 struct ip6_tnl *t; 1715 struct ip6_tnl *t;
1704 LIST_HEAD(list); 1716 LIST_HEAD(list);
1705 1717
1718 for_each_netdev_safe(net, dev, aux)
1719 if (dev->rtnl_link_ops == &ip6_link_ops)
1720 unregister_netdevice_queue(dev, &list);
1721
1706 for (h = 0; h < HASH_SIZE; h++) { 1722 for (h = 0; h < HASH_SIZE; h++) {
1707 t = rtnl_dereference(ip6n->tnls_r_l[h]); 1723 t = rtnl_dereference(ip6n->tnls_r_l[h]);
1708 while (t != NULL) { 1724 while (t != NULL) {
1709 unregister_netdevice_queue(t->dev, &list); 1725 /* If dev is in the same netns, it has already
1726 * been added to the list by the previous loop.
1727 */
1728 if (!net_eq(dev_net(t->dev), net))
1729 unregister_netdevice_queue(t->dev, &list);
1710 t = rtnl_dereference(t->next); 1730 t = rtnl_dereference(t->next);
1711 } 1731 }
1712 } 1732 }
1713 1733
1714 t = rtnl_dereference(ip6n->tnls_wc[0]);
1715 unregister_netdevice_queue(t->dev, &list);
1716 unregister_netdevice_many(&list); 1734 unregister_netdevice_many(&list);
1717} 1735}
1718 1736
@@ -1732,6 +1750,11 @@ static int __net_init ip6_tnl_init_net(struct net *net)
1732 if (!ip6n->fb_tnl_dev) 1750 if (!ip6n->fb_tnl_dev)
1733 goto err_alloc_dev; 1751 goto err_alloc_dev;
1734 dev_net_set(ip6n->fb_tnl_dev, net); 1752 dev_net_set(ip6n->fb_tnl_dev, net);
1753 ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
1754 /* FB netdevice is special: we have one, and only one per netns.
1755 * Allowing to move it to another netns is clearly unsafe.
1756 */
1757 ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;
1735 1758
1736 err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); 1759 err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
1737 if (err < 0) 1760 if (err < 0)
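
The reworked ip6_tnl_destroy_tunnels() above has a subtle two-pass structure: the netdev walk queues every tunnel device registered in the dying netns (now including the fb device, since it gained rtnl_link_ops), and the per-tunnel hash walk must skip those to avoid double unregistration of devices that were moved across namespaces. A simplified stand-in with fake types:

#include <stdbool.h>
#include <stdio.h>

struct dev { const char *name; int netns; bool queued; };

static void queue_unregister(struct dev *d)
{
	if (d->queued) {
		printf("BUG: %s queued twice\n", d->name);
		return;
	}
	d->queued = true;
	printf("queued %s\n", d->name);
}

int main(void)
{
	int dying_ns = 1;
	struct dev local = { "ip6tnl0", 1, false };
	struct dev foreign = { "ip6tnl1", 2, false };	/* linked here, lives elsewhere */
	struct dev *hash[] = { &local, &foreign };

	/* pass 1: every tunnel netdev registered in the dying netns */
	queue_unregister(&local);

	/* pass 2: hash walk, skipping devices already caught by pass 1 */
	for (int i = 0; i < 2; i++)
		if (hash[i]->netns != dying_ns)
			queue_unregister(hash[i]);
	return 0;
}
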
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 241fb8ad9fcf..f365310bfcca 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -110,8 +110,8 @@ static struct kmem_cache *mrt_cachep __read_mostly;
110static struct mr6_table *ip6mr_new_table(struct net *net, u32 id); 110static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
111static void ip6mr_free_table(struct mr6_table *mrt); 111static void ip6mr_free_table(struct mr6_table *mrt);
112 112
113static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, 113static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
114 struct sk_buff *skb, struct mfc6_cache *cache); 114 struct sk_buff *skb, struct mfc6_cache *cache);
115static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, 115static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
116 mifi_t mifi, int assert); 116 mifi_t mifi, int assert);
117static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, 117static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
@@ -259,10 +259,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
259{ 259{
260 struct mr6_table *mrt, *next; 260 struct mr6_table *mrt, *next;
261 261
262 rtnl_lock();
262 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { 263 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
263 list_del(&mrt->list); 264 list_del(&mrt->list);
264 ip6mr_free_table(mrt); 265 ip6mr_free_table(mrt);
265 } 266 }
267 rtnl_unlock();
266 fib_rules_unregister(net->ipv6.mr6_rules_ops); 268 fib_rules_unregister(net->ipv6.mr6_rules_ops);
267} 269}
268#else 270#else
@@ -289,7 +291,10 @@ static int __net_init ip6mr_rules_init(struct net *net)
289 291
290static void __net_exit ip6mr_rules_exit(struct net *net) 292static void __net_exit ip6mr_rules_exit(struct net *net)
291{ 293{
294 rtnl_lock();
292 ip6mr_free_table(net->ipv6.mrt6); 295 ip6mr_free_table(net->ipv6.mrt6);
296 net->ipv6.mrt6 = NULL;
297 rtnl_unlock();
293} 298}
294#endif 299#endif
295 300
@@ -667,9 +672,8 @@ static int pim6_rcv(struct sk_buff *skb)
667 skb_reset_network_header(skb); 672 skb_reset_network_header(skb);
668 skb->protocol = htons(ETH_P_IPV6); 673 skb->protocol = htons(ETH_P_IPV6);
669 skb->ip_summed = CHECKSUM_NONE; 674 skb->ip_summed = CHECKSUM_NONE;
670 skb->pkt_type = PACKET_HOST;
671 675
672 skb_tunnel_rx(skb, reg_dev); 676 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
673 677
674 netif_rx(skb); 678 netif_rx(skb);
675 679
@@ -1319,7 +1323,7 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
1319static int ip6mr_device_event(struct notifier_block *this, 1323static int ip6mr_device_event(struct notifier_block *this,
1320 unsigned long event, void *ptr) 1324 unsigned long event, void *ptr)
1321{ 1325{
1322 struct net_device *dev = ptr; 1326 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1323 struct net *net = dev_net(dev); 1327 struct net *net = dev_net(dev);
1324 struct mr6_table *mrt; 1328 struct mr6_table *mrt;
1325 struct mif_device *v; 1329 struct mif_device *v;
@@ -2069,8 +2073,8 @@ static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2069 return ct; 2073 return ct;
2070} 2074}
2071 2075
2072static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, 2076static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2073 struct sk_buff *skb, struct mfc6_cache *cache) 2077 struct sk_buff *skb, struct mfc6_cache *cache)
2074{ 2078{
2075 int psend = -1; 2079 int psend = -1;
2076 int vif, ct; 2080 int vif, ct;
@@ -2151,12 +2155,11 @@ forward:
2151last_forward: 2155last_forward:
2152 if (psend != -1) { 2156 if (psend != -1) {
2153 ip6mr_forward2(net, mrt, skb, cache, psend); 2157 ip6mr_forward2(net, mrt, skb, cache, psend);
2154 return 0; 2158 return;
2155 } 2159 }
2156 2160
2157dont_forward: 2161dont_forward:
2158 kfree_skb(skb); 2162 kfree_skb(skb);
2159 return 0;
2160} 2163}
2161 2164
2162 2165
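
The ip6mr_rules_exit() hunks above wrap table teardown in rtnl_lock()/rtnl_unlock() because readers traverse the same table list under RTNL, and clearing net->ipv6.mrt6 under the lock keeps them from dereferencing a freed table. A userspace sketch of safe-iteration teardown under a mutex (pthread mutex standing in for RTNL):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct tbl { int id; struct tbl *next; };

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct tbl *tables;

static void free_all(void)
{
	pthread_mutex_lock(&list_lock);	/* mirrors rtnl_lock() */
	struct tbl *t = tables, *next;

	while (t) {
		next = t->next;		/* grab next before freeing */
		printf("free table %d\n", t->id);
		free(t);
		t = next;
	}
	tables = NULL;			/* mirrors net->ipv6.mrt6 = NULL */
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct tbl *t = malloc(sizeof(*t));
		t->id = i;
		t->next = tables;
		tables = t;
	}
	free_all();
	return 0;
}
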
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 7af5aee75d98..5636a912074a 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -76,7 +76,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
76 return; 76 return;
77 77
78 if (type == NDISC_REDIRECT) 78 if (type == NDISC_REDIRECT)
79 ip6_redirect(skb, net, 0, 0); 79 ip6_redirect(skb, net, skb->dev->ifindex, 0);
80 else 80 else
81 ip6_update_pmtu(skb, net, info, 0, 0); 81 ip6_update_pmtu(skb, net, info, 0, 0);
82 xfrm_state_put(x); 82 xfrm_state_put(x);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bfa6cc36ef2a..d18f9f903db6 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -44,6 +44,7 @@
44#include <linux/proc_fs.h> 44#include <linux/proc_fs.h>
45#include <linux/seq_file.h> 45#include <linux/seq_file.h>
46#include <linux/slab.h> 46#include <linux/slab.h>
47#include <linux/pkt_sched.h>
47#include <net/mld.h> 48#include <net/mld.h>
48 49
49#include <linux/netfilter.h> 50#include <linux/netfilter.h>
@@ -94,6 +95,7 @@ static void mld_ifc_event(struct inet6_dev *idev);
94static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); 95static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
95static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *addr); 96static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *addr);
96static void mld_clear_delrec(struct inet6_dev *idev); 97static void mld_clear_delrec(struct inet6_dev *idev);
98static bool mld_in_v1_mode(const struct inet6_dev *idev);
97static int sf_setstate(struct ifmcaddr6 *pmc); 99static int sf_setstate(struct ifmcaddr6 *pmc);
98static void sf_markstate(struct ifmcaddr6 *pmc); 100static void sf_markstate(struct ifmcaddr6 *pmc);
99static void ip6_mc_clear_src(struct ifmcaddr6 *pmc); 101static void ip6_mc_clear_src(struct ifmcaddr6 *pmc);
@@ -106,14 +108,15 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
106static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, 108static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
107 struct inet6_dev *idev); 109 struct inet6_dev *idev);
108 110
109
110#define IGMP6_UNSOLICITED_IVAL (10*HZ)
111#define MLD_QRV_DEFAULT 2 111#define MLD_QRV_DEFAULT 2
112/* RFC3810, 9.2. Query Interval */
113#define MLD_QI_DEFAULT (125 * HZ)
114/* RFC3810, 9.3. Query Response Interval */
115#define MLD_QRI_DEFAULT (10 * HZ)
112 116
113#define MLD_V1_SEEN(idev) (dev_net((idev)->dev)->ipv6.devconf_all->force_mld_version == 1 || \ 117/* RFC3810, 8.1 Query Version Distinctions */
114 (idev)->cnf.force_mld_version == 1 || \ 118#define MLD_V1_QUERY_LEN 24
115 ((idev)->mc_v1_seen && \ 119#define MLD_V2_QUERY_LEN_MIN 28
116 time_before(jiffies, (idev)->mc_v1_seen)))
117 120
118#define IPV6_MLD_MAX_MSF 64 121#define IPV6_MLD_MAX_MSF 64
119 122
@@ -128,6 +131,18 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
128 pmc != NULL; \ 131 pmc != NULL; \
129 pmc = rcu_dereference(pmc->next)) 132 pmc = rcu_dereference(pmc->next))
130 133
134static int unsolicited_report_interval(struct inet6_dev *idev)
135{
136 int iv;
137
138 if (mld_in_v1_mode(idev))
139 iv = idev->cnf.mldv1_unsolicited_report_interval;
140 else
141 iv = idev->cnf.mldv2_unsolicited_report_interval;
142
143 return iv > 0 ? iv : 1;
144}
145
131int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) 146int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
132{ 147{
133 struct net_device *dev = NULL; 148 struct net_device *dev = NULL;
@@ -676,7 +691,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
676 if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT)) 691 if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT))
677 return; 692 return;
678 693
679 if (MLD_V1_SEEN(mc->idev)) { 694 if (mld_in_v1_mode(mc->idev)) {
680 igmp6_join_group(mc); 695 igmp6_join_group(mc);
681 return; 696 return;
682 } 697 }
@@ -984,21 +999,49 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
984 999
985static void mld_gq_start_timer(struct inet6_dev *idev) 1000static void mld_gq_start_timer(struct inet6_dev *idev)
986{ 1001{
987 int tv = net_random() % idev->mc_maxdelay; 1002 unsigned long tv = net_random() % idev->mc_maxdelay;
988 1003
989 idev->mc_gq_running = 1; 1004 idev->mc_gq_running = 1;
990 if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2)) 1005 if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2))
991 in6_dev_hold(idev); 1006 in6_dev_hold(idev);
992} 1007}
993 1008
994static void mld_ifc_start_timer(struct inet6_dev *idev, int delay) 1009static void mld_gq_stop_timer(struct inet6_dev *idev)
1010{
1011 idev->mc_gq_running = 0;
1012 if (del_timer(&idev->mc_gq_timer))
1013 __in6_dev_put(idev);
1014}
1015
1016static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay)
995{ 1017{
996 int tv = net_random() % delay; 1018 unsigned long tv = net_random() % delay;
997 1019
998 if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2)) 1020 if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2))
999 in6_dev_hold(idev); 1021 in6_dev_hold(idev);
1000} 1022}
1001 1023
1024static void mld_ifc_stop_timer(struct inet6_dev *idev)
1025{
1026 idev->mc_ifc_count = 0;
1027 if (del_timer(&idev->mc_ifc_timer))
1028 __in6_dev_put(idev);
1029}
1030
1031static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay)
1032{
1033 unsigned long tv = net_random() % delay;
1034
1035 if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2))
1036 in6_dev_hold(idev);
1037}
1038
1039static void mld_dad_stop_timer(struct inet6_dev *idev)
1040{
1041 if (del_timer(&idev->mc_dad_timer))
1042 __in6_dev_put(idev);
1043}
1044
1002/* 1045/*
1003 * IGMP handling (alias multicast ICMPv6 messages) 1046 * IGMP handling (alias multicast ICMPv6 messages)
1004 */ 1047 */
@@ -1017,12 +1060,9 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
1017 delay = ma->mca_timer.expires - jiffies; 1060 delay = ma->mca_timer.expires - jiffies;
1018 } 1061 }
1019 1062
1020 if (delay >= resptime) { 1063 if (delay >= resptime)
1021 if (resptime) 1064 delay = net_random() % resptime;
1022 delay = net_random() % resptime; 1065
1023 else
1024 delay = 1;
1025 }
1026 ma->mca_timer.expires = jiffies + delay; 1066 ma->mca_timer.expires = jiffies + delay;
1027 if (!mod_timer(&ma->mca_timer, jiffies + delay)) 1067 if (!mod_timer(&ma->mca_timer, jiffies + delay))
1028 atomic_inc(&ma->mca_refcnt); 1068 atomic_inc(&ma->mca_refcnt);
@@ -1089,6 +1129,158 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
1089 return true; 1129 return true;
1090} 1130}
1091 1131
1132static int mld_force_mld_version(const struct inet6_dev *idev)
1133{
1134	/* Normally, both are 0 here. If enforcement of a particular
1135	 * version is in use, per-device enforcement takes lower
1136	 * precedence than the 'all' setting (.../conf/all/force_mld_version).
1137 */
1138
1139 if (dev_net(idev->dev)->ipv6.devconf_all->force_mld_version != 0)
1140 return dev_net(idev->dev)->ipv6.devconf_all->force_mld_version;
1141 else
1142 return idev->cnf.force_mld_version;
1143}
1144
1145static bool mld_in_v2_mode_only(const struct inet6_dev *idev)
1146{
1147 return mld_force_mld_version(idev) == 2;
1148}
1149
1150static bool mld_in_v1_mode_only(const struct inet6_dev *idev)
1151{
1152 return mld_force_mld_version(idev) == 1;
1153}
1154
1155static bool mld_in_v1_mode(const struct inet6_dev *idev)
1156{
1157 if (mld_in_v2_mode_only(idev))
1158 return false;
1159 if (mld_in_v1_mode_only(idev))
1160 return true;
1161 if (idev->mc_v1_seen && time_before(jiffies, idev->mc_v1_seen))
1162 return true;
1163
1164 return false;
1165}
1166
1167static void mld_set_v1_mode(struct inet6_dev *idev)
1168{
1169 /* RFC3810, relevant sections:
1170 * - 9.1. Robustness Variable
1171 * - 9.2. Query Interval
1172 * - 9.3. Query Response Interval
1173 * - 9.12. Older Version Querier Present Timeout
1174 */
1175 unsigned long switchback;
1176
1177 switchback = (idev->mc_qrv * idev->mc_qi) + idev->mc_qri;
1178
1179 idev->mc_v1_seen = jiffies + switchback;
1180}
1181
1182static void mld_update_qrv(struct inet6_dev *idev,
1183 const struct mld2_query *mlh2)
1184{
1185 /* RFC3810, relevant sections:
1186 * - 5.1.8. QRV (Querier's Robustness Variable)
1187 * - 9.1. Robustness Variable
1188 */
1189
1190 /* The value of the Robustness Variable MUST NOT be zero,
1191 * and SHOULD NOT be one. Catch this here if we ever run
1192 * into such a case in future.
1193 */
1194 WARN_ON(idev->mc_qrv == 0);
1195
1196 if (mlh2->mld2q_qrv > 0)
1197 idev->mc_qrv = mlh2->mld2q_qrv;
1198
1199 if (unlikely(idev->mc_qrv < 2)) {
1200 net_warn_ratelimited("IPv6: MLD: clamping QRV from %u to %u!\n",
1201 idev->mc_qrv, MLD_QRV_DEFAULT);
1202 idev->mc_qrv = MLD_QRV_DEFAULT;
1203 }
1204}
1205
1206static void mld_update_qi(struct inet6_dev *idev,
1207 const struct mld2_query *mlh2)
1208{
1209 /* RFC3810, relevant sections:
1210 * - 5.1.9. QQIC (Querier's Query Interval Code)
1211 * - 9.2. Query Interval
1212 * - 9.12. Older Version Querier Present Timeout
1213 * (the [Query Interval] in the last Query received)
1214 */
1215 unsigned long mc_qqi;
1216
1217 if (mlh2->mld2q_qqic < 128) {
1218 mc_qqi = mlh2->mld2q_qqic;
1219 } else {
1220 unsigned long mc_man, mc_exp;
1221
1222 mc_exp = MLDV2_QQIC_EXP(mlh2->mld2q_qqic);
1223 mc_man = MLDV2_QQIC_MAN(mlh2->mld2q_qqic);
1224
1225 mc_qqi = (mc_man | 0x10) << (mc_exp + 3);
1226 }
1227
1228 idev->mc_qi = mc_qqi * HZ;
1229}
1230
1231static void mld_update_qri(struct inet6_dev *idev,
1232 const struct mld2_query *mlh2)
1233{
1234 /* RFC3810, relevant sections:
1235 * - 5.1.3. Maximum Response Code
1236 * - 9.3. Query Response Interval
1237 */
1238 idev->mc_qri = msecs_to_jiffies(mldv2_mrc(mlh2));
1239}
1240
1241static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
1242 unsigned long *max_delay)
1243{
1244 unsigned long mldv1_md;
1245
1246 /* Ignore v1 queries */
1247 if (mld_in_v2_mode_only(idev))
1248 return -EINVAL;
1249
1250 /* MLDv1 router present */
1251 mldv1_md = ntohs(mld->mld_maxdelay);
1252 *max_delay = max(msecs_to_jiffies(mldv1_md), 1UL);
1253
1254 mld_set_v1_mode(idev);
1255
1256 /* cancel MLDv2 report timer */
1257 mld_gq_stop_timer(idev);
1258 /* cancel the interface change timer */
1259 mld_ifc_stop_timer(idev);
1260 /* clear deleted report items */
1261 mld_clear_delrec(idev);
1262
1263 return 0;
1264}
1265
1266static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
1267 unsigned long *max_delay)
1268{
1269 /* hosts need to stay in MLDv1 mode, discard MLDv2 queries */
1270 if (mld_in_v1_mode(idev))
1271 return -EINVAL;
1272
1273 *max_delay = max(msecs_to_jiffies(mldv2_mrc(mld)), 1UL);
1274
1275 mld_update_qrv(idev, mld);
1276 mld_update_qi(idev, mld);
1277 mld_update_qri(idev, mld);
1278
1279 idev->mc_maxdelay = *max_delay;
1280
1281 return 0;
1282}
1283
1092/* called with rcu_read_lock() */ 1284/* called with rcu_read_lock() */
1093int igmp6_event_query(struct sk_buff *skb) 1285int igmp6_event_query(struct sk_buff *skb)
1094{ 1286{
@@ -1100,7 +1292,7 @@ int igmp6_event_query(struct sk_buff *skb)
1100 struct mld_msg *mld; 1292 struct mld_msg *mld;
1101 int group_type; 1293 int group_type;
1102 int mark = 0; 1294 int mark = 0;
1103 int len; 1295 int len, err;
1104 1296
1105 if (!pskb_may_pull(skb, sizeof(struct in6_addr))) 1297 if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
1106 return -EINVAL; 1298 return -EINVAL;
@@ -1114,7 +1306,6 @@ int igmp6_event_query(struct sk_buff *skb)
1114 return -EINVAL; 1306 return -EINVAL;
1115 1307
1116 idev = __in6_dev_get(skb->dev); 1308 idev = __in6_dev_get(skb->dev);
1117
1118 if (idev == NULL) 1309 if (idev == NULL)
1119 return 0; 1310 return 0;
1120 1311
@@ -1126,35 +1317,23 @@ int igmp6_event_query(struct sk_buff *skb)
1126 !(group_type&IPV6_ADDR_MULTICAST)) 1317 !(group_type&IPV6_ADDR_MULTICAST))
1127 return -EINVAL; 1318 return -EINVAL;
1128 1319
1129 if (len == 24) { 1320 if (len == MLD_V1_QUERY_LEN) {
1130 int switchback; 1321 err = mld_process_v1(idev, mld, &max_delay);
1131 /* MLDv1 router present */ 1322 if (err < 0)
1132 1323 return err;
1133 /* Translate milliseconds to jiffies */ 1324 } else if (len >= MLD_V2_QUERY_LEN_MIN) {
1134 max_delay = (ntohs(mld->mld_maxdelay)*HZ)/1000;
1135
1136 switchback = (idev->mc_qrv + 1) * max_delay;
1137 idev->mc_v1_seen = jiffies + switchback;
1138
1139 /* cancel the interface change timer */
1140 idev->mc_ifc_count = 0;
1141 if (del_timer(&idev->mc_ifc_timer))
1142 __in6_dev_put(idev);
1143 /* clear deleted report items */
1144 mld_clear_delrec(idev);
1145 } else if (len >= 28) {
1146 int srcs_offset = sizeof(struct mld2_query) - 1325 int srcs_offset = sizeof(struct mld2_query) -
1147 sizeof(struct icmp6hdr); 1326 sizeof(struct icmp6hdr);
1327
1148 if (!pskb_may_pull(skb, srcs_offset)) 1328 if (!pskb_may_pull(skb, srcs_offset))
1149 return -EINVAL; 1329 return -EINVAL;
1150 1330
1151 mlh2 = (struct mld2_query *)skb_transport_header(skb); 1331 mlh2 = (struct mld2_query *)skb_transport_header(skb);
1152 max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000; 1332
1153 if (!max_delay) 1333 err = mld_process_v2(idev, mlh2, &max_delay);
1154 max_delay = 1; 1334 if (err < 0)
1155 idev->mc_maxdelay = max_delay; 1335 return err;
1156 if (mlh2->mld2q_qrv) 1336
1157 idev->mc_qrv = mlh2->mld2q_qrv;
1158 if (group_type == IPV6_ADDR_ANY) { /* general query */ 1337 if (group_type == IPV6_ADDR_ANY) { /* general query */
1159 if (mlh2->mld2q_nsrcs) 1338 if (mlh2->mld2q_nsrcs)
1160 return -EINVAL; /* no sources allowed */ 1339 return -EINVAL; /* no sources allowed */
@@ -1343,8 +1522,9 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb,
1343 hdr->daddr = *daddr; 1522 hdr->daddr = *daddr;
1344} 1523}
1345 1524
1346static struct sk_buff *mld_newpack(struct net_device *dev, int size) 1525static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size)
1347{ 1526{
1527 struct net_device *dev = idev->dev;
1348 struct net *net = dev_net(dev); 1528 struct net *net = dev_net(dev);
1349 struct sock *sk = net->ipv6.igmp_sk; 1529 struct sock *sk = net->ipv6.igmp_sk;
1350 struct sk_buff *skb; 1530 struct sk_buff *skb;
@@ -1367,9 +1547,10 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
1367 if (!skb) 1547 if (!skb)
1368 return NULL; 1548 return NULL;
1369 1549
1550 skb->priority = TC_PRIO_CONTROL;
1370 skb_reserve(skb, hlen); 1551 skb_reserve(skb, hlen);
1371 1552
1372 if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { 1553 if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
1373 /* <draft-ietf-magma-mld-source-05.txt>: 1554 /* <draft-ietf-magma-mld-source-05.txt>:
1374 * use unspecified address as the source address 1555 * use unspecified address as the source address
1375 * when a valid link-local address is not available. 1556 * when a valid link-local address is not available.
@@ -1409,8 +1590,9 @@ static void mld_sendpack(struct sk_buff *skb)
1409 idev = __in6_dev_get(skb->dev); 1590 idev = __in6_dev_get(skb->dev);
1410 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); 1591 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
1411 1592
1412 payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); 1593 payload_len = (skb_tail_pointer(skb) - skb_network_header(skb)) -
1413 mldlen = skb->tail - skb->transport_header; 1594 sizeof(*pip6);
1595 mldlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1414 pip6->payload_len = htons(payload_len); 1596 pip6->payload_len = htons(payload_len);
1415 1597
1416 pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, 1598 pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
@@ -1465,7 +1647,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1465 struct mld2_grec *pgr; 1647 struct mld2_grec *pgr;
1466 1648
1467 if (!skb) 1649 if (!skb)
1468 skb = mld_newpack(dev, dev->mtu); 1650 skb = mld_newpack(pmc->idev, dev->mtu);
1469 if (!skb) 1651 if (!skb)
1470 return NULL; 1652 return NULL;
1471 pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec)); 1653 pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec));
@@ -1485,7 +1667,8 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1485static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, 1667static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1486 int type, int gdeleted, int sdeleted) 1668 int type, int gdeleted, int sdeleted)
1487{ 1669{
1488 struct net_device *dev = pmc->idev->dev; 1670 struct inet6_dev *idev = pmc->idev;
1671 struct net_device *dev = idev->dev;
1489 struct mld2_report *pmr; 1672 struct mld2_report *pmr;
1490 struct mld2_grec *pgr = NULL; 1673 struct mld2_grec *pgr = NULL;
1491 struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; 1674 struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
@@ -1514,7 +1697,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1514 AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { 1697 AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
1515 if (skb) 1698 if (skb)
1516 mld_sendpack(skb); 1699 mld_sendpack(skb);
1517 skb = mld_newpack(dev, dev->mtu); 1700 skb = mld_newpack(idev, dev->mtu);
1518 } 1701 }
1519 } 1702 }
1520 first = 1; 1703 first = 1;
@@ -1541,7 +1724,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1541 pgr->grec_nsrcs = htons(scount); 1724 pgr->grec_nsrcs = htons(scount);
1542 if (skb) 1725 if (skb)
1543 mld_sendpack(skb); 1726 mld_sendpack(skb);
1544 skb = mld_newpack(dev, dev->mtu); 1727 skb = mld_newpack(idev, dev->mtu);
1545 first = 1; 1728 first = 1;
1546 scount = 0; 1729 scount = 0;
1547 } 1730 }
@@ -1596,8 +1779,8 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
1596 struct sk_buff *skb = NULL; 1779 struct sk_buff *skb = NULL;
1597 int type; 1780 int type;
1598 1781
1782 read_lock_bh(&idev->lock);
1599 if (!pmc) { 1783 if (!pmc) {
1600 read_lock_bh(&idev->lock);
1601 for (pmc=idev->mc_list; pmc; pmc=pmc->next) { 1784 for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
1602 if (pmc->mca_flags & MAF_NOREPORT) 1785 if (pmc->mca_flags & MAF_NOREPORT)
1603 continue; 1786 continue;
@@ -1609,7 +1792,6 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
1609 skb = add_grec(skb, pmc, type, 0, 0); 1792 skb = add_grec(skb, pmc, type, 0, 0);
1610 spin_unlock_bh(&pmc->mca_lock); 1793 spin_unlock_bh(&pmc->mca_lock);
1611 } 1794 }
1612 read_unlock_bh(&idev->lock);
1613 } else { 1795 } else {
1614 spin_lock_bh(&pmc->mca_lock); 1796 spin_lock_bh(&pmc->mca_lock);
1615 if (pmc->mca_sfcount[MCAST_EXCLUDE]) 1797 if (pmc->mca_sfcount[MCAST_EXCLUDE])
@@ -1619,6 +1801,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
1619 skb = add_grec(skb, pmc, type, 0, 0); 1801 skb = add_grec(skb, pmc, type, 0, 0);
1620 spin_unlock_bh(&pmc->mca_lock); 1802 spin_unlock_bh(&pmc->mca_lock);
1621 } 1803 }
1804 read_unlock_bh(&idev->lock);
1622 if (skb) 1805 if (skb)
1623 mld_sendpack(skb); 1806 mld_sendpack(skb);
1624} 1807}
@@ -1758,7 +1941,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1758 rcu_read_unlock(); 1941 rcu_read_unlock();
1759 return; 1942 return;
1760 } 1943 }
1761 1944 skb->priority = TC_PRIO_CONTROL;
1762 skb_reserve(skb, hlen); 1945 skb_reserve(skb, hlen);
1763 1946
1764 if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { 1947 if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
@@ -1814,6 +1997,46 @@ err_out:
1814 goto out; 1997 goto out;
1815} 1998}
1816 1999
2000static void mld_resend_report(struct inet6_dev *idev)
2001{
2002 if (mld_in_v1_mode(idev)) {
2003 struct ifmcaddr6 *mcaddr;
2004 read_lock_bh(&idev->lock);
2005 for (mcaddr = idev->mc_list; mcaddr; mcaddr = mcaddr->next) {
2006 if (!(mcaddr->mca_flags & MAF_NOREPORT))
2007 igmp6_send(&mcaddr->mca_addr, idev->dev,
2008 ICMPV6_MGM_REPORT);
2009 }
2010 read_unlock_bh(&idev->lock);
2011 } else {
2012 mld_send_report(idev, NULL);
2013 }
2014}
2015
2016void ipv6_mc_dad_complete(struct inet6_dev *idev)
2017{
2018 idev->mc_dad_count = idev->mc_qrv;
2019 if (idev->mc_dad_count) {
2020 mld_resend_report(idev);
2021 idev->mc_dad_count--;
2022 if (idev->mc_dad_count)
2023 mld_dad_start_timer(idev, idev->mc_maxdelay);
2024 }
2025}
2026
2027static void mld_dad_timer_expire(unsigned long data)
2028{
2029 struct inet6_dev *idev = (struct inet6_dev *)data;
2030
2031 mld_resend_report(idev);
2032 if (idev->mc_dad_count) {
2033 idev->mc_dad_count--;
2034 if (idev->mc_dad_count)
2035 mld_dad_start_timer(idev, idev->mc_maxdelay);
2036 }
2037 in6_dev_put(idev);
2038}
2039
1817static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, 2040static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
1818 const struct in6_addr *psfsrc) 2041 const struct in6_addr *psfsrc)
1819{ 2042{
@@ -1840,7 +2063,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
1840 else 2063 else
1841 pmc->mca_sources = psf->sf_next; 2064 pmc->mca_sources = psf->sf_next;
1842 if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) && 2065 if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) &&
1843 !MLD_V1_SEEN(idev)) { 2066 !mld_in_v1_mode(idev)) {
1844 psf->sf_crcount = idev->mc_qrv; 2067 psf->sf_crcount = idev->mc_qrv;
1845 psf->sf_next = pmc->mca_tomb; 2068 psf->sf_next = pmc->mca_tomb;
1846 pmc->mca_tomb = psf; 2069 pmc->mca_tomb = psf;
@@ -2105,7 +2328,7 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)
2105 2328
2106 igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); 2329 igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
2107 2330
2108 delay = net_random() % IGMP6_UNSOLICITED_IVAL; 2331 delay = net_random() % unsolicited_report_interval(ma->idev);
2109 2332
2110 spin_lock_bh(&ma->mca_lock); 2333 spin_lock_bh(&ma->mca_lock);
2111 if (del_timer(&ma->mca_timer)) { 2334 if (del_timer(&ma->mca_timer)) {
@@ -2140,7 +2363,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
2140 2363
2141static void igmp6_leave_group(struct ifmcaddr6 *ma) 2364static void igmp6_leave_group(struct ifmcaddr6 *ma)
2142{ 2365{
2143 if (MLD_V1_SEEN(ma->idev)) { 2366 if (mld_in_v1_mode(ma->idev)) {
2144 if (ma->mca_flags & MAF_LAST_REPORTER) 2367 if (ma->mca_flags & MAF_LAST_REPORTER)
2145 igmp6_send(&ma->mca_addr, ma->idev->dev, 2368 igmp6_send(&ma->mca_addr, ma->idev->dev,
2146 ICMPV6_MGM_REDUCTION); 2369 ICMPV6_MGM_REDUCTION);
@@ -2156,7 +2379,7 @@ static void mld_gq_timer_expire(unsigned long data)
2156 2379
2157 idev->mc_gq_running = 0; 2380 idev->mc_gq_running = 0;
2158 mld_send_report(idev, NULL); 2381 mld_send_report(idev, NULL);
2159 __in6_dev_put(idev); 2382 in6_dev_put(idev);
2160} 2383}
2161 2384
2162static void mld_ifc_timer_expire(unsigned long data) 2385static void mld_ifc_timer_expire(unsigned long data)
@@ -2169,12 +2392,12 @@ static void mld_ifc_timer_expire(unsigned long data)
2169 if (idev->mc_ifc_count) 2392 if (idev->mc_ifc_count)
2170 mld_ifc_start_timer(idev, idev->mc_maxdelay); 2393 mld_ifc_start_timer(idev, idev->mc_maxdelay);
2171 } 2394 }
2172 __in6_dev_put(idev); 2395 in6_dev_put(idev);
2173} 2396}
2174 2397
2175static void mld_ifc_event(struct inet6_dev *idev) 2398static void mld_ifc_event(struct inet6_dev *idev)
2176{ 2399{
2177 if (MLD_V1_SEEN(idev)) 2400 if (mld_in_v1_mode(idev))
2178 return; 2401 return;
2179 idev->mc_ifc_count = idev->mc_qrv; 2402 idev->mc_ifc_count = idev->mc_qrv;
2180 mld_ifc_start_timer(idev, 1); 2403 mld_ifc_start_timer(idev, 1);
@@ -2185,7 +2408,7 @@ static void igmp6_timer_handler(unsigned long data)
2185{ 2408{
2186 struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data; 2409 struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;
2187 2410
2188 if (MLD_V1_SEEN(ma->idev)) 2411 if (mld_in_v1_mode(ma->idev))
2189 igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); 2412 igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
2190 else 2413 else
2191 mld_send_report(ma->idev, ma); 2414 mld_send_report(ma->idev, ma);
@@ -2225,12 +2448,9 @@ void ipv6_mc_down(struct inet6_dev *idev)
2225 /* Withdraw multicast list */ 2448 /* Withdraw multicast list */
2226 2449
2227 read_lock_bh(&idev->lock); 2450 read_lock_bh(&idev->lock);
2228 idev->mc_ifc_count = 0; 2451 mld_ifc_stop_timer(idev);
2229 if (del_timer(&idev->mc_ifc_timer)) 2452 mld_gq_stop_timer(idev);
2230 __in6_dev_put(idev); 2453 mld_dad_stop_timer(idev);
2231 idev->mc_gq_running = 0;
2232 if (del_timer(&idev->mc_gq_timer))
2233 __in6_dev_put(idev);
2234 2454
2235 for (i = idev->mc_list; i; i=i->next) 2455 for (i = idev->mc_list; i; i=i->next)
2236 igmp6_group_dropped(i); 2456 igmp6_group_dropped(i);
@@ -2267,8 +2487,14 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
2267 idev->mc_ifc_count = 0; 2487 idev->mc_ifc_count = 0;
2268 setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire, 2488 setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire,
2269 (unsigned long)idev); 2489 (unsigned long)idev);
2490 setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire,
2491 (unsigned long)idev);
2492
2270 idev->mc_qrv = MLD_QRV_DEFAULT; 2493 idev->mc_qrv = MLD_QRV_DEFAULT;
2271 idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL; 2494 idev->mc_qi = MLD_QI_DEFAULT;
2495 idev->mc_qri = MLD_QRI_DEFAULT;
2496
2497 idev->mc_maxdelay = unsolicited_report_interval(idev);
2272 idev->mc_v1_seen = 0; 2498 idev->mc_v1_seen = 0;
2273 write_unlock_bh(&idev->lock); 2499 write_unlock_bh(&idev->lock);
2274} 2500}
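
The QQIC handling introduced in mld_update_qi() above follows RFC 3810's floating-point encoding: a value below 128 is taken literally, otherwise the low nibble is a mantissa and bits 4-6 an exponent, exactly what MLDV2_QQIC_EXP()/MLDV2_QQIC_MAN() extract. A minimal userspace sketch of the same arithmetic; decode_qqic() and the sample values are illustrative only, not part of the patch:

#include <stdio.h>

/* RFC 3810, 5.1.9: QQIC < 128 is literal; otherwise it encodes
 * (mant | 0x10) << (exp + 3), with exp = bits 4-6 and mant = bits 0-3,
 * mirroring the MLDV2_QQIC_EXP()/MLDV2_QQIC_MAN() path above.
 */
static unsigned long decode_qqic(unsigned char qqic)
{
	unsigned long exp, man;

	if (qqic < 128)
		return qqic;
	exp = (qqic >> 4) & 0x07;
	man = qqic & 0x0f;
	return (man | 0x10) << (exp + 3);
}

int main(void)
{
	/* 0x8f: exp 0, mant 0xf -> (0x1f << 3) = 248 seconds */
	printf("QQIC 0x8f -> %lu s\n", decode_qqic(0x8f));
	/* With the defaults now set in ipv6_mc_init_dev() (qrv = 2,
	 * qi = 125 s, qri = 10 s), mld_set_v1_mode()'s Older Version
	 * Querier Present Timeout is qrv*qi + qri = 2*125 + 10 = 260 s.
	 */
	printf("v1 switchback with defaults: %d s\n", 2 * 125 + 10);
	return 0;
}
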
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 0f9bdc5ee9f3..9ac01dc9402e 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -268,7 +268,8 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
268 struct ipv6_opt_hdr *exthdr = 268 struct ipv6_opt_hdr *exthdr =
269 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); 269 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
270 const unsigned char *nh = skb_network_header(skb); 270 const unsigned char *nh = skb_network_header(skb);
271 unsigned int packet_len = skb->tail - skb->network_header; 271 unsigned int packet_len = skb_tail_pointer(skb) -
272 skb_network_header(skb);
272 int found_rhdr = 0; 273 int found_rhdr = 0;
273 274
274 *nexthdr = &ipv6_hdr(skb)->nexthdr; 275 *nexthdr = &ipv6_hdr(skb)->nexthdr;
@@ -404,7 +405,8 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
404 struct ipv6_opt_hdr *exthdr = 405 struct ipv6_opt_hdr *exthdr =
405 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); 406 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
406 const unsigned char *nh = skb_network_header(skb); 407 const unsigned char *nh = skb_network_header(skb);
407 unsigned int packet_len = skb->tail - skb->network_header; 408 unsigned int packet_len = skb_tail_pointer(skb) -
409 skb_network_header(skb);
408 int found_rhdr = 0; 410 int found_rhdr = 0;
409 411
410 *nexthdr = &ipv6_hdr(skb)->nexthdr; 412 *nexthdr = &ipv6_hdr(skb)->nexthdr;
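
The skb->tail arithmetic converted here (and in mcast.c and ndisc.c) is only valid when sk_buff stores tail as a pointer; with NET_SKBUFF_DATA_USES_OFFSET it is an offset from skb->head, so open-coded subtraction would mix types. The accessors hide that difference; a simplified sketch of their shape from include/linux/skbuff.h, with ip6_packet_len() as a hypothetical helper restating the computation used above:

#ifdef NET_SKBUFF_DATA_USES_OFFSET
static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
{
	return skb->head + skb->tail;	/* tail held as an offset */
}
#else
static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
{
	return skb->tail;		/* tail held as a pointer */
}
#endif

/* hypothetical helper: the payload-length computation from above */
static inline unsigned int ip6_packet_len(const struct sk_buff *skb)
{
	return skb_tail_pointer(skb) - skb_network_header(skb);
}
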
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 2712ab22a174..f8a55ff1971b 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -372,14 +372,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
372 int tlen = dev->needed_tailroom; 372 int tlen = dev->needed_tailroom;
373 struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; 373 struct sock *sk = dev_net(dev)->ipv6.ndisc_sk;
374 struct sk_buff *skb; 374 struct sk_buff *skb;
375 int err;
376 375
377 skb = sock_alloc_send_skb(sk, 376 skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
378 hlen + sizeof(struct ipv6hdr) + len + tlen,
379 1, &err);
380 if (!skb) { 377 if (!skb) {
381 ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n", 378 ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n",
382 __func__, err); 379 __func__);
383 return NULL; 380 return NULL;
384 } 381 }
385 382
@@ -389,6 +386,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
389 skb_reserve(skb, hlen + sizeof(struct ipv6hdr)); 386 skb_reserve(skb, hlen + sizeof(struct ipv6hdr));
390 skb_reset_transport_header(skb); 387 skb_reset_transport_header(skb);
391 388
389 /* Manually assign socket ownership as we avoid calling
390 * sock_alloc_send_pskb() to bypass wmem buffer limits
391 */
392 skb_set_owner_w(skb, sk);
393
392 return skb; 394 return skb;
393} 395}
394 396
@@ -428,7 +430,6 @@ static void ndisc_send_skb(struct sk_buff *skb,
428 type = icmp6h->icmp6_type; 430 type = icmp6h->icmp6_type;
429 431
430 if (!dst) { 432 if (!dst) {
431 struct sock *sk = net->ipv6.ndisc_sk;
432 struct flowi6 fl6; 433 struct flowi6 fl6;
433 434
434 icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex); 435 icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex);
@@ -462,10 +463,10 @@ static void ndisc_send_skb(struct sk_buff *skb,
462 rcu_read_unlock(); 463 rcu_read_unlock();
463} 464}
464 465
465static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, 466void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
466 const struct in6_addr *daddr, 467 const struct in6_addr *daddr,
467 const struct in6_addr *solicited_addr, 468 const struct in6_addr *solicited_addr,
468 bool router, bool solicited, bool override, bool inc_opt) 469 bool router, bool solicited, bool override, bool inc_opt)
469{ 470{
470 struct sk_buff *skb; 471 struct sk_buff *skb;
471 struct in6_addr tmpaddr; 472 struct in6_addr tmpaddr;
@@ -479,7 +480,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
479 if (ifp) { 480 if (ifp) {
480 src_addr = solicited_addr; 481 src_addr = solicited_addr;
481 if (ifp->flags & IFA_F_OPTIMISTIC) 482 if (ifp->flags & IFA_F_OPTIMISTIC)
482 override = 0; 483 override = false;
483 inc_opt |= ifp->idev->cnf.force_tllao; 484 inc_opt |= ifp->idev->cnf.force_tllao;
484 in6_ifa_put(ifp); 485 in6_ifa_put(ifp);
485 } else { 486 } else {
@@ -557,7 +558,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
557 } 558 }
558 559
559 if (ipv6_addr_any(saddr)) 560 if (ipv6_addr_any(saddr))
560 inc_opt = 0; 561 inc_opt = false;
561 if (inc_opt) 562 if (inc_opt)
562 optlen += ndisc_opt_addr_space(dev); 563 optlen += ndisc_opt_addr_space(dev);
563 564
@@ -663,9 +664,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
663 } 664 }
664 ndisc_send_ns(dev, neigh, target, target, saddr); 665 ndisc_send_ns(dev, neigh, target, target, saddr);
665 } else if ((probes -= neigh->parms->app_probes) < 0) { 666 } else if ((probes -= neigh->parms->app_probes) < 0) {
666#ifdef CONFIG_ARPD
667 neigh_app_ns(neigh); 667 neigh_app_ns(neigh);
668#endif
669 } else { 668 } else {
670 addrconf_addr_solict_mult(target, &mcaddr); 669 addrconf_addr_solict_mult(target, &mcaddr);
671 ndisc_send_ns(dev, NULL, target, &mcaddr, saddr); 670 ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
@@ -693,7 +692,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
693 const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; 692 const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
694 const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; 693 const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
695 u8 *lladdr = NULL; 694 u8 *lladdr = NULL;
696 u32 ndoptlen = skb->tail - (skb->transport_header + 695 u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
697 offsetof(struct nd_msg, opt)); 696 offsetof(struct nd_msg, opt));
698 struct ndisc_options ndopts; 697 struct ndisc_options ndopts;
699 struct net_device *dev = skb->dev; 698 struct net_device *dev = skb->dev;
@@ -790,7 +789,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
790 (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { 789 (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
791 if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && 790 if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
792 skb->pkt_type != PACKET_HOST && 791 skb->pkt_type != PACKET_HOST &&
793 inc != 0 && 792 inc &&
794 idev->nd_parms->proxy_delay != 0) { 793 idev->nd_parms->proxy_delay != 0) {
795 /* 794 /*
796 * for anycast or proxy, 795 * for anycast or proxy,
@@ -853,7 +852,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
853 const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; 852 const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
854 const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; 853 const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
855 u8 *lladdr = NULL; 854 u8 *lladdr = NULL;
856 u32 ndoptlen = skb->tail - (skb->transport_header + 855 u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
857 offsetof(struct nd_msg, opt)); 856 offsetof(struct nd_msg, opt));
858 struct ndisc_options ndopts; 857 struct ndisc_options ndopts;
859 struct net_device *dev = skb->dev; 858 struct net_device *dev = skb->dev;
@@ -1069,7 +1068,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1069 1068
1070 __u8 * opt = (__u8 *)(ra_msg + 1); 1069 __u8 * opt = (__u8 *)(ra_msg + 1);
1071 1070
1072 optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg); 1071 optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) -
1072 sizeof(struct ra_msg);
1073 1073
1074 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { 1074 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1075 ND_PRINTK(2, warn, "RA: source address is not link-local\n"); 1075 ND_PRINTK(2, warn, "RA: source address is not link-local\n");
@@ -1346,7 +1346,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
1346 u8 *hdr; 1346 u8 *hdr;
1347 struct ndisc_options ndopts; 1347 struct ndisc_options ndopts;
1348 struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb); 1348 struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb);
1349 u32 ndoptlen = skb->tail - (skb->transport_header + 1349 u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
1350 offsetof(struct rd_msg, opt)); 1350 offsetof(struct rd_msg, opt));
1351 1351
1352#ifdef CONFIG_IPV6_NDISC_NODETYPE 1352#ifdef CONFIG_IPV6_NDISC_NODETYPE
@@ -1368,8 +1368,11 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
1368 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) 1368 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts))
1369 return; 1369 return;
1370 1370
1371 if (!ndopts.nd_opts_rh) 1371 if (!ndopts.nd_opts_rh) {
1372 ip6_redirect_no_header(skb, dev_net(skb->dev),
1373 skb->dev->ifindex, 0);
1372 return; 1374 return;
1375 }
1373 1376
1374 hdr = (u8 *)ndopts.nd_opts_rh; 1377 hdr = (u8 *)ndopts.nd_opts_rh;
1375 hdr += 8; 1378 hdr += 8;
@@ -1493,7 +1496,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
1493 */ 1496 */
1494 1497
1495 if (ha) 1498 if (ha)
1496 ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha); 1499 ndisc_fill_addr_option(buff, ND_OPT_TARGET_LL_ADDR, ha);
1497 1500
1498 /* 1501 /*
1499 * build redirect option and copy skb over to the new packet. 1502 * build redirect option and copy skb over to the new packet.
@@ -1516,10 +1519,27 @@ static void pndisc_redo(struct sk_buff *skb)
1516 kfree_skb(skb); 1519 kfree_skb(skb);
1517} 1520}
1518 1521
1522static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
1523{
1524 struct inet6_dev *idev = __in6_dev_get(skb->dev);
1525
1526 if (!idev)
1527 return true;
1528 if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED &&
1529 idev->cnf.suppress_frag_ndisc) {
1530 net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n");
1531 return true;
1532 }
1533 return false;
1534}
1535
1519int ndisc_rcv(struct sk_buff *skb) 1536int ndisc_rcv(struct sk_buff *skb)
1520{ 1537{
1521 struct nd_msg *msg; 1538 struct nd_msg *msg;
1522 1539
1540 if (ndisc_suppress_frag_ndisc(skb))
1541 return 0;
1542
1523 if (skb_linearize(skb)) 1543 if (skb_linearize(skb))
1524 return 0; 1544 return 0;
1525 1545
@@ -1568,14 +1588,14 @@ int ndisc_rcv(struct sk_buff *skb)
1568 1588
1569static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 1589static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1570{ 1590{
1571 struct net_device *dev = ptr; 1591 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1572 struct net *net = dev_net(dev); 1592 struct net *net = dev_net(dev);
1573 struct inet6_dev *idev; 1593 struct inet6_dev *idev;
1574 1594
1575 switch (event) { 1595 switch (event) {
1576 case NETDEV_CHANGEADDR: 1596 case NETDEV_CHANGEADDR:
1577 neigh_changeaddr(&nd_tbl, dev); 1597 neigh_changeaddr(&nd_tbl, dev);
1578 fib6_run_gc(~0UL, net); 1598 fib6_run_gc(0, net, false);
1579 idev = in6_dev_get(dev); 1599 idev = in6_dev_get(dev);
1580 if (!idev) 1600 if (!idev)
1581 break; 1601 break;
@@ -1585,7 +1605,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
1585 break; 1605 break;
1586 case NETDEV_DOWN: 1606 case NETDEV_DOWN:
1587 neigh_ifdown(&nd_tbl, dev); 1607 neigh_ifdown(&nd_tbl, dev);
1588 fib6_run_gc(~0UL, net); 1608 fib6_run_gc(0, net, false);
1589 break; 1609 break;
1590 case NETDEV_NOTIFY_PEERS: 1610 case NETDEV_NOTIFY_PEERS:
1591 ndisc_send_unsol_na(dev); 1611 ndisc_send_unsol_na(dev);
@@ -1707,24 +1727,28 @@ int __init ndisc_init(void)
1707 if (err) 1727 if (err)
1708 goto out_unregister_pernet; 1728 goto out_unregister_pernet;
1709#endif 1729#endif
1710 err = register_netdevice_notifier(&ndisc_netdev_notifier);
1711 if (err)
1712 goto out_unregister_sysctl;
1713out: 1730out:
1714 return err; 1731 return err;
1715 1732
1716out_unregister_sysctl:
1717#ifdef CONFIG_SYSCTL 1733#ifdef CONFIG_SYSCTL
1718 neigh_sysctl_unregister(&nd_tbl.parms);
1719out_unregister_pernet: 1734out_unregister_pernet:
1720#endif
1721 unregister_pernet_subsys(&ndisc_net_ops); 1735 unregister_pernet_subsys(&ndisc_net_ops);
1722 goto out; 1736 goto out;
1737#endif
1723} 1738}
1724 1739
1725void ndisc_cleanup(void) 1740int __init ndisc_late_init(void)
1741{
1742 return register_netdevice_notifier(&ndisc_netdev_notifier);
1743}
1744
1745void ndisc_late_cleanup(void)
1726{ 1746{
1727 unregister_netdevice_notifier(&ndisc_netdev_notifier); 1747 unregister_netdevice_notifier(&ndisc_netdev_notifier);
1748}
1749
1750void ndisc_cleanup(void)
1751{
1728#ifdef CONFIG_SYSCTL 1752#ifdef CONFIG_SYSCTL
1729 neigh_sysctl_unregister(&nd_tbl.parms); 1753 neigh_sysctl_unregister(&nd_tbl.parms);
1730#endif 1754#endif
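
The allocation change in ndisc_alloc_skb() above swaps sock_alloc_send_skb() for a bare alloc_skb() followed by skb_set_owner_w(), so neighbour-discovery packets are still charged to the ndisc socket but are no longer throttled by its wmem limit. A simplified sketch of what skb_set_owner_w() (net/core/sock.c) does; not the verbatim implementation:

/* charge the skb to sk for write accounting, bypassing the
 * wmem-limit checks that sock_alloc_send_pskb() would apply
 */
static void sketch_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
	skb_orphan(skb);		/* detach any previous owner */
	skb->sk = sk;
	skb->destructor = sock_wfree;	/* uncharges sk on kfree_skb() */
	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
}
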
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 4433ab40e7de..a7f842b29b67 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -153,6 +153,19 @@ config IP6_NF_TARGET_REJECT
153 153
154 To compile it as a module, choose M here. If unsure, say N. 154 To compile it as a module, choose M here. If unsure, say N.
155 155
156config IP6_NF_TARGET_SYNPROXY
157 tristate "SYNPROXY target support"
158 depends on NF_CONNTRACK && NETFILTER_ADVANCED
159 select NETFILTER_SYNPROXY
160 select SYN_COOKIES
161 help
162 The SYNPROXY target allows you to intercept TCP connections and
163 establish them using syncookies before they are passed on to the
164	  server. This avoids conntrack and server resource usage
165 during SYN-flood attacks.
166
167 To compile it as a module, choose M here. If unsure, say N.
168
156config IP6_NF_MANGLE 169config IP6_NF_MANGLE
157 tristate "Packet mangling" 170 tristate "Packet mangling"
158 default m if NETFILTER_ADVANCED=n 171 default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2d11fcc2cf3c..2b53738f798c 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o
14nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o 14nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
15 15
16# l3 independent conntrack 16# l3 independent conntrack
17obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o 17obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
18 18
19nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o 19nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
20obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o 20obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
@@ -37,3 +37,4 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
37obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o 37obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
38obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o 38obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
39obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o 39obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
40obj-$(CONFIG_IP6_NF_TARGET_SYNPROXY) += ip6t_SYNPROXY.o
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 60e9053bab05..3e4e92d5e157 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -71,12 +71,12 @@ static int device_cmp(struct nf_conn *ct, void *ifindex)
71static int masq_device_event(struct notifier_block *this, 71static int masq_device_event(struct notifier_block *this,
72 unsigned long event, void *ptr) 72 unsigned long event, void *ptr)
73{ 73{
74 const struct net_device *dev = ptr; 74 const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
75 struct net *net = dev_net(dev); 75 struct net *net = dev_net(dev);
76 76
77 if (event == NETDEV_DOWN) 77 if (event == NETDEV_DOWN)
78 nf_ct_iterate_cleanup(net, device_cmp, 78 nf_ct_iterate_cleanup(net, device_cmp,
79 (void *)(long)dev->ifindex); 79 (void *)(long)dev->ifindex, 0, 0);
80 80
81 return NOTIFY_DONE; 81 return NOTIFY_DONE;
82} 82}
@@ -89,8 +89,10 @@ static int masq_inet_event(struct notifier_block *this,
89 unsigned long event, void *ptr) 89 unsigned long event, void *ptr)
90{ 90{
91 struct inet6_ifaddr *ifa = ptr; 91 struct inet6_ifaddr *ifa = ptr;
92 struct netdev_notifier_info info;
92 93
93 return masq_device_event(this, event, ifa->idev->dev); 94 netdev_notifier_info_init(&info, ifa->idev->dev);
95 return masq_device_event(this, event, &info);
94} 96}
95 97
96static struct notifier_block masq_inet_notifier = { 98static struct notifier_block masq_inet_notifier = {
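
masq_device_event() now receives a struct netdev_notifier_info instead of the net_device itself, the same notifier-API change applied to ndisc_netdev_event() above, and masq_inet_event() wraps its device with netdev_notifier_info_init() before forwarding. A minimal notifier in the new style; my_netdev_event() and my_nb are illustrative names, not from the patch:

#include <linux/netdevice.h>
#include <linux/notifier.h>

static int my_netdev_event(struct notifier_block *nb,
			   unsigned long event, void *ptr)
{
	/* ptr is a struct netdev_notifier_info; unwrap the device */
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (event == NETDEV_DOWN)
		pr_info("%s went down\n", dev->name);
	return NOTIFY_DONE;
}

static struct notifier_block my_nb = {
	.notifier_call = my_netdev_event,
};

/* registration, e.g. from module init: register_netdevice_notifier(&my_nb); */
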
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 70f9abc0efe9..56eef30ee5f6 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -169,7 +169,25 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
169 169
170 nf_ct_attach(nskb, oldskb); 170 nf_ct_attach(nskb, oldskb);
171 171
172 ip6_local_out(nskb); 172#ifdef CONFIG_BRIDGE_NETFILTER
173 /* If we use ip6_local_out for bridged traffic, the MAC source on
174 * the RST will be ours, instead of the destination's. This confuses
175 * some routers/firewalls, and they drop the packet. So we need to
176 * build the eth header using the original destination's MAC as the
177 * source, and send the RST packet directly.
178 */
179 if (oldskb->nf_bridge) {
180 struct ethhdr *oeth = eth_hdr(oldskb);
181 nskb->dev = oldskb->nf_bridge->physindev;
182 nskb->protocol = htons(ETH_P_IPV6);
183 ip6h->payload_len = htons(sizeof(struct tcphdr));
184 if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
185 oeth->h_source, oeth->h_dest, nskb->len) < 0)
186 return;
187 dev_queue_xmit(nskb);
188 } else
189#endif
190 ip6_local_out(nskb);
173} 191}
174 192
175static inline void 193static inline void
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
new file mode 100644
index 000000000000..2748b042da72
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -0,0 +1,503 @@
1/*
2 * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/module.h>
10#include <linux/skbuff.h>
11#include <net/ip6_checksum.h>
12#include <net/ip6_route.h>
13#include <net/tcp.h>
14
15#include <linux/netfilter_ipv6/ip6_tables.h>
16#include <linux/netfilter/x_tables.h>
17#include <linux/netfilter/xt_SYNPROXY.h>
18#include <net/netfilter/nf_conntrack.h>
19#include <net/netfilter/nf_conntrack_seqadj.h>
20#include <net/netfilter/nf_conntrack_synproxy.h>
21
22static struct ipv6hdr *
23synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr,
24 const struct in6_addr *daddr)
25{
26 struct ipv6hdr *iph;
27
28 skb_reset_network_header(skb);
29 iph = (struct ipv6hdr *)skb_put(skb, sizeof(*iph));
30 ip6_flow_hdr(iph, 0, 0);
31 iph->hop_limit = 64; //XXX
32 iph->nexthdr = IPPROTO_TCP;
33 iph->saddr = *saddr;
34 iph->daddr = *daddr;
35
36 return iph;
37}
38
39static void
40synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
41 struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
42 struct ipv6hdr *niph, struct tcphdr *nth,
43 unsigned int tcp_hdr_size)
44{
45 struct net *net = nf_ct_net((struct nf_conn *)nfct);
46 struct dst_entry *dst;
47 struct flowi6 fl6;
48
49 nth->check = ~tcp_v6_check(tcp_hdr_size, &niph->saddr, &niph->daddr, 0);
50 nskb->ip_summed = CHECKSUM_PARTIAL;
51 nskb->csum_start = (unsigned char *)nth - nskb->head;
52 nskb->csum_offset = offsetof(struct tcphdr, check);
53
54 memset(&fl6, 0, sizeof(fl6));
55 fl6.flowi6_proto = IPPROTO_TCP;
56 fl6.saddr = niph->saddr;
57 fl6.daddr = niph->daddr;
58 fl6.fl6_sport = nth->source;
59 fl6.fl6_dport = nth->dest;
60 security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6));
61 dst = ip6_route_output(net, NULL, &fl6);
62 if (dst == NULL || dst->error) {
63 dst_release(dst);
64 goto free_nskb;
65 }
66 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
67 if (IS_ERR(dst))
68 goto free_nskb;
69
70 skb_dst_set(nskb, dst);
71
72 if (nfct) {
73 nskb->nfct = nfct;
74 nskb->nfctinfo = ctinfo;
75 nf_conntrack_get(nfct);
76 }
77
78 ip6_local_out(nskb);
79 return;
80
81free_nskb:
82 kfree_skb(nskb);
83}
84
85static void
86synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
87 const struct synproxy_options *opts)
88{
89 struct sk_buff *nskb;
90 struct ipv6hdr *iph, *niph;
91 struct tcphdr *nth;
92 unsigned int tcp_hdr_size;
93 u16 mss = opts->mss;
94
95 iph = ipv6_hdr(skb);
96
97 tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
98 nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
99 GFP_ATOMIC);
100 if (nskb == NULL)
101 return;
102 skb_reserve(nskb, MAX_TCP_HEADER);
103
104 niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr);
105
106 skb_reset_transport_header(nskb);
107 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
108 nth->source = th->dest;
109 nth->dest = th->source;
110 nth->seq = htonl(__cookie_v6_init_sequence(iph, th, &mss));
111 nth->ack_seq = htonl(ntohl(th->seq) + 1);
112 tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
113 if (opts->options & XT_SYNPROXY_OPT_ECN)
114 tcp_flag_word(nth) |= TCP_FLAG_ECE;
115 nth->doff = tcp_hdr_size / 4;
116 nth->window = 0;
117 nth->check = 0;
118 nth->urg_ptr = 0;
119
120 synproxy_build_options(nth, opts);
121
122 synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
123 niph, nth, tcp_hdr_size);
124}
125
126static void
127synproxy_send_server_syn(const struct synproxy_net *snet,
128 const struct sk_buff *skb, const struct tcphdr *th,
129 const struct synproxy_options *opts, u32 recv_seq)
130{
131 struct sk_buff *nskb;
132 struct ipv6hdr *iph, *niph;
133 struct tcphdr *nth;
134 unsigned int tcp_hdr_size;
135
136 iph = ipv6_hdr(skb);
137
138 tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
139 nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
140 GFP_ATOMIC);
141 if (nskb == NULL)
142 return;
143 skb_reserve(nskb, MAX_TCP_HEADER);
144
145 niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
146
147 skb_reset_transport_header(nskb);
148 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
149 nth->source = th->source;
150 nth->dest = th->dest;
151 nth->seq = htonl(recv_seq - 1);
152 /* ack_seq is used to relay our ISN to the synproxy hook to initialize
153 * sequence number translation once a connection tracking entry exists.
154 */
155 nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
156 tcp_flag_word(nth) = TCP_FLAG_SYN;
157 if (opts->options & XT_SYNPROXY_OPT_ECN)
158 tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
159 nth->doff = tcp_hdr_size / 4;
160 nth->window = th->window;
161 nth->check = 0;
162 nth->urg_ptr = 0;
163
164 synproxy_build_options(nth, opts);
165
166 synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
167 niph, nth, tcp_hdr_size);
168}
169
170static void
171synproxy_send_server_ack(const struct synproxy_net *snet,
172 const struct ip_ct_tcp *state,
173 const struct sk_buff *skb, const struct tcphdr *th,
174 const struct synproxy_options *opts)
175{
176 struct sk_buff *nskb;
177 struct ipv6hdr *iph, *niph;
178 struct tcphdr *nth;
179 unsigned int tcp_hdr_size;
180
181 iph = ipv6_hdr(skb);
182
183 tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
184 nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
185 GFP_ATOMIC);
186 if (nskb == NULL)
187 return;
188 skb_reserve(nskb, MAX_TCP_HEADER);
189
190 niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr);
191
192 skb_reset_transport_header(nskb);
193 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
194 nth->source = th->dest;
195 nth->dest = th->source;
196 nth->seq = htonl(ntohl(th->ack_seq));
197 nth->ack_seq = htonl(ntohl(th->seq) + 1);
198 tcp_flag_word(nth) = TCP_FLAG_ACK;
199 nth->doff = tcp_hdr_size / 4;
200 nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
201 nth->check = 0;
202 nth->urg_ptr = 0;
203
204 synproxy_build_options(nth, opts);
205
206 synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
207}
208
209static void
210synproxy_send_client_ack(const struct synproxy_net *snet,
211 const struct sk_buff *skb, const struct tcphdr *th,
212 const struct synproxy_options *opts)
213{
214 struct sk_buff *nskb;
215 struct ipv6hdr *iph, *niph;
216 struct tcphdr *nth;
217 unsigned int tcp_hdr_size;
218
219 iph = ipv6_hdr(skb);
220
221 tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
222 nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
223 GFP_ATOMIC);
224 if (nskb == NULL)
225 return;
226 skb_reserve(nskb, MAX_TCP_HEADER);
227
228 niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
229
230 skb_reset_transport_header(nskb);
231 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
232 nth->source = th->source;
233 nth->dest = th->dest;
234 nth->seq = htonl(ntohl(th->seq) + 1);
235 nth->ack_seq = th->ack_seq;
236 tcp_flag_word(nth) = TCP_FLAG_ACK;
237 nth->doff = tcp_hdr_size / 4;
238 nth->window = ntohs(htons(th->window) >> opts->wscale);
239 nth->check = 0;
240 nth->urg_ptr = 0;
241
242 synproxy_build_options(nth, opts);
243
244 synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
245}
246
247static bool
248synproxy_recv_client_ack(const struct synproxy_net *snet,
249 const struct sk_buff *skb, const struct tcphdr *th,
250 struct synproxy_options *opts, u32 recv_seq)
251{
252 int mss;
253
254 mss = __cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1);
255 if (mss == 0) {
256 this_cpu_inc(snet->stats->cookie_invalid);
257 return false;
258 }
259
260 this_cpu_inc(snet->stats->cookie_valid);
261 opts->mss = mss;
262
263 if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
264 synproxy_check_timestamp_cookie(opts);
265
266 synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
267 return true;
268}
269
270static unsigned int
271synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
272{
273 const struct xt_synproxy_info *info = par->targinfo;
274 struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
275 struct synproxy_options opts = {};
276 struct tcphdr *th, _th;
277
278 if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
279 return NF_DROP;
280
281 th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
282 if (th == NULL)
283 return NF_DROP;
284
285 if (!synproxy_parse_options(skb, par->thoff, th, &opts))
286 return NF_DROP;
287
288 if (th->syn && !(th->ack || th->fin || th->rst)) {
289 /* Initial SYN from client */
290 this_cpu_inc(snet->stats->syn_received);
291
292 if (th->ece && th->cwr)
293 opts.options |= XT_SYNPROXY_OPT_ECN;
294
295 opts.options &= info->options;
296 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
297 synproxy_init_timestamp_cookie(info, &opts);
298 else
299 opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
300 XT_SYNPROXY_OPT_SACK_PERM |
301 XT_SYNPROXY_OPT_ECN);
302
303 synproxy_send_client_synack(skb, th, &opts);
304 return NF_DROP;
305
306 } else if (th->ack && !(th->fin || th->rst || th->syn)) {
307 /* ACK from client */
308 synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
309 return NF_DROP;
310 }
311
312 return XT_CONTINUE;
313}
314
315static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
316 struct sk_buff *skb,
317 const struct net_device *in,
318 const struct net_device *out,
319 int (*okfn)(struct sk_buff *))
320{
321 struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out));
322 enum ip_conntrack_info ctinfo;
323 struct nf_conn *ct;
324 struct nf_conn_synproxy *synproxy;
325 struct synproxy_options opts = {};
326 const struct ip_ct_tcp *state;
327 struct tcphdr *th, _th;
328 __be16 frag_off;
329 u8 nexthdr;
330 int thoff;
331
332 ct = nf_ct_get(skb, &ctinfo);
333 if (ct == NULL)
334 return NF_ACCEPT;
335
336 synproxy = nfct_synproxy(ct);
337 if (synproxy == NULL)
338 return NF_ACCEPT;
339
340 if (nf_is_loopback_packet(skb))
341 return NF_ACCEPT;
342
343 nexthdr = ipv6_hdr(skb)->nexthdr;
344 thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
345 &frag_off);
346 if (thoff < 0)
347 return NF_ACCEPT;
348
349 th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
350 if (th == NULL)
351 return NF_DROP;
352
353 state = &ct->proto.tcp;
354 switch (state->state) {
355 case TCP_CONNTRACK_CLOSE:
356 if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
357 nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
358 ntohl(th->seq) + 1);
359 break;
360 }
361
362 if (!th->syn || th->ack ||
363 CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
364 break;
365
366 /* Reopened connection - reset the sequence number and timestamp
367 * adjustments; they will be initialized once the connection is
368 * reestablished.
369 */
370 nf_ct_seqadj_init(ct, ctinfo, 0);
371 synproxy->tsoff = 0;
372 this_cpu_inc(snet->stats->conn_reopened);
373
374 /* fall through */
375 case TCP_CONNTRACK_SYN_SENT:
376 if (!synproxy_parse_options(skb, thoff, th, &opts))
377 return NF_DROP;
378
379 if (!th->syn && th->ack &&
380 CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
381 /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
382 * therefore we need to add 1 to make the SYN sequence
383 * number match that of the first SYN.
384 */
385 if (synproxy_recv_client_ack(snet, skb, th, &opts,
386 ntohl(th->seq) + 1))
387 this_cpu_inc(snet->stats->cookie_retrans);
388
389 return NF_DROP;
390 }
391
392 synproxy->isn = ntohl(th->ack_seq);
393 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
394 synproxy->its = opts.tsecr;
395 break;
396 case TCP_CONNTRACK_SYN_RECV:
397 if (!th->syn || !th->ack)
398 break;
399
400 if (!synproxy_parse_options(skb, thoff, th, &opts))
401 return NF_DROP;
402
403 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
404 synproxy->tsoff = opts.tsval - synproxy->its;
405
406 opts.options &= ~(XT_SYNPROXY_OPT_MSS |
407 XT_SYNPROXY_OPT_WSCALE |
408 XT_SYNPROXY_OPT_SACK_PERM);
409
410 swap(opts.tsval, opts.tsecr);
411 synproxy_send_server_ack(snet, state, skb, th, &opts);
412
413 nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
414
415 swap(opts.tsval, opts.tsecr);
416 synproxy_send_client_ack(snet, skb, th, &opts);
417
418 consume_skb(skb);
419 return NF_STOLEN;
420 default:
421 break;
422 }
423
424 synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
425 return NF_ACCEPT;
426}
427
428static int synproxy_tg6_check(const struct xt_tgchk_param *par)
429{
430 const struct ip6t_entry *e = par->entryinfo;
431
432 if (!(e->ipv6.flags & IP6T_F_PROTO) ||
433 e->ipv6.proto != IPPROTO_TCP ||
434 e->ipv6.invflags & XT_INV_PROTO)
435 return -EINVAL;
436
437 return nf_ct_l3proto_try_module_get(par->family);
438}
439
440static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par)
441{
442 nf_ct_l3proto_module_put(par->family);
443}
444
445static struct xt_target synproxy_tg6_reg __read_mostly = {
446 .name = "SYNPROXY",
447 .family = NFPROTO_IPV6,
448 .target = synproxy_tg6,
449 .targetsize = sizeof(struct xt_synproxy_info),
450 .checkentry = synproxy_tg6_check,
451 .destroy = synproxy_tg6_destroy,
452 .me = THIS_MODULE,
453};
454
455static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = {
456 {
457 .hook = ipv6_synproxy_hook,
458 .owner = THIS_MODULE,
459 .pf = NFPROTO_IPV6,
460 .hooknum = NF_INET_LOCAL_IN,
461 .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
462 },
463 {
464 .hook = ipv6_synproxy_hook,
465 .owner = THIS_MODULE,
466 .pf = NFPROTO_IPV6,
467 .hooknum = NF_INET_POST_ROUTING,
468 .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
469 },
470};
471
472static int __init synproxy_tg6_init(void)
473{
474 int err;
475
476 err = nf_register_hooks(ipv6_synproxy_ops,
477 ARRAY_SIZE(ipv6_synproxy_ops));
478 if (err < 0)
479 goto err1;
480
481 err = xt_register_target(&synproxy_tg6_reg);
482 if (err < 0)
483 goto err2;
484
485 return 0;
486
487err2:
488 nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
489err1:
490 return err;
491}
492
493static void __exit synproxy_tg6_exit(void)
494{
495 xt_unregister_target(&synproxy_tg6_reg);
496 nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
497}
498
499module_init(synproxy_tg6_init);
500module_exit(synproxy_tg6_exit);
501
502MODULE_LICENSE("GPL");
503MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
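
Once the real server answers, ipv6_synproxy_hook() splices the two half-connections together: nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq)) records how far the cookie ISN handed to the client differs from the server's actual ISN, and conntrack shifts server-to-client sequence numbers by that offset from then on. A standalone illustration of the wrap-safe arithmetic; the ISN values are made up:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t cookie_isn = 4000000000u; /* ISN the proxy's SYN-ACK used */
	uint32_t server_isn = 123456789u;  /* ISN in the real SYN-ACK */

	/* offset as recorded by nf_ct_seqadj_init(); unsigned arithmetic
	 * wraps mod 2^32, matching TCP sequence space
	 */
	uint32_t off = cookie_isn - server_isn;

	/* a server byte numbered seq is seen by the client as seq + off */
	uint32_t server_seq = server_isn + 1;
	printf("offset=%u: server seq %u -> client sees %u\n",
	       off, server_seq, server_seq + off);
	return 0;
}
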
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index beb5777d2043..29b44b14c5ea 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -61,7 +61,7 @@ static int __net_init ip6table_filter_net_init(struct net *net)
61 net->ipv6.ip6table_filter = 61 net->ipv6.ip6table_filter =
62 ip6t_register_table(net, &packet_filter, repl); 62 ip6t_register_table(net, &packet_filter, repl);
63 kfree(repl); 63 kfree(repl);
64 return PTR_RET(net->ipv6.ip6table_filter); 64 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_filter);
65} 65}
66 66
67static void __net_exit ip6table_filter_net_exit(struct net *net) 67static void __net_exit ip6table_filter_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index e075399d8b72..c705907ae6ab 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -101,7 +101,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net)
101 net->ipv6.ip6table_mangle = 101 net->ipv6.ip6table_mangle =
102 ip6t_register_table(net, &packet_mangler, repl); 102 ip6t_register_table(net, &packet_mangler, repl);
103 kfree(repl); 103 kfree(repl);
104 return PTR_RET(net->ipv6.ip6table_mangle); 104 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_mangle);
105} 105}
106 106
107static void __net_exit ip6table_mangle_net_exit(struct net *net) 107static void __net_exit ip6table_mangle_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 6383f90efda8..9b076d2d3a7b 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -293,7 +293,7 @@ static int __net_init ip6table_nat_net_init(struct net *net)
 		return -ENOMEM;
 	net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl);
 	kfree(repl);
-	return PTR_RET(net->ipv6.ip6table_nat);
+	return PTR_ERR_OR_ZERO(net->ipv6.ip6table_nat);
 }
 
 static void __net_exit ip6table_nat_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 60d1bddff7a0..9a626d86720f 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -40,7 +40,7 @@ static int __net_init ip6table_raw_net_init(struct net *net)
 	net->ipv6.ip6table_raw =
 		ip6t_register_table(net, &packet_raw, repl);
 	kfree(repl);
-	return PTR_RET(net->ipv6.ip6table_raw);
+	return PTR_ERR_OR_ZERO(net->ipv6.ip6table_raw);
 }
 
 static void __net_exit ip6table_raw_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index db155351339c..ce88d1d7e525 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -58,7 +58,7 @@ static int __net_init ip6table_security_net_init(struct net *net)
 	net->ipv6.ip6table_security =
 		ip6t_register_table(net, &security_table, repl);
 	kfree(repl);
-	return PTR_RET(net->ipv6.ip6table_security);
+	return PTR_ERR_OR_ZERO(net->ipv6.ip6table_security);
 }
 
 static void __net_exit ip6table_security_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 97bcf2bae857..d6e4dd8b58df 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -28,6 +28,7 @@
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
@@ -158,11 +159,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
 	/* adjust seqs for loopback traffic only in outgoing direction */
 	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
 	    !nf_is_loopback_packet(skb)) {
-		typeof(nf_nat_seq_adjust_hook) seq_adjust;
-
-		seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
-		if (!seq_adjust ||
-		    !seq_adjust(skb, ct, ctinfo, protoff)) {
+		if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
 			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
 			return NF_DROP;
 		}
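
With sequence-number adjustment moved from the NAT module into the conntrack core (hence the new nf_conntrack_seqadj.h include), ipv6_confirm can call nf_ct_seq_adjust() directly instead of going through an RCU-published function pointer that may be NULL while NAT is not loaded. A sketch of the optional-hook pattern being retired, with illustrative names:

	/* hook is NULL until the optional module loads and publishes it;
	 * the caller is assumed to hold rcu_read_lock() (hook context). */
	int (*example_hook)(struct sk_buff *skb) __read_mostly;

	static int call_example_hook(struct sk_buff *skb)
	{
		int (*fn)(struct sk_buff *skb);
		int ret = 1;	/* default verdict when nobody registered */

		fn = rcu_dereference(example_hook);
		if (fn)
			ret = fn(skb);
		return ret;
	}
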
@@ -204,7 +201,7 @@ static unsigned int __ipv6_conntrack_in(struct net *net,
 	if (ct != NULL && !nf_ct_is_untracked(ct)) {
 		help = nfct_help(ct);
 		if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
-			nf_conntrack_get_reasm(skb);
+			nf_conntrack_get_reasm(reasm);
 			NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
 				       (struct net_device *)in,
 				       (struct net_device *)out,
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
index 61aaf70f376e..2205e8eeeacf 100644
--- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -69,8 +69,8 @@ icmpv6_manip_pkt(struct sk_buff *skb,
 	hdr = (struct icmp6hdr *)(skb->data + hdroff);
 	l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
 			     tuple, maniptype);
-	if (hdr->icmp6_code == ICMPV6_ECHO_REQUEST ||
-	    hdr->icmp6_code == ICMPV6_ECHO_REPLY) {
+	if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
+	    hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
 		inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
 					 hdr->icmp6_identifier,
 					 tuple->src.u.icmp.id, 0);
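
This hunk is a straight bug fix: the old code compared icmp6_code against ICMPV6_ECHO_REQUEST (128) and ICMPV6_ECHO_REPLY (129), which are type values. Since the code field of echo messages is always 0, the identifier was never rewritten for NATed echo traffic. The first four bytes of every ICMPv6 message are laid out roughly as:

	struct icmp6hdr {
		__u8	icmp6_type;	/* 128 = echo request, 129 = echo reply */
		__u8	icmp6_code;	/* always 0 for echo messages */
		__sum16	icmp6_cksum;
		/* type-specific body follows, e.g. identifier/sequence */
	};
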
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index c2e73e647e44..827f795209cf 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -5,6 +5,7 @@
 #include <linux/export.h>
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
+#include <net/addrconf.h>
 
 void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
 {
@@ -40,7 +41,8 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 	u16 offset = sizeof(struct ipv6hdr);
 	struct ipv6_opt_hdr *exthdr =
 			(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
-	unsigned int packet_len = skb->tail - skb->network_header;
+	unsigned int packet_len = skb_tail_pointer(skb) -
+				  skb_network_header(skb);
 	int found_rhdr = 0;
 	*nexthdr = &ipv6_hdr(skb)->nexthdr;
 
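
The conversions here (and in raw.c and route.c below) replace open-coded arithmetic on skb->tail with the accessor pair. The reason is portability: with NET_SKBUFF_DATA_USES_OFFSET (64-bit kernels) sk_buff_data_t is an offset from skb->head rather than a pointer, so only the accessors yield a real pointer in both configurations. Abridged from include/linux/skbuff.h:

	#ifdef NET_SKBUFF_DATA_USES_OFFSET
	static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
	{
		return skb->head + skb->tail;	/* tail is an offset */
	}
	#else
	static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
	{
		return skb->tail;		/* tail is a pointer */
	}
	#endif
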
@@ -74,3 +76,50 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 	return offset;
 }
 EXPORT_SYMBOL(ip6_find_1stfragopt);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int ip6_dst_hoplimit(struct dst_entry *dst)
+{
+	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
+	if (hoplimit == 0) {
+		struct net_device *dev = dst->dev;
+		struct inet6_dev *idev;
+
+		rcu_read_lock();
+		idev = __in6_dev_get(dev);
+		if (idev)
+			hoplimit = idev->cnf.hop_limit;
+		else
+			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
+		rcu_read_unlock();
+	}
+	return hoplimit;
+}
+EXPORT_SYMBOL(ip6_dst_hoplimit);
+#endif
+
+int __ip6_local_out(struct sk_buff *skb)
+{
+	int len;
+
+	len = skb->len - sizeof(struct ipv6hdr);
+	if (len > IPV6_MAXPLEN)
+		len = 0;
+	ipv6_hdr(skb)->payload_len = htons(len);
+
+	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
+		       skb_dst(skb)->dev, dst_output);
+}
+EXPORT_SYMBOL_GPL(__ip6_local_out);
+
+int ip6_local_out(struct sk_buff *skb)
+{
+	int err;
+
+	err = __ip6_local_out(skb);
+	if (likely(err == 1))
+		err = dst_output(skb);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(ip6_local_out);
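
__ip6_local_out() sets payload_len to the post-header length, storing 0 when the packet exceeds IPV6_MAXPLEN (65535), the value RFC 2675 reserves for jumbograms whose true length travels in a Hop-by-Hop option. The nf_hook() return convention explains the two-function split: a return of 1 means every NF_INET_LOCAL_OUT hook accepted the packet and the caller still owns it, so ip6_local_out() must push it onward itself. A sketch of a caller, assuming the skb already carries a complete IPv6 header and a routed dst (illustrative, not from this patch):

	/* hand a locally built packet to the stack */
	skb_dst_set(skb, dst);		/* route chosen earlier */
	err = ip6_local_out(skb);	/* LOCAL_OUT hooks, then dst_output() */
	if (err < 0)
		pr_debug("xmit failed: %d\n", err);
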
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
new file mode 100644
index 000000000000..18f19df4189f
--- /dev/null
+++ b/net/ipv6/ping.c
@@ -0,0 +1,277 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system. INET is implemented using the BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		"Ping" sockets
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Based on ipv4/ping.c code.
+ *
+ * Authors:	Lorenzo Colitti (IPv6 support)
+ *		Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6),
+ *		Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32)
+ *
+ */
+
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/protocol.h>
+#include <net/udp.h>
+#include <net/transp_v6.h>
+#include <net/ping.h>
+
+struct proto pingv6_prot = {
+	.name		= "PINGv6",
+	.owner		= THIS_MODULE,
+	.init		= ping_init_sock,
+	.close		= ping_close,
+	.connect	= ip6_datagram_connect,
+	.disconnect	= udp_disconnect,
+	.setsockopt	= ipv6_setsockopt,
+	.getsockopt	= ipv6_getsockopt,
+	.sendmsg	= ping_v6_sendmsg,
+	.recvmsg	= ping_recvmsg,
+	.bind		= ping_bind,
+	.backlog_rcv	= ping_queue_rcv_skb,
+	.hash		= ping_hash,
+	.unhash		= ping_unhash,
+	.get_port	= ping_get_port,
+	.obj_size	= sizeof(struct raw6_sock),
+};
+EXPORT_SYMBOL_GPL(pingv6_prot);
+
+static struct inet_protosw pingv6_protosw = {
+	.type		= SOCK_DGRAM,
+	.protocol	= IPPROTO_ICMPV6,
+	.prot		= &pingv6_prot,
+	.ops		= &inet6_dgram_ops,
+	.no_check	= UDP_CSUM_DEFAULT,
+	.flags		= INET_PROTOSW_REUSE,
+};
+
+
+/* Compatibility glue so we can support IPv6 when it's compiled as a module */
+static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
+{
+	return -EAFNOSUPPORT;
+}
+static int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
+				       struct sk_buff *skb)
+{
+	return -EAFNOSUPPORT;
+}
+static int dummy_icmpv6_err_convert(u8 type, u8 code, int *err)
+{
+	return -EAFNOSUPPORT;
+}
+static void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
+				  __be16 port, u32 info, u8 *payload) {}
+static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
+			       const struct net_device *dev, int strict)
+{
+	return 0;
+}
+
+int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		    size_t len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct icmp6hdr user_icmph;
+	int addr_type;
+	struct in6_addr *daddr;
+	int iif = 0;
+	struct flowi6 fl6;
+	int err;
+	int hlimit;
+	struct dst_entry *dst;
+	struct rt6_info *rt;
+	struct pingfakehdr pfh;
+
+	pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
+
+	err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph,
+				  sizeof(user_icmph));
+	if (err)
+		return err;
+
+	if (msg->msg_name) {
+		struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name;
+		if (msg->msg_namelen < sizeof(struct sockaddr_in6) ||
+		    u->sin6_family != AF_INET6) {
+			return -EINVAL;
+		}
+		if (sk->sk_bound_dev_if &&
+		    sk->sk_bound_dev_if != u->sin6_scope_id) {
+			return -EINVAL;
+		}
+		daddr = &(u->sin6_addr);
+		iif = u->sin6_scope_id;
+	} else {
+		if (sk->sk_state != TCP_ESTABLISHED)
+			return -EDESTADDRREQ;
+		daddr = &np->daddr;
+	}
+
+	if (!iif)
+		iif = sk->sk_bound_dev_if;
+
+	addr_type = ipv6_addr_type(daddr);
+	if (__ipv6_addr_needs_scope_id(addr_type) && !iif)
+		return -EINVAL;
+	if (addr_type & IPV6_ADDR_MAPPED)
+		return -EINVAL;
+
+	/* TODO: use ip6_datagram_send_ctl to get options from cmsg */
+
+	memset(&fl6, 0, sizeof(fl6));
+
+	fl6.flowi6_proto = IPPROTO_ICMPV6;
+	fl6.saddr = np->saddr;
+	fl6.daddr = *daddr;
+	fl6.fl6_icmp_type = user_icmph.icmp6_type;
+	fl6.fl6_icmp_code = user_icmph.icmp6_code;
+	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+		fl6.flowi6_oif = np->mcast_oif;
+	else if (!fl6.flowi6_oif)
+		fl6.flowi6_oif = np->ucast_oif;
+
+	dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1);
+	if (IS_ERR(dst))
+		return PTR_ERR(dst);
+	rt = (struct rt6_info *) dst;
+
+	np = inet6_sk(sk);
+	if (!np)
+		return -EBADF;
+
+	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+		fl6.flowi6_oif = np->mcast_oif;
+	else if (!fl6.flowi6_oif)
+		fl6.flowi6_oif = np->ucast_oif;
+
+	pfh.icmph.type = user_icmph.icmp6_type;
+	pfh.icmph.code = user_icmph.icmp6_code;
+	pfh.icmph.checksum = 0;
+	pfh.icmph.un.echo.id = inet->inet_sport;
+	pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence;
+	pfh.iov = msg->msg_iov;
+	pfh.wcheck = 0;
+	pfh.family = AF_INET6;
+
+	if (ipv6_addr_is_multicast(&fl6.daddr))
+		hlimit = np->mcast_hops;
+	else
+		hlimit = np->hop_limit;
+	if (hlimit < 0)
+		hlimit = ip6_dst_hoplimit(dst);
+
+	lock_sock(sk);
+	err = ip6_append_data(sk, ping_getfrag, &pfh, len,
+			      0, hlimit,
+			      np->tclass, NULL, &fl6, rt,
+			      MSG_DONTWAIT, np->dontfrag);
+
+	if (err) {
+		ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev,
+				   ICMP6_MIB_OUTERRORS);
+		ip6_flush_pending_frames(sk);
+	} else {
+		err = icmpv6_push_pending_frames(sk, &fl6,
+						 (struct icmp6hdr *) &pfh.icmph,
+						 len);
+	}
+	release_sock(sk);
+
+	if (err)
+		return err;
+
+	return len;
+}
+
+#ifdef CONFIG_PROC_FS
+static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	return ping_seq_start(seq, pos, AF_INET6);
+}
+
+static int ping_v6_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
+	} else {
+		int bucket = ((struct ping_iter_state *) seq->private)->bucket;
+		struct inet_sock *inet = inet_sk(v);
+		__u16 srcp = ntohs(inet->inet_sport);
+		__u16 destp = ntohs(inet->inet_dport);
+		ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket);
+	}
+	return 0;
+}
+
+static struct ping_seq_afinfo ping_v6_seq_afinfo = {
+	.name		= "icmp6",
+	.family		= AF_INET6,
+	.seq_fops	= &ping_seq_fops,
+	.seq_ops	= {
+		.start		= ping_v6_seq_start,
+		.show		= ping_v6_seq_show,
+		.next		= ping_seq_next,
+		.stop		= ping_seq_stop,
+	},
+};
+
+static int __net_init ping_v6_proc_init_net(struct net *net)
+{
+	return ping_proc_register(net, &ping_v6_seq_afinfo);
+}
+
+static void __net_init ping_v6_proc_exit_net(struct net *net)
+{
+	return ping_proc_unregister(net, &ping_v6_seq_afinfo);
+}
+
+static struct pernet_operations ping_v6_net_ops = {
+	.init = ping_v6_proc_init_net,
+	.exit = ping_v6_proc_exit_net,
+};
+#endif
+
+int __init pingv6_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	int ret = register_pernet_subsys(&ping_v6_net_ops);
+	if (ret)
+		return ret;
+#endif
+	pingv6_ops.ipv6_recv_error = ipv6_recv_error;
+	pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl;
+	pingv6_ops.icmpv6_err_convert = icmpv6_err_convert;
+	pingv6_ops.ipv6_icmp_error = ipv6_icmp_error;
+	pingv6_ops.ipv6_chk_addr = ipv6_chk_addr;
+	return inet6_register_protosw(&pingv6_protosw);
+}
+
+/* This never gets called because it's not possible to unload the ipv6 module,
+ * but just in case.
+ */
+void pingv6_exit(void)
+{
+	pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error;
+	pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl;
+	pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert;
+	pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error;
+	pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr;
+#ifdef CONFIG_PROC_FS
+	unregister_pernet_subsys(&ping_v6_net_ops);
+#endif
+	inet6_unregister_protosw(&pingv6_protosw);
+}
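
With this file in place, unprivileged ICMPv6 echo sockets work like their IPv4 counterparts: access is gated by the net.ipv4.ping_group_range sysctl, which governs IPv6 ping sockets as well. A minimal user-space sketch (not part of the patch) that sends one echo request to ::1; note the kernel overwrites the identifier with the socket's local "port" (inet_sport above):

	#include <arpa/inet.h>
	#include <netinet/icmp6.h>
	#include <netinet/in.h>
	#include <stdio.h>
	#include <sys/socket.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = socket(AF_INET6, SOCK_DGRAM, IPPROTO_ICMPV6);
		struct sockaddr_in6 dst = { .sin6_family = AF_INET6 };
		struct icmp6_hdr req = { .icmp6_type = ICMP6_ECHO_REQUEST };

		if (fd < 0) {
			perror("socket"); /* EACCES if gid outside ping_group_range */
			return 1;
		}
		inet_pton(AF_INET6, "::1", &dst.sin6_addr);
		req.icmp6_id = 0;		/* rewritten by the kernel */
		req.icmp6_seq = htons(1);
		if (sendto(fd, &req, sizeof(req), 0,
			   (struct sockaddr *)&dst, sizeof(dst)) < 0)
			perror("sendto");
		close(fd);
		return 0;
	}
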
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 51c3285b5d9b..091d066a57b3 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -91,6 +91,10 @@ static const struct snmp_mib snmp6_ipstats_list[] = {
 	SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
 	SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
 	/* IPSTATS_MIB_CSUMERRORS is not relevant in IPv6 (no checksum) */
+	SNMP_MIB_ITEM("Ip6InNoECTPkts", IPSTATS_MIB_NOECTPKTS),
+	SNMP_MIB_ITEM("Ip6InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
+	SNMP_MIB_ITEM("Ip6InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
+	SNMP_MIB_ITEM("Ip6InCEPkts", IPSTATS_MIB_CEPKTS),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index eedff8ccded5..a4ed2416399e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -63,6 +63,8 @@
 #include <linux/seq_file.h>
 #include <linux/export.h>
 
+#define ICMPV6_HDRLEN	4	/* ICMPv6 header, RFC 4443 Section 2.1 */
+
 static struct raw_hashinfo raw_v6_hashinfo = {
 	.lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
 };
@@ -108,11 +110,14 @@ found:
  */
 static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb)
 {
-	struct icmp6hdr *_hdr;
+	struct icmp6hdr _hdr;
 	const struct icmp6hdr *hdr;
 
+	/* We require only the four bytes of the ICMPv6 header, not any
+	 * additional bytes of message body in "struct icmp6hdr".
+	 */
 	hdr = skb_header_pointer(skb, skb_transport_offset(skb),
-				 sizeof(_hdr), &_hdr);
+				 ICMPV6_HDRLEN, &_hdr);
 	if (hdr) {
 		const __u32 *data = &raw6_sk(sk)->filter.data[0];
 		unsigned int type = hdr->icmp6_type;
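
The old code declared `struct icmp6hdr *_hdr` — a pointer — so both sizeof(_hdr) and the &_hdr scratch buffer were pointer-sized (8 bytes on 64-bit), meaning a minimal 4-byte ICMPv6 header could fail the fetch outright. The fix uses a real struct as the bounce buffer but requests only ICMPV6_HDRLEN bytes, since struct icmp6hdr is larger than the 4-byte wire header. skb_header_pointer() semantics, sketched:

	/* Returns a pointer into the skb if the requested bytes sit in the
	 * linear data area; otherwise copies them into 'buffer' and returns
	 * 'buffer'; returns NULL if the packet is shorter than offset+len. */
	void *skb_header_pointer(const struct sk_buff *skb, int offset,
				 int len, void *buffer);
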
@@ -330,8 +335,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
 		ip6_sk_update_pmtu(skb, sk, info);
 		harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
 	}
-	if (type == NDISC_REDIRECT)
+	if (type == NDISC_REDIRECT) {
 		ip6_sk_redirect(skb, sk);
+		return;
+	}
 	if (np->recverr) {
 		u8 *payload = skb->data;
 		if (!inet->hdrincl)
@@ -628,6 +635,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 		goto error;
 	skb_reserve(skb, hlen);
 
+	skb->protocol = htons(ETH_P_IPV6);
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
 	skb_dst_set(skb, &rt->dst);
@@ -1132,7 +1140,8 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
 		spin_lock_bh(&sk->sk_receive_queue.lock);
 		skb = skb_peek(&sk->sk_receive_queue);
 		if (skb != NULL)
-			amount = skb->tail - skb->transport_header;
+			amount = skb_tail_pointer(skb) -
+				 skb_transport_header(skb);
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
 		return put_user(amount, (int __user *)arg);
 	}
@@ -1226,45 +1235,16 @@ struct proto rawv6_prot = {
 };
 
 #ifdef CONFIG_PROC_FS
-static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
-{
-	struct ipv6_pinfo *np = inet6_sk(sp);
-	const struct in6_addr *dest, *src;
-	__u16 destp, srcp;
-
-	dest = &np->daddr;
-	src = &np->rcv_saddr;
-	destp = 0;
-	srcp = inet_sk(sp)->inet_num;
-	seq_printf(seq,
-		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
-		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n",
-		   i,
-		   src->s6_addr32[0], src->s6_addr32[1],
-		   src->s6_addr32[2], src->s6_addr32[3], srcp,
-		   dest->s6_addr32[0], dest->s6_addr32[1],
-		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
-		   sp->sk_state,
-		   sk_wmem_alloc_get(sp),
-		   sk_rmem_alloc_get(sp),
-		   0, 0L, 0,
-		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
-		   0,
-		   sock_i_ino(sp),
-		   atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
-}
-
 static int raw6_seq_show(struct seq_file *seq, void *v)
 {
-	if (v == SEQ_START_TOKEN)
-		seq_printf(seq,
-			   " sl "
-			   "local_address "
-			   "remote_address "
-			   "st tx_queue rx_queue tr tm->when retrnsmt"
-			   " uid timeout inode ref pointer drops\n");
-	else
-		raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
+	} else {
+		struct sock *sp = v;
+		__u16 srcp = inet_sk(sp)->inet_num;
+		ip6_dgram_sock_seq_show(seq, v, srcp, 0,
+					raw_seq_private(seq)->bucket);
+	}
 	return 0;
 }
 
1270 1250
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 790d9f4b8b0b..1aeb473b2cc6 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -490,6 +490,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	ipv6_hdr(head)->payload_len = htons(payload_len);
 	ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
 	IP6CB(head)->nhoff = nhoff;
+	IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
 
 	/* Yes, and fold redundant checksum back. 8) */
 	if (head->ip_summed == CHECKSUM_COMPLETE)
@@ -524,6 +525,9 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	struct net *net = dev_net(skb_dst(skb)->dev);
 	int evicted;
 
+	if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
+		goto fail_hdr;
+
 	IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
 
 	/* Jumbo payload inhibits frag. header */
@@ -544,6 +548,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 			ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
 
 		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
+		IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
 		return 1;
 	}
 
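
IP6SKB_FRAGMENTED lives in the per-packet IPv6 control block and marks an skb that has already been through reassembly, so a packet carrying more than one fragment header is rejected instead of re-entering the reassembler. The control block is simply a typed view of skb->cb, roughly:

	/* from include/linux/ipv6.h */
	#define IP6CB(skb)	((struct inet6_skb_parm *)((skb)->cb))
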
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ad0aa6b0b86a..c979dd96d82a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -65,6 +65,12 @@
 #include <linux/sysctl.h>
 #endif
 
+enum rt6_nud_state {
+	RT6_NUD_FAIL_HARD = -2,
+	RT6_NUD_FAIL_SOFT = -1,
+	RT6_NUD_SUCCEED = 1
+};
+
 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
 				    const struct in6_addr *dest);
 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
@@ -83,6 +89,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 			       struct sk_buff *skb, u32 mtu);
 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
 			    struct sk_buff *skb);
+static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
 static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -276,9 +283,8 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 
 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
-		rt->rt6i_genid = rt_genid(net);
+		rt->rt6i_genid = rt_genid_ipv6(net);
 		INIT_LIST_HEAD(&rt->rt6i_siblings);
-		rt->rt6i_nsiblings = 0;
 	}
 	return rt;
 }
@@ -394,7 +400,8 @@ static int rt6_info_hash_nhsfn(unsigned int candidate_count,
 }
 
 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
-					     struct flowi6 *fl6)
+					     struct flowi6 *fl6, int oif,
+					     int strict)
 {
 	struct rt6_info *sibling, *next_sibling;
 	int route_choosen;
@@ -408,6 +415,8 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 				&match->rt6i_siblings, rt6i_siblings) {
 		route_choosen--;
 		if (route_choosen == 0) {
+			if (rt6_score_route(sibling, oif, strict) < 0)
+				break;
 			match = sibling;
 			break;
 		}
@@ -527,26 +536,29 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 	return 0;
 }
 
-static inline bool rt6_check_neigh(struct rt6_info *rt)
+static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
 {
 	struct neighbour *neigh;
-	bool ret = false;
+	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 
 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 	    !(rt->rt6i_flags & RTF_GATEWAY))
-		return true;
+		return RT6_NUD_SUCCEED;
 
 	rcu_read_lock_bh();
 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 	if (neigh) {
 		read_lock(&neigh->lock);
 		if (neigh->nud_state & NUD_VALID)
-			ret = true;
+			ret = RT6_NUD_SUCCEED;
 #ifdef CONFIG_IPV6_ROUTER_PREF
 		else if (!(neigh->nud_state & NUD_FAILED))
-			ret = true;
+			ret = RT6_NUD_SUCCEED;
 #endif
 		read_unlock(&neigh->lock);
+	} else {
+		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
+		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT;
 	}
 	rcu_read_unlock_bh();
 
@@ -560,43 +572,52 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
 
 	m = rt6_check_dev(rt, oif);
 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
-		return -1;
+		return RT6_NUD_FAIL_HARD;
 #ifdef CONFIG_IPV6_ROUTER_PREF
 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 #endif
-	if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
-		return -1;
+	if (strict & RT6_LOOKUP_F_REACHABLE) {
+		int n = rt6_check_neigh(rt);
+		if (n < 0)
+			return n;
+	}
 	return m;
 }
 
 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
-				   int *mpri, struct rt6_info *match)
+				   int *mpri, struct rt6_info *match,
+				   bool *do_rr)
 {
 	int m;
+	bool match_do_rr = false;
 
 	if (rt6_check_expired(rt))
 		goto out;
 
 	m = rt6_score_route(rt, oif, strict);
-	if (m < 0)
+	if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) {
+		match_do_rr = true;
+		m = 0; /* lowest valid score */
+	} else if (m < 0) {
 		goto out;
+	}
+
+	if (strict & RT6_LOOKUP_F_REACHABLE)
+		rt6_probe(rt);
 
 	if (m > *mpri) {
-		if (strict & RT6_LOOKUP_F_REACHABLE)
-			rt6_probe(match);
+		*do_rr = match_do_rr;
 		*mpri = m;
 		match = rt;
-	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
-		rt6_probe(rt);
 	}
-
 out:
 	return match;
 }
 
 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 				     struct rt6_info *rr_head,
-				     u32 metric, int oif, int strict)
+				     u32 metric, int oif, int strict,
+				     bool *do_rr)
 {
 	struct rt6_info *rt, *match;
 	int mpri = -1;
@@ -604,10 +625,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 	match = NULL;
 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
 	     rt = rt->dst.rt6_next)
-		match = find_match(rt, oif, strict, &mpri, match);
+		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 	     rt = rt->dst.rt6_next)
-		match = find_match(rt, oif, strict, &mpri, match);
+		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 
 	return match;
 }
@@ -616,15 +637,16 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 {
 	struct rt6_info *match, *rt0;
 	struct net *net;
+	bool do_rr = false;
 
 	rt0 = fn->rr_ptr;
 	if (!rt0)
 		fn->rr_ptr = rt0 = fn->leaf;
 
-	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
+	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
+			     &do_rr);
 
-	if (!match &&
-	    (strict & RT6_LOOKUP_F_REACHABLE)) {
+	if (do_rr) {
 		struct rt6_info *next = rt0->dst.rt6_next;
 
 		/* no entries matched; do round-robin */
@@ -743,7 +765,7 @@ restart:
 	rt = fn->leaf;
 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
-		rt = rt6_multipath_select(rt, fl6);
+		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
 	BACKTRACK(net, &fl6->saddr);
 out:
 	dst_use(&rt->dst, jiffies);
@@ -875,8 +897,8 @@ restart_2:
 
 restart:
 	rt = rt6_select(fn, oif, strict | reachable);
-	if (rt->rt6i_nsiblings && oif == 0)
-		rt = rt6_multipath_select(rt, fl6);
+	if (rt->rt6i_nsiblings)
+		rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
 	BACKTRACK(net, &fl6->saddr);
 	if (rt == net->ipv6.ip6_null_entry ||
 	    rt->rt6i_flags & RTF_CACHE)
@@ -1039,7 +1061,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
 	 * into this function always.
 	 */
-	if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
+	if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
 		return NULL;
 
 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
@@ -1074,10 +1096,13 @@ static void ip6_link_failure(struct sk_buff *skb)
 
 	rt = (struct rt6_info *) skb_dst(skb);
 	if (rt) {
-		if (rt->rt6i_flags & RTF_CACHE)
-			rt6_update_expires(rt, 0);
-		else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
+		if (rt->rt6i_flags & RTF_CACHE) {
+			dst_hold(&rt->dst);
+			if (ip6_del_rt(rt))
+				dst_free(&rt->dst);
+		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
 			rt->rt6i_node->fn_sernum = -1;
+		}
 	}
 }
 
@@ -1131,6 +1156,77 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
 }
 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
 
+/* Handle redirects */
+struct ip6rd_flowi {
+	struct flowi6 fl6;
+	struct in6_addr gateway;
+};
+
+static struct rt6_info *__ip6_route_redirect(struct net *net,
+					     struct fib6_table *table,
+					     struct flowi6 *fl6,
+					     int flags)
+{
+	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
+	struct rt6_info *rt;
+	struct fib6_node *fn;
+
+	/* Get the "current" route for this destination and
+	 * check if the redirect has come from approriate router.
+	 *
+	 * RFC 4861 specifies that redirects should only be
+	 * accepted if they come from the nexthop to the target.
+	 * Due to the way the routes are chosen, this notion
+	 * is a bit fuzzy and one might need to check all possible
+	 * routes.
+	 */
+
+	read_lock_bh(&table->tb6_lock);
+	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+restart:
+	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+		if (rt6_check_expired(rt))
+			continue;
+		if (rt->dst.error)
+			break;
+		if (!(rt->rt6i_flags & RTF_GATEWAY))
+			continue;
+		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
+			continue;
+		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
+			continue;
+		break;
+	}
+
+	if (!rt)
+		rt = net->ipv6.ip6_null_entry;
+	else if (rt->dst.error) {
+		rt = net->ipv6.ip6_null_entry;
+		goto out;
+	}
+	BACKTRACK(net, &fl6->saddr);
+out:
+	dst_hold(&rt->dst);
+
+	read_unlock_bh(&table->tb6_lock);
+
+	return rt;
+};
+
+static struct dst_entry *ip6_route_redirect(struct net *net,
+					    const struct flowi6 *fl6,
+					    const struct in6_addr *gateway)
+{
+	int flags = RT6_LOOKUP_F_HAS_SADDR;
+	struct ip6rd_flowi rdfl;
+
+	rdfl.fl6 = *fl6;
+	rdfl.gateway = *gateway;
+
+	return fib6_rule_lookup(net, &rdfl.fl6,
+				flags, __ip6_route_redirect);
+}
+
 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
 {
 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
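
__ip6_route_redirect() implements the RFC 4861 rule that a redirect is only credible when it comes from the router currently used as next hop toward the target: it walks candidate routes and accepts only an RTF_GATEWAY entry whose gateway equals the ICMPv6 source. Since fib6_rule_lookup() passes nothing but a struct flowi6 to the callback, the extra gateway travels piggybacked: flowi6 is the first member of ip6rd_flowi, so the callback can cast the pointer back (or, equivalently, upcast via container_of):

	/* equivalent to the cast inside __ip6_route_redirect() above */
	struct ip6rd_flowi *rdfl = container_of(fl6, struct ip6rd_flowi, fl6);
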
@@ -1145,13 +1241,32 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
 	fl6.saddr = iph->saddr;
 	fl6.flowlabel = ip6_flowinfo(iph);
 
-	dst = ip6_route_output(net, NULL, &fl6);
-	if (!dst->error)
-		rt6_do_redirect(dst, NULL, skb);
+	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
+	rt6_do_redirect(dst, NULL, skb);
 	dst_release(dst);
 }
 EXPORT_SYMBOL_GPL(ip6_redirect);
 
+void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
+			    u32 mark)
+{
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
+	struct dst_entry *dst;
+	struct flowi6 fl6;
+
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_oif = oif;
+	fl6.flowi6_mark = mark;
+	fl6.flowi6_flags = 0;
+	fl6.daddr = msg->dest;
+	fl6.saddr = iph->daddr;
+
+	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
+	rt6_do_redirect(dst, NULL, skb);
+	dst_release(dst);
+}
+
 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
 {
 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
@@ -1285,7 +1400,6 @@ static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
 
 static int ip6_dst_gc(struct dst_ops *ops)
 {
-	unsigned long now = jiffies;
 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
@@ -1295,13 +1409,12 @@ static int ip6_dst_gc(struct dst_ops *ops)
 	int entries;
 
 	entries = dst_entries_get_fast(ops);
-	if (time_after(rt_last_gc + rt_min_interval, now) &&
+	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
 	    entries <= rt_max_size)
 		goto out;
 
 	net->ipv6.ip6_rt_gc_expire++;
-	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
-	net->ipv6.ip6_rt_last_gc = now;
+	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
 	entries = dst_entries_get_slow(ops);
 	if (entries < ops->gc_thresh)
 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
@@ -1310,25 +1423,6 @@ out:
 	return entries > rt_max_size;
 }
 
-int ip6_dst_hoplimit(struct dst_entry *dst)
-{
-	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
-	if (hoplimit == 0) {
-		struct net_device *dev = dst->dev;
-		struct inet6_dev *idev;
-
-		rcu_read_lock();
-		idev = __in6_dev_get(dev);
-		if (idev)
-			hoplimit = idev->cnf.hop_limit;
-		else
-			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
-		rcu_read_unlock();
-	}
-	return hoplimit;
-}
-EXPORT_SYMBOL(ip6_dst_hoplimit);
-
 /*
  *
  */
@@ -1649,7 +1743,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 	int optlen, on_link;
 	u8 *lladdr;
 
-	optlen = skb->tail - skb->transport_header;
+	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
 	optlen -= sizeof(*msg);
 
 	if (optlen < 0) {
@@ -2681,9 +2775,9 @@ errout:
 }
 
 static int ip6_route_dev_notify(struct notifier_block *this,
-				unsigned long event, void *data)
+				unsigned long event, void *ptr)
 {
-	struct net_device *dev = (struct net_device *)data;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct net *net = dev_net(dev);
 
 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
@@ -2790,7 +2884,7 @@ static const struct file_operations rt6_stats_seq_fops = {
 #ifdef CONFIG_SYSCTL
 
 static
-int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
+int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
 			      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct net *net;
@@ -2801,11 +2895,11 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
 	net = (struct net *)ctl->extra1;
 	delay = net->ipv6.sysctl.flush_delay;
 	proc_dointvec(ctl, write, buffer, lenp, ppos);
-	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
+	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
 	return 0;
 }
 
-ctl_table ipv6_route_table_template[] = {
+struct ctl_table ipv6_route_table_template[] = {
 	{
 		.procname	= "flush",
 		.data		= &init_net.ipv6.sysctl.flush_delay,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 335363478bbf..19269453a8ea 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -466,14 +466,14 @@ isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t)
 
 static void ipip6_tunnel_uninit(struct net_device *dev)
 {
-	struct net *net = dev_net(dev);
-	struct sit_net *sitn = net_generic(net, sit_net_id);
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	struct sit_net *sitn = net_generic(tunnel->net, sit_net_id);
 
 	if (dev == sitn->fb_tunnel_dev) {
 		RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL);
 	} else {
-		ipip6_tunnel_unlink(sitn, netdev_priv(dev));
-		ipip6_tunnel_del_prl(netdev_priv(dev), NULL);
+		ipip6_tunnel_unlink(sitn, tunnel);
+		ipip6_tunnel_del_prl(tunnel, NULL);
 	}
 	dev_put(dev);
 }
@@ -566,6 +566,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
 	return false;
 }
 
+/* Checks if an address matches an address on the tunnel interface.
+ * Used to detect the NAT of proto 41 packets and let them pass spoofing test.
+ * Long story:
+ * This function is called after we considered the packet as spoofed
+ * in is_spoofed_6rd.
+ * We may have a router that is doing NAT for proto 41 packets
+ * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb
+ * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd
+ * function will return true, dropping the packet.
+ * But, we can still check if is spoofed against the IP
+ * addresses associated with the interface.
+ */
+static bool only_dnatted(const struct ip_tunnel *tunnel,
+			 const struct in6_addr *v6dst)
+{
+	int prefix_len;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+	prefix_len = tunnel->ip6rd.prefixlen + 32
+		- tunnel->ip6rd.relay_prefixlen;
+#else
+	prefix_len = 48;
+#endif
+	return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev);
+}
+
+/* Returns true if a packet is spoofed */
+static bool packet_is_spoofed(struct sk_buff *skb,
+			      const struct iphdr *iph,
+			      struct ip_tunnel *tunnel)
+{
+	const struct ipv6hdr *ipv6h;
+
+	if (tunnel->dev->priv_flags & IFF_ISATAP) {
+		if (!isatap_chksrc(skb, iph, tunnel))
+			return true;
+
+		return false;
+	}
+
+	if (tunnel->dev->flags & IFF_POINTOPOINT)
+		return false;
+
+	ipv6h = ipv6_hdr(skb);
+
+	if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) {
+		net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+				     &iph->saddr, &ipv6h->saddr,
+				     &iph->daddr, &ipv6h->daddr);
+		return true;
+	}
+
+	if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr)))
+		return false;
+
+	if (only_dnatted(tunnel, &ipv6h->daddr))
+		return false;
+
+	net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+			     &iph->saddr, &ipv6h->saddr,
+			     &iph->daddr, &ipv6h->daddr);
+	return true;
+}
+
 static int ipip6_rcv(struct sk_buff *skb)
 {
 	const struct iphdr *iph = ip_hdr(skb);
@@ -577,29 +641,21 @@ static int ipip6_rcv(struct sk_buff *skb)
 	if (tunnel != NULL) {
 		struct pcpu_tstats *tstats;
 
-		secpath_reset(skb);
+		if (tunnel->parms.iph.protocol != IPPROTO_IPV6 &&
+		    tunnel->parms.iph.protocol != 0)
+			goto out;
+
 		skb->mac_header = skb->network_header;
 		skb_reset_network_header(skb);
 		IPCB(skb)->flags = 0;
 		skb->protocol = htons(ETH_P_IPV6);
-		skb->pkt_type = PACKET_HOST;
 
-		if (tunnel->dev->priv_flags & IFF_ISATAP) {
-			if (!isatap_chksrc(skb, iph, tunnel)) {
-				tunnel->dev->stats.rx_errors++;
-				goto out;
-			}
-		} else {
-			if (is_spoofed_6rd(tunnel, iph->saddr,
-					   &ipv6_hdr(skb)->saddr) ||
-			    is_spoofed_6rd(tunnel, iph->daddr,
-					   &ipv6_hdr(skb)->daddr)) {
-				tunnel->dev->stats.rx_errors++;
-				goto out;
-			}
+		if (packet_is_spoofed(skb, iph, tunnel)) {
+			tunnel->dev->stats.rx_errors++;
+			goto out;
 		}
 
-		__skb_tunnel_rx(skb, tunnel->dev);
+		__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
 
 		err = IP_ECN_decapsulate(iph, skb);
 		if (unlikely(err)) {
@@ -629,6 +685,38 @@ out:
 	return 0;
 }
 
+static const struct tnl_ptk_info tpi = {
+	/* no tunnel info required for ipip. */
+	.proto = htons(ETH_P_IP),
+};
+
+static int ipip_rcv(struct sk_buff *skb)
+{
+	const struct iphdr *iph;
+	struct ip_tunnel *tunnel;
+
+	iph = ip_hdr(skb);
+	tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
+				     iph->saddr, iph->daddr);
+	if (tunnel != NULL) {
+		if (tunnel->parms.iph.protocol != IPPROTO_IPIP &&
+		    tunnel->parms.iph.protocol != 0)
+			goto drop;
+
+		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+			goto drop;
+		if (iptunnel_pull_header(skb, 0, tpi.proto))
+			goto drop;
+		return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
+	}
+
+	return 1;
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
 /*
  * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function
  * stores the embedded IPv4 address in v4dst and returns true.
@@ -690,13 +778,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	__be16 df = tiph->frag_off;
 	struct rtable *rt;		/* Route to the other host */
 	struct net_device *tdev;	/* Device to other host */
-	struct iphdr *iph;		/* Our new IP header */
 	unsigned int max_headroom;	/* The extra header space needed */
 	__be32 dst = tiph->daddr;
 	struct flowi4 fl4;
 	int mtu;
 	const struct in6_addr *addr6;
 	int addr_type;
+	u8 ttl;
+	int err;
 
 	if (skb->protocol != htons(ETH_P_IPV6))
 		goto tx_error;
@@ -713,7 +802,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
 
 	if (neigh == NULL) {
-		net_dbg_ratelimited("sit: nexthop == NULL\n");
+		net_dbg_ratelimited("nexthop == NULL\n");
 		goto tx_error;
 	}
 
@@ -742,7 +831,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
 
 	if (neigh == NULL) {
-		net_dbg_ratelimited("sit: nexthop == NULL\n");
+		net_dbg_ratelimited("nexthop == NULL\n");
 		goto tx_error;
 	}
 
@@ -764,7 +853,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 		goto tx_error;
 	}
 
-	rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
+	rt = ip_route_output_ports(tunnel->net, &fl4, NULL,
 				   dst, tiph->saddr,
 				   0, 0,
 				   IPPROTO_IPV6, RT_TOS(tos),
@@ -839,34 +928,19 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 		skb = new_skb;
 		iph6 = ipv6_hdr(skb);
 	}
+	ttl = tiph->ttl;
+	if (ttl == 0)
+		ttl = iph6->hop_limit;
+	tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
+
+	if (likely(!skb->encapsulation)) {
+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
+	}
 
-	skb->transport_header = skb->network_header;
-	skb_push(skb, sizeof(struct iphdr));
-	skb_reset_network_header(skb);
-	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-	IPCB(skb)->flags = 0;
-	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->dst);
-
-	/*
-	 * Push down and install the IPIP header.
-	 */
-
-	iph = ip_hdr(skb);
-	iph->version = 4;
-	iph->ihl = sizeof(struct iphdr)>>2;
-	iph->frag_off = df;
-	iph->protocol = IPPROTO_IPV6;
-	iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
-	iph->daddr = fl4.daddr;
-	iph->saddr = fl4.saddr;
-
-	if ((iph->ttl = tiph->ttl) == 0)
-		iph->ttl = iph6->hop_limit;
-
-	skb->ip_summed = CHECKSUM_NONE;
-	ip_select_ident(iph, skb_dst(skb), NULL);
-	iptunnel_xmit(skb, dev);
+	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos,
+			    ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
 	return NETDEV_TX_OK;
 
 tx_error_icmp:
@@ -877,6 +951,43 @@ tx_error:
 	return NETDEV_TX_OK;
 }
 
+static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	const struct iphdr *tiph = &tunnel->parms.iph;
+
+	if (likely(!skb->encapsulation)) {
+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
+	}
+
+	ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
+	return NETDEV_TX_OK;
+}
+
+static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
+				   struct net_device *dev)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		ipip_tunnel_xmit(skb, dev);
+		break;
+	case htons(ETH_P_IPV6):
+		ipip6_tunnel_xmit(skb, dev);
+		break;
+	default:
+		goto tx_err;
+	}
+
+	return NETDEV_TX_OK;
+
+tx_err:
+	dev->stats.tx_errors++;
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+
+}
+
 static void ipip6_tunnel_bind_dev(struct net_device *dev)
 {
 	struct net_device *tdev = NULL;
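
sit devices now transmit both families: sit_tunnel_xmit() dispatches on skb->protocol, which holds the EtherType in network byte order. The htons() calls are legal case labels because htons() of a compile-time constant folds to an integer constant expression; the switch is equivalent to this open-coded test:

	if (skb->protocol == htons(ETH_P_IP))		/* 0x0800: IPv4 payload */
		ipip_tunnel_xmit(skb, dev);		/* encapsulate as proto 4 (IPIP) */
	else if (skb->protocol == htons(ETH_P_IPV6))	/* 0x86DD: IPv6 payload */
		ipip6_tunnel_xmit(skb, dev);		/* encapsulate as proto 41 (6in4) */
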
@@ -888,7 +999,8 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
888 iph = &tunnel->parms.iph; 999 iph = &tunnel->parms.iph;
889 1000
890 if (iph->daddr) { 1001 if (iph->daddr) {
891 struct rtable *rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, 1002 struct rtable *rt = ip_route_output_ports(tunnel->net, &fl4,
1003 NULL,
892 iph->daddr, iph->saddr, 1004 iph->daddr, iph->saddr,
893 0, 0, 1005 0, 0,
894 IPPROTO_IPV6, 1006 IPPROTO_IPV6,
@@ -903,7 +1015,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
903 } 1015 }
904 1016
905 if (!tdev && tunnel->parms.link) 1017 if (!tdev && tunnel->parms.link)
906 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); 1018 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
907 1019
908 if (tdev) { 1020 if (tdev) {
909 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); 1021 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
@@ -916,7 +1028,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
916 1028
917static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) 1029static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
918{ 1030{
919 struct net *net = dev_net(t->dev); 1031 struct net *net = t->net;
920 struct sit_net *sitn = net_generic(net, sit_net_id); 1032 struct sit_net *sitn = net_generic(net, sit_net_id);
921 1033
922 ipip6_tunnel_unlink(sitn, t); 1034 ipip6_tunnel_unlink(sitn, t);
@@ -1027,7 +1139,11 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1027 goto done; 1139 goto done;
1028 1140
1029 err = -EINVAL; 1141 err = -EINVAL;
1030 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 || 1142 if (p.iph.protocol != IPPROTO_IPV6 &&
1143 p.iph.protocol != IPPROTO_IPIP &&
1144 p.iph.protocol != 0)
1145 goto done;
1146 if (p.iph.version != 4 ||
1031 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) 1147 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
1032 goto done; 1148 goto done;
1033 if (p.iph.ttl) 1149 if (p.iph.ttl)
@@ -1164,7 +1280,7 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1164 1280
1165static const struct net_device_ops ipip6_netdev_ops = { 1281static const struct net_device_ops ipip6_netdev_ops = {
1166 .ndo_uninit = ipip6_tunnel_uninit, 1282 .ndo_uninit = ipip6_tunnel_uninit,
1167 .ndo_start_xmit = ipip6_tunnel_xmit, 1283 .ndo_start_xmit = sit_tunnel_xmit,
1168 .ndo_do_ioctl = ipip6_tunnel_ioctl, 1284 .ndo_do_ioctl = ipip6_tunnel_ioctl,
1169 .ndo_change_mtu = ipip6_tunnel_change_mtu, 1285 .ndo_change_mtu = ipip6_tunnel_change_mtu,
1170 .ndo_get_stats64 = ip_tunnel_get_stats64, 1286 .ndo_get_stats64 = ip_tunnel_get_stats64,
@@ -1188,7 +1304,6 @@ static void ipip6_tunnel_setup(struct net_device *dev)
1188 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 1304 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1189 dev->iflink = 0; 1305 dev->iflink = 0;
1190 dev->addr_len = 4; 1306 dev->addr_len = 4;
1191 dev->features |= NETIF_F_NETNS_LOCAL;
1192 dev->features |= NETIF_F_LLTX; 1307 dev->features |= NETIF_F_LLTX;
1193} 1308}
1194 1309
@@ -1197,6 +1312,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
1197 struct ip_tunnel *tunnel = netdev_priv(dev); 1312 struct ip_tunnel *tunnel = netdev_priv(dev);
1198 1313
1199 tunnel->dev = dev; 1314 tunnel->dev = dev;
1315 tunnel->net = dev_net(dev);
1200 1316
1201 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 1317 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1202 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 1318 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
@@ -1217,6 +1333,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1217 struct sit_net *sitn = net_generic(net, sit_net_id); 1333 struct sit_net *sitn = net_generic(net, sit_net_id);
1218 1334
1219 tunnel->dev = dev; 1335 tunnel->dev = dev;
1336 tunnel->net = dev_net(dev);
1220 strcpy(tunnel->parms.name, dev->name); 1337 strcpy(tunnel->parms.name, dev->name);
1221 1338
1222 iph->version = 4; 1339 iph->version = 4;
@@ -1232,6 +1349,22 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1232 return 0; 1349 return 0;
1233} 1350}
1234 1351
1352static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[])
1353{
1354 u8 proto;
1355
1356 if (!data || !data[IFLA_IPTUN_PROTO])
1357 return 0;
1358
1359 proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1360 if (proto != IPPROTO_IPV6 &&
1361 proto != IPPROTO_IPIP &&
1362 proto != 0)
1363 return -EINVAL;
1364
1365 return 0;
1366}
1367
1235static void ipip6_netlink_parms(struct nlattr *data[], 1368static void ipip6_netlink_parms(struct nlattr *data[],
1236 struct ip_tunnel_parm *parms) 1369 struct ip_tunnel_parm *parms)
1237{ 1370{
@@ -1268,6 +1401,10 @@ static void ipip6_netlink_parms(struct nlattr *data[],
1268 1401
1269 if (data[IFLA_IPTUN_FLAGS]) 1402 if (data[IFLA_IPTUN_FLAGS])
1270 parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); 1403 parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
1404
1405 if (data[IFLA_IPTUN_PROTO])
1406 parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1407
1271} 1408}
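
Together, the new ipip6_validate() callback and the IFLA_IPTUN_PROTO branch in
ipip6_netlink_parms() give the netlink path the same whitelist as the ioctl
path: .validate rejects an unsupported value before ->newlink()/->changelink()
touch any device state, and only then does the parser copy the attribute into
iph.protocol. A sketch of the attribute round trip under that contract (the
function name is hypothetical):

    /* Validate-then-parse view of the optional IFLA_IPTUN_PROTO u8. */
    static int example_parse_proto(struct nlattr *data[], u8 *proto)
    {
        if (!data || !data[IFLA_IPTUN_PROTO])
            return 0;                        /* attribute is optional */
        *proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
        if (*proto != IPPROTO_IPV6 && *proto != IPPROTO_IPIP && *proto != 0)
            return -EINVAL;                  /* mirrors ipip6_validate() */
        return 0;
    }
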
1272 1409
1273#ifdef CONFIG_IPV6_SIT_6RD 1410#ifdef CONFIG_IPV6_SIT_6RD
@@ -1339,9 +1476,9 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
1339static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], 1476static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
1340 struct nlattr *data[]) 1477 struct nlattr *data[])
1341{ 1478{
1342 struct ip_tunnel *t; 1479 struct ip_tunnel *t = netdev_priv(dev);
1343 struct ip_tunnel_parm p; 1480 struct ip_tunnel_parm p;
1344 struct net *net = dev_net(dev); 1481 struct net *net = t->net;
1345 struct sit_net *sitn = net_generic(net, sit_net_id); 1482 struct sit_net *sitn = net_generic(net, sit_net_id);
1346#ifdef CONFIG_IPV6_SIT_6RD 1483#ifdef CONFIG_IPV6_SIT_6RD
1347 struct ip_tunnel_6rd ip6rd; 1484 struct ip_tunnel_6rd ip6rd;
@@ -1391,6 +1528,8 @@ static size_t ipip6_get_size(const struct net_device *dev)
1391 nla_total_size(1) + 1528 nla_total_size(1) +
1392 /* IFLA_IPTUN_FLAGS */ 1529 /* IFLA_IPTUN_FLAGS */
1393 nla_total_size(2) + 1530 nla_total_size(2) +
1531 /* IFLA_IPTUN_PROTO */
1532 nla_total_size(1) +
1394#ifdef CONFIG_IPV6_SIT_6RD 1533#ifdef CONFIG_IPV6_SIT_6RD
1395 /* IFLA_IPTUN_6RD_PREFIX */ 1534 /* IFLA_IPTUN_6RD_PREFIX */
1396 nla_total_size(sizeof(struct in6_addr)) + 1535 nla_total_size(sizeof(struct in6_addr)) +
@@ -1416,6 +1555,7 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
1416 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || 1555 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
1417 nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, 1556 nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
1418 !!(parm->iph.frag_off & htons(IP_DF))) || 1557 !!(parm->iph.frag_off & htons(IP_DF))) ||
1558 nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
1419 nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags)) 1559 nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags))
1420 goto nla_put_failure; 1560 goto nla_put_failure;
1421 1561
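
Reporting the protocol back to userspace takes three coordinated touches:
reserving space in ipip6_get_size(), emitting the attribute in
ipip6_fill_info(), and declaring it in the nla_policy below. Dropping the first
would undersize the dump message; dropping the policy entry would let malformed
input through. A sketch of the get_size/fill_info pairing, assuming the usual
rtnl contract (hypothetical names):

    /* Every byte reserved here must be matched by a put in fill_info(). */
    static size_t example_get_size(const struct net_device *dev)
    {
        return nla_total_size(1);   /* IFLA_IPTUN_PROTO (u8) */
    }

    static int example_fill_info(struct sk_buff *skb,
                                 const struct net_device *dev)
    {
        const struct ip_tunnel *t = netdev_priv(dev);

        return nla_put_u8(skb, IFLA_IPTUN_PROTO, t->parms.iph.protocol);
    }
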
@@ -1445,6 +1585,7 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
1445 [IFLA_IPTUN_TOS] = { .type = NLA_U8 }, 1585 [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
1446 [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 }, 1586 [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
1447 [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 }, 1587 [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 },
1588 [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
1448#ifdef CONFIG_IPV6_SIT_6RD 1589#ifdef CONFIG_IPV6_SIT_6RD
1449 [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) }, 1590 [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) },
1450 [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 }, 1591 [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 },
@@ -1459,6 +1600,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = {
1459 .policy = ipip6_policy, 1600 .policy = ipip6_policy,
1460 .priv_size = sizeof(struct ip_tunnel), 1601 .priv_size = sizeof(struct ip_tunnel),
1461 .setup = ipip6_tunnel_setup, 1602 .setup = ipip6_tunnel_setup,
1603 .validate = ipip6_validate,
1462 .newlink = ipip6_newlink, 1604 .newlink = ipip6_newlink,
1463 .changelink = ipip6_changelink, 1605 .changelink = ipip6_changelink,
1464 .get_size = ipip6_get_size, 1606 .get_size = ipip6_get_size,
@@ -1471,10 +1613,22 @@ static struct xfrm_tunnel sit_handler __read_mostly = {
1471 .priority = 1, 1613 .priority = 1,
1472}; 1614};
1473 1615
1616static struct xfrm_tunnel ipip_handler __read_mostly = {
1617 .handler = ipip_rcv,
1618 .err_handler = ipip6_err,
1619 .priority = 2,
1620};
1621
1474static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) 1622static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head)
1475{ 1623{
1624 struct net *net = dev_net(sitn->fb_tunnel_dev);
1625 struct net_device *dev, *aux;
1476 int prio; 1626 int prio;
1477 1627
1628 for_each_netdev_safe(net, dev, aux)
1629 if (dev->rtnl_link_ops == &sit_link_ops)
1630 unregister_netdevice_queue(dev, head);
1631
1478 for (prio = 1; prio < 4; prio++) { 1632 for (prio = 1; prio < 4; prio++) {
1479 int h; 1633 int h;
1480 for (h = 0; h < HASH_SIZE; h++) { 1634 for (h = 0; h < HASH_SIZE; h++) {
@@ -1482,7 +1636,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea
1482 1636
1483 t = rtnl_dereference(sitn->tunnels[prio][h]); 1637 t = rtnl_dereference(sitn->tunnels[prio][h]);
1484 while (t != NULL) { 1638 while (t != NULL) {
1485 unregister_netdevice_queue(t->dev, head); 1639 /* If dev is in the same netns, it has already
1640 * been added to the list by the previous loop.
1641 */
1642 if (!net_eq(dev_net(t->dev), net))
1643 unregister_netdevice_queue(t->dev,
1644 head);
1486 t = rtnl_dereference(t->next); 1645 t = rtnl_dereference(t->next);
1487 } 1646 }
1488 } 1647 }
@@ -1507,6 +1666,11 @@ static int __net_init sit_init_net(struct net *net)
1507 goto err_alloc_dev; 1666 goto err_alloc_dev;
1508 } 1667 }
1509 dev_net_set(sitn->fb_tunnel_dev, net); 1668 dev_net_set(sitn->fb_tunnel_dev, net);
1669 sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops;
1670 /* FB netdevice is special: we have one, and only one per netns.
1671 * Allowing to move it to another netns is clearly unsafe.
1672 */
1673 sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1510 1674
1511 err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); 1675 err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
1512 if (err) 1676 if (err)
@@ -1537,7 +1701,6 @@ static void __net_exit sit_exit_net(struct net *net)
1537 1701
1538 rtnl_lock(); 1702 rtnl_lock();
1539 sit_destroy_tunnels(sitn, &list); 1703 sit_destroy_tunnels(sitn, &list);
1540 unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
1541 unregister_netdevice_many(&list); 1704 unregister_netdevice_many(&list);
1542 rtnl_unlock(); 1705 rtnl_unlock();
1543} 1706}
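
With rtnl_link_ops now set on the fallback device (and NETIF_F_NETNS_LOCAL
pinning that one device to its namespace), sit_exit_net() no longer queues it
by hand: the first pass in sit_destroy_tunnels() catches every sit device
registered in the dying netns, fallback included, and the hash walk only mops
up tunnels whose device was moved into another namespace. A condensed sketch of
the two-pass teardown, locking and setup elided (assumption: simplified from
the two hunks above):

    static void example_destroy(struct net *net, struct sit_net *sitn,
                                struct list_head *head)
    {
        struct net_device *dev, *aux;
        int prio, h;

        /* Pass 1: every device this driver created in @net. */
        for_each_netdev_safe(net, dev, aux)
            if (dev->rtnl_link_ops == &sit_link_ops)
                unregister_netdevice_queue(dev, head);

        /* Pass 2: tunnels hashed in @net whose device now lives in a
         * different netns; pass 1 cannot see those. */
        for (prio = 1; prio < 4; prio++)
            for (h = 0; h < HASH_SIZE; h++) {
                struct ip_tunnel *t;

                for (t = rtnl_dereference(sitn->tunnels[prio][h]);
                     t; t = rtnl_dereference(t->next))
                    if (!net_eq(dev_net(t->dev), net))
                        unregister_netdevice_queue(t->dev, head);
            }
    }
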
@@ -1553,6 +1716,7 @@ static void __exit sit_cleanup(void)
1553{ 1716{
1554 rtnl_link_unregister(&sit_link_ops); 1717 rtnl_link_unregister(&sit_link_ops);
1555 xfrm4_tunnel_deregister(&sit_handler, AF_INET6); 1718 xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
1719 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1556 1720
1557 unregister_pernet_device(&sit_net_ops); 1721 unregister_pernet_device(&sit_net_ops);
1558 rcu_barrier(); /* Wait for completion of call_rcu()'s */ 1722 rcu_barrier(); /* Wait for completion of call_rcu()'s */
@@ -1569,9 +1733,14 @@ static int __init sit_init(void)
1569 return err; 1733 return err;
1570 err = xfrm4_tunnel_register(&sit_handler, AF_INET6); 1734 err = xfrm4_tunnel_register(&sit_handler, AF_INET6);
1571 if (err < 0) { 1735 if (err < 0) {
1572 pr_info("%s: can't add protocol\n", __func__); 1736 pr_info("%s: can't register ip6ip4\n", __func__);
1573 goto xfrm_tunnel_failed; 1737 goto xfrm_tunnel_failed;
1574 } 1738 }
1739 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
1740 if (err < 0) {
1741 pr_info("%s: can't register ip4ip4\n", __func__);
1742 goto xfrm_tunnel4_failed;
1743 }
1575 err = rtnl_link_register(&sit_link_ops); 1744 err = rtnl_link_register(&sit_link_ops);
1576 if (err < 0) 1745 if (err < 0)
1577 goto rtnl_link_failed; 1746 goto rtnl_link_failed;
@@ -1580,6 +1749,8 @@ out:
1580 return err; 1749 return err;
1581 1750
1582rtnl_link_failed: 1751rtnl_link_failed:
1752 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1753xfrm_tunnel4_failed:
1583 xfrm4_tunnel_deregister(&sit_handler, AF_INET6); 1754 xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
1584xfrm_tunnel_failed: 1755xfrm_tunnel_failed:
1585 unregister_pernet_device(&sit_net_ops); 1756 unregister_pernet_device(&sit_net_ops);
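
The reworked error labels keep the usual last-registered, first-unregistered
discipline: failing rtnl_link_register() unwinds both tunnel handlers, while
failing the second xfrm4_tunnel_register() unwinds only the first. The generic
shape of the pattern (hypothetical names):

    static int __init example_init(void)
    {
        int err;

        err = register_a();
        if (err)
            return err;
        err = register_b();
        if (err)
            goto unreg_a;
        err = register_c();
        if (err)
            goto unreg_b;
        return 0;

    unreg_b:
        unregister_b();
    unreg_a:
        unregister_a();
        return err;
    }
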
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index d5dda20bd717..bf63ac8a49b9 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -112,32 +112,38 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr,
112 & COOKIEMASK; 112 & COOKIEMASK;
113} 113}
114 114
115__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp) 115u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
116 const struct tcphdr *th, __u16 *mssp)
116{ 117{
117 const struct ipv6hdr *iph = ipv6_hdr(skb);
118 const struct tcphdr *th = tcp_hdr(skb);
119 int mssind; 118 int mssind;
120 const __u16 mss = *mssp; 119 const __u16 mss = *mssp;
121 120
122 tcp_synq_overflow(sk);
123
124 for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) 121 for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
125 if (mss >= msstab[mssind]) 122 if (mss >= msstab[mssind])
126 break; 123 break;
127 124
128 *mssp = msstab[mssind]; 125 *mssp = msstab[mssind];
129 126
130 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
131
132 return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source, 127 return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source,
133 th->dest, ntohl(th->seq), 128 th->dest, ntohl(th->seq),
134 jiffies / (HZ * 60), mssind); 129 jiffies / (HZ * 60), mssind);
135} 130}
131EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence);
136 132
137static inline int cookie_check(const struct sk_buff *skb, __u32 cookie) 133__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp)
138{ 134{
139 const struct ipv6hdr *iph = ipv6_hdr(skb); 135 const struct ipv6hdr *iph = ipv6_hdr(skb);
140 const struct tcphdr *th = tcp_hdr(skb); 136 const struct tcphdr *th = tcp_hdr(skb);
137
138 tcp_synq_overflow(sk);
139 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
140
141 return __cookie_v6_init_sequence(iph, th, mssp);
142}
143
144int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
145 __u32 cookie)
146{
141 __u32 seq = ntohl(th->seq) - 1; 147 __u32 seq = ntohl(th->seq) - 1;
142 __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, 148 __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
143 th->source, th->dest, seq, 149 th->source, th->dest, seq,
@@ -145,6 +151,7 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie)
145 151
146 return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; 152 return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
147} 153}
154EXPORT_SYMBOL_GPL(__cookie_v6_check);
148 155
149struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) 156struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
150{ 157{
@@ -167,7 +174,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
167 goto out; 174 goto out;
168 175
169 if (tcp_synq_no_recent_overflow(sk) || 176 if (tcp_synq_no_recent_overflow(sk) ||
170 (mss = cookie_check(skb, cookie)) == 0) { 177 (mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie)) == 0) {
171 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); 178 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
172 goto out; 179 goto out;
173 } 180 }
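
The syncookie refactor above splits each operation into a side-effect-free core
and a socket-aware wrapper: __cookie_v6_init_sequence() and __cookie_v6_check()
work on bare headers, while cookie_v6_init_sequence() keeps the
tcp_synq_overflow() bookkeeping and the SYNCOOKIESSENT counter. Exporting the
cores lets callers that hold no listening socket, or must not perturb its
state, compute and verify cookies directly. A sketch of such a caller, assuming
already-validated header pointers (the function is hypothetical):

    /* Stateless check: does @cookie match this packet's addresses and
     * ports, and if so, which MSS was encoded in it? */
    static int example_check(const struct ipv6hdr *iph,
                             const struct tcphdr *th, __u32 cookie)
    {
        int mss = __cookie_v6_check(iph, th, cookie);

        return mss ? mss : -EINVAL;  /* 0 means "not a valid cookie" */
    }
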
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index e85c48bd404f..107b2f1d90ae 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -16,7 +16,7 @@
16#include <net/addrconf.h> 16#include <net/addrconf.h>
17#include <net/inet_frag.h> 17#include <net/inet_frag.h>
18 18
19static ctl_table ipv6_table_template[] = { 19static struct ctl_table ipv6_table_template[] = {
20 { 20 {
21 .procname = "bindv6only", 21 .procname = "bindv6only",
22 .data = &init_net.ipv6.sysctl.bindv6only, 22 .data = &init_net.ipv6.sysctl.bindv6only,
@@ -27,7 +27,7 @@ static ctl_table ipv6_table_template[] = {
27 { } 27 { }
28}; 28};
29 29
30static ctl_table ipv6_rotable[] = { 30static struct ctl_table ipv6_rotable[] = {
31 { 31 {
32 .procname = "mld_max_msf", 32 .procname = "mld_max_msf",
33 .data = &sysctl_mld_max_msf, 33 .data = &sysctl_mld_max_msf,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0a17ed9eaf39..5c71501fc917 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -63,6 +63,7 @@
63#include <net/inet_common.h> 63#include <net/inet_common.h>
64#include <net/secure_seq.h> 64#include <net/secure_seq.h>
65#include <net/tcp_memcontrol.h> 65#include <net/tcp_memcontrol.h>
66#include <net/busy_poll.h>
66 67
67#include <asm/uaccess.h> 68#include <asm/uaccess.h>
68 69
@@ -962,7 +963,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
962 if (!ipv6_unicast_destination(skb)) 963 if (!ipv6_unicast_destination(skb))
963 goto drop; 964 goto drop;
964 965
965 if (inet_csk_reqsk_queue_is_full(sk) && !isn) { 966 if ((sysctl_tcp_syncookies == 2 ||
967 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
966 want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6"); 968 want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
967 if (!want_cookie) 969 if (!want_cookie)
968 goto drop; 970 goto drop;
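
The widened condition makes sysctl_tcp_syncookies == 2 answer every IPv6 SYN
with a cookie, not only those arriving under backlog pressure, matching the
IPv4 side of the same change. A sketch of the admission predicate, assuming the
same two triggers as the hunk (the helper name is hypothetical):

    /* Should this SYN be answered with a cookie rather than queued? */
    static bool example_want_cookie(const struct sock *sk)
    {
        return sysctl_tcp_syncookies == 2 ||        /* unconditional mode */
               inet_csk_reqsk_queue_is_full(sk);    /* backlog exhausted */
    }
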
@@ -1236,8 +1238,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1236 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; 1238 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1237 1239
1238 tcp_initialize_rcv_mss(newsk); 1240 tcp_initialize_rcv_mss(newsk);
1239 tcp_synack_rtt_meas(newsk, req);
1240 newtp->total_retrans = req->num_retrans;
1241 1241
1242 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 1242 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1243 newinet->inet_rcv_saddr = LOOPBACK4_IPV6; 1243 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
@@ -1360,8 +1360,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1360 } 1360 }
1361 } 1361 }
1362 1362
1363 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) 1363 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1364 goto reset;
1365 if (opt_skb) 1364 if (opt_skb)
1366 goto ipv6_pktoptions; 1365 goto ipv6_pktoptions;
1367 return 0; 1366 return 0;
@@ -1426,7 +1425,7 @@ ipv6_pktoptions:
1426 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1425 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1427 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; 1426 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1428 if (np->rxopt.bits.rxtclass) 1427 if (np->rxopt.bits.rxtclass)
1429 np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); 1428 np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(opt_skb));
1430 if (ipv6_opt_accepted(sk, opt_skb)) { 1429 if (ipv6_opt_accepted(sk, opt_skb)) {
1431 skb_set_owner_r(opt_skb, sk); 1430 skb_set_owner_r(opt_skb, sk);
1432 opt_skb = xchg(&np->pktoptions, opt_skb); 1431 opt_skb = xchg(&np->pktoptions, opt_skb);
@@ -1498,6 +1497,7 @@ process:
1498 if (sk_filter(sk, skb)) 1497 if (sk_filter(sk, skb))
1499 goto discard_and_relse; 1498 goto discard_and_relse;
1500 1499
1500 sk_mark_napi_id(sk, skb);
1501 skb->dev = NULL; 1501 skb->dev = NULL;
1502 1502
1503 bh_lock_sock_nested(sk); 1503 bh_lock_sock_nested(sk);
@@ -1730,7 +1730,7 @@ static void get_openreq6(struct seq_file *seq,
1730 1730
1731 seq_printf(seq, 1731 seq_printf(seq,
1732 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1732 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1733 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", 1733 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1734 i, 1734 i,
1735 src->s6_addr32[0], src->s6_addr32[1], 1735 src->s6_addr32[0], src->s6_addr32[1],
1736 src->s6_addr32[2], src->s6_addr32[3], 1736 src->s6_addr32[2], src->s6_addr32[3],
@@ -1781,7 +1781,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1781 1781
1782 seq_printf(seq, 1782 seq_printf(seq,
1783 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1783 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1784 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n", 1784 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1785 i, 1785 i,
1786 src->s6_addr32[0], src->s6_addr32[1], 1786 src->s6_addr32[0], src->s6_addr32[1],
1787 src->s6_addr32[2], src->s6_addr32[3], srcp, 1787 src->s6_addr32[2], src->s6_addr32[3], srcp,
@@ -1924,6 +1924,7 @@ struct proto tcpv6_prot = {
1924 .unhash = inet_unhash, 1924 .unhash = inet_unhash,
1925 .get_port = inet_csk_get_port, 1925 .get_port = inet_csk_get_port,
1926 .enter_memory_pressure = tcp_enter_memory_pressure, 1926 .enter_memory_pressure = tcp_enter_memory_pressure,
1927 .stream_memory_free = tcp_stream_memory_free,
1927 .sockets_allocated = &tcp_sockets_allocated, 1928 .sockets_allocated = &tcp_sockets_allocated,
1928 .memory_allocated = &tcp_memory_allocated, 1929 .memory_allocated = &tcp_memory_allocated,
1929 .memory_pressure = &tcp_memory_pressure, 1930 .memory_pressure = &tcp_memory_pressure,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 42923b14dfa6..72b7eaaf3ca0 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -46,6 +46,7 @@
46#include <net/ip6_checksum.h> 46#include <net/ip6_checksum.h>
47#include <net/xfrm.h> 47#include <net/xfrm.h>
48#include <net/inet6_hashtables.h> 48#include <net/inet6_hashtables.h>
49#include <net/busy_poll.h>
49 50
50#include <linux/proc_fs.h> 51#include <linux/proc_fs.h>
51#include <linux/seq_file.h> 52#include <linux/seq_file.h>
@@ -524,8 +525,10 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
524 525
525 if (type == ICMPV6_PKT_TOOBIG) 526 if (type == ICMPV6_PKT_TOOBIG)
526 ip6_sk_update_pmtu(skb, sk, info); 527 ip6_sk_update_pmtu(skb, sk, info);
527 if (type == NDISC_REDIRECT) 528 if (type == NDISC_REDIRECT) {
528 ip6_sk_redirect(skb, sk); 529 ip6_sk_redirect(skb, sk);
530 goto out;
531 }
529 532
530 np = inet6_sk(sk); 533 np = inet6_sk(sk);
531 534
@@ -841,7 +844,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
841 */ 844 */
842 sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); 845 sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
843 if (sk != NULL) { 846 if (sk != NULL) {
844 int ret = udpv6_queue_rcv_skb(sk, skb); 847 int ret;
848
849 sk_mark_napi_id(sk, skb);
850 ret = udpv6_queue_rcv_skb(sk, skb);
845 sock_put(sk); 851 sock_put(sk);
846 852
847 /* a return value > 0 means to resubmit the input, but 853 /* a return value > 0 means to resubmit the input, but
@@ -955,11 +961,16 @@ static int udp_v6_push_pending_frames(struct sock *sk)
955 struct udphdr *uh; 961 struct udphdr *uh;
956 struct udp_sock *up = udp_sk(sk); 962 struct udp_sock *up = udp_sk(sk);
957 struct inet_sock *inet = inet_sk(sk); 963 struct inet_sock *inet = inet_sk(sk);
958 struct flowi6 *fl6 = &inet->cork.fl.u.ip6; 964 struct flowi6 *fl6;
959 int err = 0; 965 int err = 0;
960 int is_udplite = IS_UDPLITE(sk); 966 int is_udplite = IS_UDPLITE(sk);
961 __wsum csum = 0; 967 __wsum csum = 0;
962 968
969 if (up->pending == AF_INET)
970 return udp_push_pending_frames(sk);
971
972 fl6 = &inet->cork.fl.u.ip6;
973
963 /* Grab the skbuff where UDP header space exists. */ 974 /* Grab the skbuff where UDP header space exists. */
964 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) 975 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
965 goto out; 976 goto out;
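
Flushing pending frames on a dual-stack UDP socket has to follow the family
recorded when the cork was set up: with up->pending == AF_INET the cork state
lives in the IPv4 half of the flow union, so taking &inet->cork.fl.u.ip6 first
would alias the wrong member. The hunk therefore dispatches before touching the
union. The shape, condensed (assumption: as in the hunk above):

    /* Dispatch by cork-time family before touching union members. */
    if (up->pending == AF_INET)
        return udp_push_pending_frames(sk);  /* IPv4 flush path */
    fl6 = &inet->cork.fl.u.ip6;              /* safe: cork is IPv6 */
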
@@ -1359,48 +1370,17 @@ static const struct inet6_protocol udpv6_protocol = {
1359 1370
1360/* ------------------------------------------------------------------------ */ 1371/* ------------------------------------------------------------------------ */
1361#ifdef CONFIG_PROC_FS 1372#ifdef CONFIG_PROC_FS
1362
1363static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket)
1364{
1365 struct inet_sock *inet = inet_sk(sp);
1366 struct ipv6_pinfo *np = inet6_sk(sp);
1367 const struct in6_addr *dest, *src;
1368 __u16 destp, srcp;
1369
1370 dest = &np->daddr;
1371 src = &np->rcv_saddr;
1372 destp = ntohs(inet->inet_dport);
1373 srcp = ntohs(inet->inet_sport);
1374 seq_printf(seq,
1375 "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1376 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n",
1377 bucket,
1378 src->s6_addr32[0], src->s6_addr32[1],
1379 src->s6_addr32[2], src->s6_addr32[3], srcp,
1380 dest->s6_addr32[0], dest->s6_addr32[1],
1381 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1382 sp->sk_state,
1383 sk_wmem_alloc_get(sp),
1384 sk_rmem_alloc_get(sp),
1385 0, 0L, 0,
1386 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1387 0,
1388 sock_i_ino(sp),
1389 atomic_read(&sp->sk_refcnt), sp,
1390 atomic_read(&sp->sk_drops));
1391}
1392
1393int udp6_seq_show(struct seq_file *seq, void *v) 1373int udp6_seq_show(struct seq_file *seq, void *v)
1394{ 1374{
1395 if (v == SEQ_START_TOKEN) 1375 if (v == SEQ_START_TOKEN) {
1396 seq_printf(seq, 1376 seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
1397 " sl " 1377 } else {
1398 "local_address " 1378 int bucket = ((struct udp_iter_state *)seq->private)->bucket;
1399 "remote_address " 1379 struct inet_sock *inet = inet_sk(v);
1400 "st tx_queue rx_queue tr tm->when retrnsmt" 1380 __u16 srcp = ntohs(inet->inet_sport);
1401 " uid timeout inode ref pointer drops\n"); 1381 __u16 destp = ntohs(inet->inet_dport);
1402 else 1382 ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket);
1403 udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket); 1383 }
1404 return 0; 1384 return 0;
1405} 1385}
1406 1386
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index d3cfaf9c7a08..60559511bd9c 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -21,26 +21,25 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
21 const struct ipv6hdr *ipv6h; 21 const struct ipv6hdr *ipv6h;
22 struct udphdr *uh; 22 struct udphdr *uh;
23 23
24 /* UDP Tunnel offload on ipv6 is not yet supported. */
25 if (skb->encapsulation)
26 return -EINVAL;
27
28 if (!pskb_may_pull(skb, sizeof(*uh))) 24 if (!pskb_may_pull(skb, sizeof(*uh)))
29 return -EINVAL; 25 return -EINVAL;
30 26
31 ipv6h = ipv6_hdr(skb); 27 if (likely(!skb->encapsulation)) {
32 uh = udp_hdr(skb); 28 ipv6h = ipv6_hdr(skb);
29 uh = udp_hdr(skb);
30
31 uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
32 IPPROTO_UDP, 0);
33 skb->csum_start = skb_transport_header(skb) - skb->head;
34 skb->csum_offset = offsetof(struct udphdr, check);
35 skb->ip_summed = CHECKSUM_PARTIAL;
36 }
33 37
34 uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
35 IPPROTO_UDP, 0);
36 skb->csum_start = skb_transport_header(skb) - skb->head;
37 skb->csum_offset = offsetof(struct udphdr, check);
38 skb->ip_summed = CHECKSUM_PARTIAL;
39 return 0; 38 return 0;
40} 39}
41 40
42static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, 41static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
43 netdev_features_t features) 42 netdev_features_t features)
44{ 43{
45 struct sk_buff *segs = ERR_PTR(-EINVAL); 44 struct sk_buff *segs = ERR_PTR(-EINVAL);
46 unsigned int mss; 45 unsigned int mss;
@@ -64,7 +63,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
64 if (unlikely(type & ~(SKB_GSO_UDP | 63 if (unlikely(type & ~(SKB_GSO_UDP |
65 SKB_GSO_DODGY | 64 SKB_GSO_DODGY |
66 SKB_GSO_UDP_TUNNEL | 65 SKB_GSO_UDP_TUNNEL |
67 SKB_GSO_GRE) || 66 SKB_GSO_GRE |
67 SKB_GSO_MPLS) ||
68 !(type & (SKB_GSO_UDP)))) 68 !(type & (SKB_GSO_UDP))))
69 goto out; 69 goto out;
70 70
@@ -74,47 +74,51 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
74 goto out; 74 goto out;
75 } 75 }
76 76
77 /* Do software UFO. Complete and fill in the UDP checksum as HW cannot 77 if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
78 * do checksum of UDP packets sent as multiple IP fragments. 78 segs = skb_udp_tunnel_segment(skb, features);
79 */ 79 else {
80 offset = skb_checksum_start_offset(skb); 80 /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
81 csum = skb_checksum(skb, offset, skb->len - offset, 0); 81 * do checksum of UDP packets sent as multiple IP fragments.
82 offset += skb->csum_offset; 82 */
83 *(__sum16 *)(skb->data + offset) = csum_fold(csum); 83 offset = skb_checksum_start_offset(skb);
84 skb->ip_summed = CHECKSUM_NONE; 84 csum = skb_checksum(skb, offset, skb->len - offset, 0);
85 85 offset += skb->csum_offset;
86 /* Check if there is enough headroom to insert fragment header. */ 86 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
87 tnl_hlen = skb_tnl_header_len(skb); 87 skb->ip_summed = CHECKSUM_NONE;
88 if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) { 88
89 if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) 89 /* Check if there is enough headroom to insert fragment header. */
90 goto out; 90 tnl_hlen = skb_tnl_header_len(skb);
91 if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) {
92 if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
93 goto out;
94 }
95
96 /* Find the unfragmentable header and shift it left by frag_hdr_sz
97 * bytes to insert fragment header.
98 */
99 unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
100 nexthdr = *prevhdr;
101 *prevhdr = NEXTHDR_FRAGMENT;
102 unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
103 unfrag_ip6hlen + tnl_hlen;
104 packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
105 memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
106
107 SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
108 skb->mac_header -= frag_hdr_sz;
109 skb->network_header -= frag_hdr_sz;
110
111 fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
112 fptr->nexthdr = nexthdr;
113 fptr->reserved = 0;
114 ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
115
116 /* Fragment the skb. ipv6 header and the remaining fields of the
117 * fragment header are updated in ipv6_gso_segment()
118 */
119 segs = skb_segment(skb, features);
91 } 120 }
92 121
93 /* Find the unfragmentable header and shift it left by frag_hdr_sz
94 * bytes to insert fragment header.
95 */
96 unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
97 nexthdr = *prevhdr;
98 *prevhdr = NEXTHDR_FRAGMENT;
99 unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
100 unfrag_ip6hlen + tnl_hlen;
101 packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
102 memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
103
104 SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
105 skb->mac_header -= frag_hdr_sz;
106 skb->network_header -= frag_hdr_sz;
107
108 fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
109 fptr->nexthdr = nexthdr;
110 fptr->reserved = 0;
111 ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
112
113 /* Fragment the skb. ipv6 header and the remaining fields of the
114 * fragment header are updated in ipv6_gso_segment()
115 */
116 segs = skb_segment(skb, features);
117
118out: 122out:
119 return segs; 123 return segs;
120} 124}
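
udp6_ufo_fragment() now forks on the GSO type: encapsulated packets marked
SKB_GSO_UDP_TUNNEL go to skb_udp_tunnel_segment(), while the plain-UFO branch
keeps the original sequence of completing the UDP checksum in software, making
headroom for and inserting the IPv6 fragment header, then calling
skb_segment(). The send-check hunk mirrors this by precomputing the
pseudo-header checksum only for non-encapsulated packets. A sketch of the
dispatch (assumption: simplified control flow of the hunk):

    if (skb->encapsulation &&
        (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)) {
        segs = skb_udp_tunnel_segment(skb, features);  /* tunnel GSO */
    } else {
        /* software UFO: checksum, insert frag header, then segment */
        segs = skb_segment(skb, features);
    }
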
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 8755a3079d0f..6cd625e37706 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -34,8 +34,10 @@ static int xfrm6_local_dontfrag(struct sk_buff *skb)
34 struct sock *sk = skb->sk; 34 struct sock *sk = skb->sk;
35 35
36 if (sk) { 36 if (sk) {
37 proto = sk->sk_protocol; 37 if (sk->sk_family != AF_INET6)
38 return 0;
38 39
40 proto = sk->sk_protocol;
39 if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) 41 if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
40 return inet6_sk(sk)->dontfrag; 42 return inet6_sk(sk)->dontfrag;
41 } 43 }
@@ -54,13 +56,15 @@ static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
54 ipv6_local_rxpmtu(sk, &fl6, mtu); 56 ipv6_local_rxpmtu(sk, &fl6, mtu);
55} 57}
56 58
57static void xfrm6_local_error(struct sk_buff *skb, u32 mtu) 59void xfrm6_local_error(struct sk_buff *skb, u32 mtu)
58{ 60{
59 struct flowi6 fl6; 61 struct flowi6 fl6;
62 const struct ipv6hdr *hdr;
60 struct sock *sk = skb->sk; 63 struct sock *sk = skb->sk;
61 64
65 hdr = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
62 fl6.fl6_dport = inet_sk(sk)->inet_dport; 66 fl6.fl6_dport = inet_sk(sk)->inet_dport;
63 fl6.daddr = ipv6_hdr(skb)->daddr; 67 fl6.daddr = hdr->daddr;
64 68
65 ipv6_local_error(sk, EMSGSIZE, &fl6, mtu); 69 ipv6_local_error(sk, EMSGSIZE, &fl6, mtu);
66} 70}
@@ -80,7 +84,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
80 if (xfrm6_local_dontfrag(skb)) 84 if (xfrm6_local_dontfrag(skb))
81 xfrm6_local_rxpmtu(skb, mtu); 85 xfrm6_local_rxpmtu(skb, mtu);
82 else if (skb->sk) 86 else if (skb->sk)
83 xfrm6_local_error(skb, mtu); 87 xfrm_local_error(skb, mtu);
84 else 88 else
85 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 89 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
86 ret = -EMSGSIZE; 90 ret = -EMSGSIZE;
@@ -136,13 +140,18 @@ static int __xfrm6_output(struct sk_buff *skb)
136{ 140{
137 struct dst_entry *dst = skb_dst(skb); 141 struct dst_entry *dst = skb_dst(skb);
138 struct xfrm_state *x = dst->xfrm; 142 struct xfrm_state *x = dst->xfrm;
139 int mtu = ip6_skb_dst_mtu(skb); 143 int mtu;
144
145 if (skb->protocol == htons(ETH_P_IPV6))
146 mtu = ip6_skb_dst_mtu(skb);
147 else
148 mtu = dst_mtu(skb_dst(skb));
140 149
141 if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { 150 if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
142 xfrm6_local_rxpmtu(skb, mtu); 151 xfrm6_local_rxpmtu(skb, mtu);
143 return -EMSGSIZE; 152 return -EMSGSIZE;
144 } else if (!skb->local_df && skb->len > mtu && skb->sk) { 153 } else if (!skb->local_df && skb->len > mtu && skb->sk) {
145 xfrm6_local_error(skb, mtu); 154 xfrm_local_error(skb, mtu);
146 return -EMSGSIZE; 155 return -EMSGSIZE;
147 } 156 }
148 157
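
__xfrm6_output() can now carry IPv4 payloads (interfamily IPsec), so both the
MTU probe and the too-big notification become family-aware: ip6_skb_dst_mtu()
is only meaningful for an inner IPv6 packet, and xfrm_local_error() dispatches
through the per-family state afinfo (the .local_error hook registered in the
xfrm6_state.c hunk below) so an IPv4 sender receives an IPv4-style error. The
MTU pick, condensed (assumption: as in the hunk):

    int mtu = (skb->protocol == htons(ETH_P_IPV6))
                  ? ip6_skb_dst_mtu(skb)     /* inner IPv6: socket PMTU policy */
                  : dst_mtu(skb_dst(skb));   /* inner IPv4: route MTU */
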
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index d8c70b8efc24..3fc970135fc6 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -183,6 +183,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
183 .extract_input = xfrm6_extract_input, 183 .extract_input = xfrm6_extract_input,
184 .extract_output = xfrm6_extract_output, 184 .extract_output = xfrm6_extract_output,
185 .transport_finish = xfrm6_transport_finish, 185 .transport_finish = xfrm6_transport_finish,
186 .local_error = xfrm6_local_error,
186}; 187};
187 188
188int __init xfrm6_state_init(void) 189int __init xfrm6_state_init(void)
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index f547a47d381c..7a1e0fc1bd4d 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -330,7 +330,7 @@ static __inline__ void __ipxitf_put(struct ipx_interface *intrfc)
330static int ipxitf_device_event(struct notifier_block *notifier, 330static int ipxitf_device_event(struct notifier_block *notifier,
331 unsigned long event, void *ptr) 331 unsigned long event, void *ptr)
332{ 332{
333 struct net_device *dev = ptr; 333 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
334 struct ipx_interface *i, *tmp; 334 struct ipx_interface *i, *tmp;
335 335
336 if (!net_eq(dev_net(dev), &init_net)) 336 if (!net_eq(dev_net(dev), &init_net))
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
index 65e8833a2510..e15c16a517e7 100644
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -213,7 +213,7 @@ static int ipx_seq_socket_show(struct seq_file *seq, void *v)
213 ntohs(ipxs->dest_addr.sock)); 213 ntohs(ipxs->dest_addr.sock));
214 } 214 }
215 215
216 seq_printf(seq, "%08X %08X %02X %03d\n", 216 seq_printf(seq, "%08X %08X %02X %03u\n",
217 sk_wmem_alloc_get(s), 217 sk_wmem_alloc_get(s),
218 sk_rmem_alloc_get(s), 218 sk_rmem_alloc_get(s),
219 s->sk_state, 219 s->sk_state,
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index d14152e866d9..ffcec225b5d9 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -44,12 +44,12 @@ static int irlan_eth_open(struct net_device *dev);
44static int irlan_eth_close(struct net_device *dev); 44static int irlan_eth_close(struct net_device *dev);
45static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, 45static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
46 struct net_device *dev); 46 struct net_device *dev);
47static void irlan_eth_set_multicast_list( struct net_device *dev); 47static void irlan_eth_set_multicast_list(struct net_device *dev);
48 48
49static const struct net_device_ops irlan_eth_netdev_ops = { 49static const struct net_device_ops irlan_eth_netdev_ops = {
50 .ndo_open = irlan_eth_open, 50 .ndo_open = irlan_eth_open,
51 .ndo_stop = irlan_eth_close, 51 .ndo_stop = irlan_eth_close,
52 .ndo_start_xmit = irlan_eth_xmit, 52 .ndo_start_xmit = irlan_eth_xmit,
53 .ndo_set_rx_mode = irlan_eth_set_multicast_list, 53 .ndo_set_rx_mode = irlan_eth_set_multicast_list,
54 .ndo_change_mtu = eth_change_mtu, 54 .ndo_change_mtu = eth_change_mtu,
55 .ndo_validate_addr = eth_validate_addr, 55 .ndo_validate_addr = eth_validate_addr,
@@ -110,7 +110,7 @@ static int irlan_eth_open(struct net_device *dev)
110{ 110{
111 struct irlan_cb *self = netdev_priv(dev); 111 struct irlan_cb *self = netdev_priv(dev);
112 112
113 IRDA_DEBUG(2, "%s()\n", __func__ ); 113 IRDA_DEBUG(2, "%s()\n", __func__);
114 114
115 /* Ready to play! */ 115 /* Ready to play! */
116 netif_stop_queue(dev); /* Wait until data link is ready */ 116 netif_stop_queue(dev); /* Wait until data link is ready */
@@ -137,7 +137,7 @@ static int irlan_eth_close(struct net_device *dev)
137{ 137{
138 struct irlan_cb *self = netdev_priv(dev); 138 struct irlan_cb *self = netdev_priv(dev);
139 139
140 IRDA_DEBUG(2, "%s()\n", __func__ ); 140 IRDA_DEBUG(2, "%s()\n", __func__);
141 141
142 /* Stop device */ 142 /* Stop device */
143 netif_stop_queue(dev); 143 netif_stop_queue(dev);
@@ -310,35 +310,32 @@ static void irlan_eth_set_multicast_list(struct net_device *dev)
310{ 310{
311 struct irlan_cb *self = netdev_priv(dev); 311 struct irlan_cb *self = netdev_priv(dev);
312 312
313 IRDA_DEBUG(2, "%s()\n", __func__ ); 313 IRDA_DEBUG(2, "%s()\n", __func__);
314 314
315 /* Check if data channel has been connected yet */ 315 /* Check if data channel has been connected yet */
316 if (self->client.state != IRLAN_DATA) { 316 if (self->client.state != IRLAN_DATA) {
317 IRDA_DEBUG(1, "%s(), delaying!\n", __func__ ); 317 IRDA_DEBUG(1, "%s(), delaying!\n", __func__);
318 return; 318 return;
319 } 319 }
320 320
321 if (dev->flags & IFF_PROMISC) { 321 if (dev->flags & IFF_PROMISC) {
322 /* Enable promiscuous mode */ 322 /* Enable promiscuous mode */
323 IRDA_WARNING("Promiscuous mode not implemented by IrLAN!\n"); 323 IRDA_WARNING("Promiscuous mode not implemented by IrLAN!\n");
324 } 324 } else if ((dev->flags & IFF_ALLMULTI) ||
325 else if ((dev->flags & IFF_ALLMULTI) ||
326 netdev_mc_count(dev) > HW_MAX_ADDRS) { 325 netdev_mc_count(dev) > HW_MAX_ADDRS) {
327 /* Disable promiscuous mode, use normal mode. */ 326 /* Disable promiscuous mode, use normal mode. */
328 IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__ ); 327 IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__);
329 /* hardware_set_filter(NULL); */ 328 /* hardware_set_filter(NULL); */
330 329
331 irlan_set_multicast_filter(self, TRUE); 330 irlan_set_multicast_filter(self, TRUE);
332 } 331 } else if (!netdev_mc_empty(dev)) {
333 else if (!netdev_mc_empty(dev)) { 332 IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__);
334 IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__ );
335 /* Walk the address list, and load the filter */ 333 /* Walk the address list, and load the filter */
336 /* hardware_set_filter(dev->mc_list); */ 334 /* hardware_set_filter(dev->mc_list); */
337 335
338 irlan_set_multicast_filter(self, TRUE); 336 irlan_set_multicast_filter(self, TRUE);
339 } 337 } else {
340 else { 338 IRDA_DEBUG(4, "%s(), Clearing multicast filter\n", __func__);
341 IRDA_DEBUG(4, "%s(), Clearing multicast filter\n", __func__ );
342 irlan_set_multicast_filter(self, FALSE); 339 irlan_set_multicast_filter(self, FALSE);
343 } 340 }
344 341
diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c
index de73f6496db5..d6a59651767a 100644
--- a/net/irda/irsysctl.c
+++ b/net/irda/irsysctl.c
@@ -73,7 +73,7 @@ static int min_lap_keepalive_time = 100; /* 100us */
73/* For other sysctl, I've no idea of the range. Maybe Dag could help 73/* For other sysctl, I've no idea of the range. Maybe Dag could help
74 * us on that - Jean II */ 74 * us on that - Jean II */
75 75
76static int do_devname(ctl_table *table, int write, 76static int do_devname(struct ctl_table *table, int write,
77 void __user *buffer, size_t *lenp, loff_t *ppos) 77 void __user *buffer, size_t *lenp, loff_t *ppos)
78{ 78{
79 int ret; 79 int ret;
@@ -90,7 +90,7 @@ static int do_devname(ctl_table *table, int write,
90} 90}
91 91
92 92
93static int do_discovery(ctl_table *table, int write, 93static int do_discovery(struct ctl_table *table, int write,
94 void __user *buffer, size_t *lenp, loff_t *ppos) 94 void __user *buffer, size_t *lenp, loff_t *ppos)
95{ 95{
96 int ret; 96 int ret;
@@ -111,7 +111,7 @@ static int do_discovery(ctl_table *table, int write,
111} 111}
112 112
113/* One file */ 113/* One file */
114static ctl_table irda_table[] = { 114static struct ctl_table irda_table[] = {
115 { 115 {
116 .procname = "discovery", 116 .procname = "discovery",
117 .data = &sysctl_discovery, 117 .data = &sysctl_discovery,
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index ae43c62f9045..85372cfa7b9f 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -75,7 +75,7 @@ static pi_minor_info_t pi_minor_call_table[] = {
75 { NULL, 0 }, /* 0x00 */ 75 { NULL, 0 }, /* 0x00 */
76 { irttp_param_max_sdu_size, PV_INTEGER | PV_BIG_ENDIAN } /* 0x01 */ 76 { irttp_param_max_sdu_size, PV_INTEGER | PV_BIG_ENDIAN } /* 0x01 */
77}; 77};
78static pi_major_info_t pi_major_call_table[] = {{ pi_minor_call_table, 2 }}; 78static pi_major_info_t pi_major_call_table[] = { { pi_minor_call_table, 2 } };
79static pi_param_info_t param_info = { pi_major_call_table, 1, 0x0f, 4 }; 79static pi_param_info_t param_info = { pi_major_call_table, 1, 0x0f, 4 };
80 80
81/************************ GLOBAL PROCEDURES ************************/ 81/************************ GLOBAL PROCEDURES ************************/
@@ -205,7 +205,7 @@ static void irttp_todo_expired(unsigned long data)
205 */ 205 */
206static void irttp_flush_queues(struct tsap_cb *self) 206static void irttp_flush_queues(struct tsap_cb *self)
207{ 207{
208 struct sk_buff* skb; 208 struct sk_buff *skb;
209 209
210 IRDA_DEBUG(4, "%s()\n", __func__); 210 IRDA_DEBUG(4, "%s()\n", __func__);
211 211
@@ -400,7 +400,7 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify)
400 /* The IrLMP spec (IrLMP 1.1 p10) says that we have the right to 400 /* The IrLMP spec (IrLMP 1.1 p10) says that we have the right to
401 * use only 0x01-0x6F. Of course, we can use LSAP_ANY as well. 401 * use only 0x01-0x6F. Of course, we can use LSAP_ANY as well.
402 * JeanII */ 402 * JeanII */
403 if((stsap_sel != LSAP_ANY) && 403 if ((stsap_sel != LSAP_ANY) &&
404 ((stsap_sel < 0x01) || (stsap_sel >= 0x70))) { 404 ((stsap_sel < 0x01) || (stsap_sel >= 0x70))) {
405 IRDA_DEBUG(0, "%s(), invalid tsap!\n", __func__); 405 IRDA_DEBUG(0, "%s(), invalid tsap!\n", __func__);
406 return NULL; 406 return NULL;
@@ -427,7 +427,7 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify)
427 ttp_notify.data_indication = irttp_data_indication; 427 ttp_notify.data_indication = irttp_data_indication;
428 ttp_notify.udata_indication = irttp_udata_indication; 428 ttp_notify.udata_indication = irttp_udata_indication;
429 ttp_notify.flow_indication = irttp_flow_indication; 429 ttp_notify.flow_indication = irttp_flow_indication;
430 if(notify->status_indication != NULL) 430 if (notify->status_indication != NULL)
431 ttp_notify.status_indication = irttp_status_indication; 431 ttp_notify.status_indication = irttp_status_indication;
432 ttp_notify.instance = self; 432 ttp_notify.instance = self;
433 strncpy(ttp_notify.name, notify->name, NOTIFY_MAX_NAME); 433 strncpy(ttp_notify.name, notify->name, NOTIFY_MAX_NAME);
@@ -639,8 +639,7 @@ int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb)
639 */ 639 */
640 if ((self->tx_max_sdu_size != 0) && 640 if ((self->tx_max_sdu_size != 0) &&
641 (self->tx_max_sdu_size != TTP_SAR_UNBOUND) && 641 (self->tx_max_sdu_size != TTP_SAR_UNBOUND) &&
642 (skb->len > self->tx_max_sdu_size)) 642 (skb->len > self->tx_max_sdu_size)) {
643 {
644 IRDA_ERROR("%s: SAR enabled, but data is larger than TxMaxSduSize!\n", 643 IRDA_ERROR("%s: SAR enabled, but data is larger than TxMaxSduSize!\n",
645 __func__); 644 __func__);
646 ret = -EMSGSIZE; 645 ret = -EMSGSIZE;
@@ -733,8 +732,7 @@ static void irttp_run_tx_queue(struct tsap_cb *self)
733 * poll us through irttp_flow_indication() - Jean II */ 732 * poll us through irttp_flow_indication() - Jean II */
734 while ((self->send_credit > 0) && 733 while ((self->send_credit > 0) &&
735 (!irlmp_lap_tx_queue_full(self->lsap)) && 734 (!irlmp_lap_tx_queue_full(self->lsap)) &&
736 (skb = skb_dequeue(&self->tx_queue))) 735 (skb = skb_dequeue(&self->tx_queue))) {
737 {
738 /* 736 /*
739 * Since we can transmit and receive frames concurrently, 737 * Since we can transmit and receive frames concurrently,
740 * the code below is a critical region and we must assure that 738 * the code below is a critical region and we must assure that
@@ -798,8 +796,7 @@ static void irttp_run_tx_queue(struct tsap_cb *self)
798 * where we can spend a bit of time doing stuff. - Jean II */ 796 * where we can spend a bit of time doing stuff. - Jean II */
799 if ((self->tx_sdu_busy) && 797 if ((self->tx_sdu_busy) &&
800 (skb_queue_len(&self->tx_queue) < TTP_TX_LOW_THRESHOLD) && 798 (skb_queue_len(&self->tx_queue) < TTP_TX_LOW_THRESHOLD) &&
801 (!self->close_pend)) 799 (!self->close_pend)) {
802 {
803 if (self->notify.flow_indication) 800 if (self->notify.flow_indication)
804 self->notify.flow_indication(self->notify.instance, 801 self->notify.flow_indication(self->notify.instance,
805 self, FLOW_START); 802 self, FLOW_START);
@@ -892,7 +889,7 @@ static int irttp_udata_indication(void *instance, void *sap,
892 /* Just pass data to layer above */ 889 /* Just pass data to layer above */
893 if (self->notify.udata_indication) { 890 if (self->notify.udata_indication) {
894 err = self->notify.udata_indication(self->notify.instance, 891 err = self->notify.udata_indication(self->notify.instance,
895 self,skb); 892 self, skb);
896 /* Same comment as in irttp_do_data_indication() */ 893 /* Same comment as in irttp_do_data_indication() */
897 if (!err) 894 if (!err)
898 return 0; 895 return 0;
@@ -1057,7 +1054,7 @@ static void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
1057 * to do that. Jean II */ 1054 * to do that. Jean II */
1058 1055
1059 /* If we need to send disconnect. try to do it now */ 1056 /* If we need to send disconnect. try to do it now */
1060 if(self->disconnect_pend) 1057 if (self->disconnect_pend)
1061 irttp_start_todo_timer(self, 0); 1058 irttp_start_todo_timer(self, 0);
1062} 1059}
1063 1060
@@ -1116,7 +1113,7 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
1116 IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -EBADR;); 1113 IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -EBADR;);
1117 1114
1118 if (self->connected) { 1115 if (self->connected) {
1119 if(userdata) 1116 if (userdata)
1120 dev_kfree_skb(userdata); 1117 dev_kfree_skb(userdata);
1121 return -EISCONN; 1118 return -EISCONN;
1122 } 1119 }
@@ -1137,7 +1134,7 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
1137 * headers 1134 * headers
1138 */ 1135 */
1139 IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER, 1136 IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER,
1140 { dev_kfree_skb(userdata); return -1; } ); 1137 { dev_kfree_skb(userdata); return -1; });
1141 } 1138 }
1142 1139
1143 /* Initialize connection parameters */ 1140 /* Initialize connection parameters */
@@ -1157,7 +1154,7 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
1157 * Give away max 127 credits for now 1154 * Give away max 127 credits for now
1158 */ 1155 */
1159 if (n > 127) { 1156 if (n > 127) {
1160 self->avail_credit=n-127; 1157 self->avail_credit = n - 127;
1161 n = 127; 1158 n = 127;
1162 } 1159 }
1163 1160
@@ -1166,10 +1163,10 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
1166 /* SAR enabled? */ 1163 /* SAR enabled? */
1167 if (max_sdu_size > 0) { 1164 if (max_sdu_size > 0) {
1168 IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER), 1165 IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER),
1169 { dev_kfree_skb(tx_skb); return -1; } ); 1166 { dev_kfree_skb(tx_skb); return -1; });
1170 1167
1171 /* Insert SAR parameters */ 1168 /* Insert SAR parameters */
1172 frame = skb_push(tx_skb, TTP_HEADER+TTP_SAR_HEADER); 1169 frame = skb_push(tx_skb, TTP_HEADER + TTP_SAR_HEADER);
1173 1170
1174 frame[0] = TTP_PARAMETERS | n; 1171 frame[0] = TTP_PARAMETERS | n;
1175 frame[1] = 0x04; /* Length */ 1172 frame[1] = 0x04; /* Length */
@@ -1386,7 +1383,7 @@ int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size,
1386 * headers 1383 * headers
1387 */ 1384 */
1388 IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER, 1385 IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER,
1389 { dev_kfree_skb(userdata); return -1; } ); 1386 { dev_kfree_skb(userdata); return -1; });
1390 } 1387 }
1391 1388
1392 self->avail_credit = 0; 1389 self->avail_credit = 0;
@@ -1409,10 +1406,10 @@ int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size,
1409 /* SAR enabled? */ 1406 /* SAR enabled? */
1410 if (max_sdu_size > 0) { 1407 if (max_sdu_size > 0) {
1411 IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER), 1408 IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER),
1412 { dev_kfree_skb(tx_skb); return -1; } ); 1409 { dev_kfree_skb(tx_skb); return -1; });
1413 1410
1414 /* Insert TTP header with SAR parameters */ 1411 /* Insert TTP header with SAR parameters */
1415 frame = skb_push(tx_skb, TTP_HEADER+TTP_SAR_HEADER); 1412 frame = skb_push(tx_skb, TTP_HEADER + TTP_SAR_HEADER);
1416 1413
1417 frame[0] = TTP_PARAMETERS | n; 1414 frame[0] = TTP_PARAMETERS | n;
1418 frame[1] = 0x04; /* Length */ 1415 frame[1] = 0x04; /* Length */
@@ -1522,7 +1519,7 @@ int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *userdata,
1522 * function may be called from various context, like user, timer 1519 * function may be called from various context, like user, timer
1523 * for following a disconnect_indication() (i.e. net_bh). 1520 * for following a disconnect_indication() (i.e. net_bh).
1524 * Jean II */ 1521 * Jean II */
1525 if(test_and_set_bit(0, &self->disconnect_pend)) { 1522 if (test_and_set_bit(0, &self->disconnect_pend)) {
1526 IRDA_DEBUG(0, "%s(), disconnect already pending\n", 1523 IRDA_DEBUG(0, "%s(), disconnect already pending\n",
1527 __func__); 1524 __func__);
1528 if (userdata) 1525 if (userdata)
@@ -1627,7 +1624,7 @@ static void irttp_disconnect_indication(void *instance, void *sap,
1627 * Jean II */ 1624 * Jean II */
1628 1625
1629 /* No need to notify the client if has already tried to disconnect */ 1626 /* No need to notify the client if has already tried to disconnect */
1630 if(self->notify.disconnect_indication) 1627 if (self->notify.disconnect_indication)
1631 self->notify.disconnect_indication(self->notify.instance, self, 1628 self->notify.disconnect_indication(self->notify.instance, self,
1632 reason, skb); 1629 reason, skb);
1633 else 1630 else
@@ -1738,8 +1735,7 @@ static void irttp_run_rx_queue(struct tsap_cb *self)
1738 * This is the last fragment, so time to reassemble! 1735 * This is the last fragment, so time to reassemble!
1739 */ 1736 */
1740 if ((self->rx_sdu_size <= self->rx_max_sdu_size) || 1737 if ((self->rx_sdu_size <= self->rx_max_sdu_size) ||
1741 (self->rx_max_sdu_size == TTP_SAR_UNBOUND)) 1738 (self->rx_max_sdu_size == TTP_SAR_UNBOUND)) {
1742 {
1743 /* 1739 /*
1744 * A little optimizing. Only queue the fragment if 1740 * A little optimizing. Only queue the fragment if
1745 * there are other fragments. Since if this is the 1741 * there are other fragments. Since if this is the
@@ -1860,7 +1856,7 @@ static int irttp_seq_show(struct seq_file *seq, void *v)
1860 seq_printf(seq, "dtsap_sel: %02x\n", 1856 seq_printf(seq, "dtsap_sel: %02x\n",
1861 self->dtsap_sel); 1857 self->dtsap_sel);
1862 seq_printf(seq, " connected: %s, ", 1858 seq_printf(seq, " connected: %s, ",
1863 self->connected? "TRUE":"FALSE"); 1859 self->connected ? "TRUE" : "FALSE");
1864 seq_printf(seq, "avail credit: %d, ", 1860 seq_printf(seq, "avail credit: %d, ",
1865 self->avail_credit); 1861 self->avail_credit);
1866 seq_printf(seq, "remote credit: %d, ", 1862 seq_printf(seq, "remote credit: %d, ",
@@ -1876,9 +1872,9 @@ static int irttp_seq_show(struct seq_file *seq, void *v)
1876 seq_printf(seq, "rx_queue len: %u\n", 1872 seq_printf(seq, "rx_queue len: %u\n",
1877 skb_queue_len(&self->rx_queue)); 1873 skb_queue_len(&self->rx_queue));
1878 seq_printf(seq, " tx_sdu_busy: %s, ", 1874 seq_printf(seq, " tx_sdu_busy: %s, ",
1879 self->tx_sdu_busy? "TRUE":"FALSE"); 1875 self->tx_sdu_busy ? "TRUE" : "FALSE");
1880 seq_printf(seq, "rx_sdu_busy: %s\n", 1876 seq_printf(seq, "rx_sdu_busy: %s\n",
1881 self->rx_sdu_busy? "TRUE":"FALSE"); 1877 self->rx_sdu_busy ? "TRUE" : "FALSE");
1882 seq_printf(seq, " max_seg_size: %u, ", 1878 seq_printf(seq, " max_seg_size: %u, ",
1883 self->max_seg_size); 1879 self->max_seg_size);
1884 seq_printf(seq, "tx_max_sdu_size: %u, ", 1880 seq_printf(seq, "tx_max_sdu_size: %u, ",
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index ae691651b721..168aff5e60de 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -2293,7 +2293,7 @@ out_unlock:
2293static int afiucv_netdev_event(struct notifier_block *this, 2293static int afiucv_netdev_event(struct notifier_block *this,
2294 unsigned long event, void *ptr) 2294 unsigned long event, void *ptr)
2295{ 2295{
2296 struct net_device *event_dev = (struct net_device *)ptr; 2296 struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
2297 struct sock *sk; 2297 struct sock *sk;
2298 struct iucv_sock *iucv; 2298 struct iucv_sock *iucv;
2299 2299
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 4fe76ff214c2..cd5b8ec9be04 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -621,7 +621,7 @@ static void iucv_disable(void)
621 put_online_cpus(); 621 put_online_cpus();
622} 622}
623 623
624static int __cpuinit iucv_cpu_notify(struct notifier_block *self, 624static int iucv_cpu_notify(struct notifier_block *self,
625 unsigned long action, void *hcpu) 625 unsigned long action, void *hcpu)
626{ 626{
627 cpumask_t cpumask; 627 cpumask_t cpumask;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index c5fbd7589681..9d585370c5b4 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -45,7 +45,7 @@ struct netns_pfkey {
45static DEFINE_MUTEX(pfkey_mutex); 45static DEFINE_MUTEX(pfkey_mutex);
46 46
47#define DUMMY_MARK 0 47#define DUMMY_MARK 0
48static struct xfrm_mark dummy_mark = {0, 0}; 48static const struct xfrm_mark dummy_mark = {0, 0};
49struct pfkey_sock { 49struct pfkey_sock {
50 /* struct sock must be the first member of struct pfkey_sock */ 50 /* struct sock must be the first member of struct pfkey_sock */
51 struct sock sk; 51 struct sock sk;
@@ -338,7 +338,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk)
338 return 0; 338 return 0;
339} 339}
340 340
341static u8 sadb_ext_min_len[] = { 341static const u8 sadb_ext_min_len[] = {
342 [SADB_EXT_RESERVED] = (u8) 0, 342 [SADB_EXT_RESERVED] = (u8) 0,
343 [SADB_EXT_SA] = (u8) sizeof(struct sadb_sa), 343 [SADB_EXT_SA] = (u8) sizeof(struct sadb_sa),
344 [SADB_EXT_LIFETIME_CURRENT] = (u8) sizeof(struct sadb_lifetime), 344 [SADB_EXT_LIFETIME_CURRENT] = (u8) sizeof(struct sadb_lifetime),
@@ -1196,10 +1196,6 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
1196 1196
1197 x->props.family = pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 1197 x->props.family = pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
1198 &x->props.saddr); 1198 &x->props.saddr);
1199 if (!x->props.family) {
1200 err = -EAFNOSUPPORT;
1201 goto out;
1202 }
1203 pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1], 1199 pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1],
1204 &x->id.daddr); 1200 &x->id.daddr);
1205 1201
@@ -1710,6 +1706,7 @@ static int key_notify_sa_flush(const struct km_event *c)
1710 hdr->sadb_msg_version = PF_KEY_V2; 1706 hdr->sadb_msg_version = PF_KEY_V2;
1711 hdr->sadb_msg_errno = (uint8_t) 0; 1707 hdr->sadb_msg_errno = (uint8_t) 0;
1712 hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); 1708 hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
1709 hdr->sadb_msg_reserved = 0;
1713 1710
1714 pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); 1711 pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
1715 1712
@@ -2080,6 +2077,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *
2080 pol->sadb_x_policy_type = IPSEC_POLICY_NONE; 2077 pol->sadb_x_policy_type = IPSEC_POLICY_NONE;
2081 } 2078 }
2082 pol->sadb_x_policy_dir = dir+1; 2079 pol->sadb_x_policy_dir = dir+1;
2080 pol->sadb_x_policy_reserved = 0;
2083 pol->sadb_x_policy_id = xp->index; 2081 pol->sadb_x_policy_id = xp->index;
2084 pol->sadb_x_policy_priority = xp->priority; 2082 pol->sadb_x_policy_priority = xp->priority;
2085 2083
@@ -2203,10 +2201,6 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_
2203 2201
2204 sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; 2202 sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1];
2205 xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr); 2203 xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr);
2206 if (!xp->family) {
2207 err = -EINVAL;
2208 goto out;
2209 }
2210 xp->selector.family = xp->family; 2204 xp->selector.family = xp->family;
2211 xp->selector.prefixlen_s = sa->sadb_address_prefixlen; 2205 xp->selector.prefixlen_s = sa->sadb_address_prefixlen;
2212 xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); 2206 xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
@@ -2699,6 +2693,7 @@ static int key_notify_policy_flush(const struct km_event *c)
2699 hdr->sadb_msg_errno = (uint8_t) 0; 2693 hdr->sadb_msg_errno = (uint8_t) 0;
2700 hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; 2694 hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC;
2701 hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); 2695 hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
2696 hdr->sadb_msg_reserved = 0;
2702 pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); 2697 pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
2703 return 0; 2698 return 0;
2704 2699
@@ -2734,7 +2729,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
2734 2729
2735typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb, 2730typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb,
2736 const struct sadb_msg *hdr, void * const *ext_hdrs); 2731 const struct sadb_msg *hdr, void * const *ext_hdrs);
2737static pfkey_handler pfkey_funcs[SADB_MAX + 1] = { 2732static const pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
2738 [SADB_RESERVED] = pfkey_reserved, 2733 [SADB_RESERVED] = pfkey_reserved,
2739 [SADB_GETSPI] = pfkey_getspi, 2734 [SADB_GETSPI] = pfkey_getspi,
2740 [SADB_UPDATE] = pfkey_add, 2735 [SADB_UPDATE] = pfkey_add,
@@ -3135,7 +3130,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
3135 pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; 3130 pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
3136 pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; 3131 pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
3137 pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1; 3132 pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1;
3133 pol->sadb_x_policy_reserved = 0;
3138 pol->sadb_x_policy_id = xp->index; 3134 pol->sadb_x_policy_id = xp->index;
3135 pol->sadb_x_policy_priority = xp->priority;
3139 3136
3140 /* Set sadb_comb's. */ 3137 /* Set sadb_comb's. */
3141 if (x->id.proto == IPPROTO_AH) 3138 if (x->id.proto == IPPROTO_AH)
@@ -3523,6 +3520,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
3523 pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; 3520 pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
3524 pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; 3521 pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
3525 pol->sadb_x_policy_dir = dir + 1; 3522 pol->sadb_x_policy_dir = dir + 1;
3523 pol->sadb_x_policy_reserved = 0;
3526 pol->sadb_x_policy_id = 0; 3524 pol->sadb_x_policy_id = 0;
3527 pol->sadb_x_policy_priority = 0; 3525 pol->sadb_x_policy_priority = 0;
3528 3526
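
The scattered "reserved = 0" assignments across af_key.c share one motive:
these sadb structures are built inside skbs that are copied to userspace, and
any field the code never writes would leak stale kernel memory. Zeroing each
reserved member closes that hole per message; when a structure is built
field-by-field from scratch, clearing it up front is the simpler defense. A
generic sketch of that alternative (assumption: illustrative pattern, not this
file's code):

    struct sadb_x_policy pol;

    memset(&pol, 0, sizeof(pol));  /* no uninitialized reserved/padding */
    pol.sadb_x_policy_exttype = SADB_X_EXT_POLICY;
    pol.sadb_x_policy_type = IPSEC_POLICY_IPSEC;
    /* set only the fields that carry information; the rest stay zero */
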
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 6984c3a353cd..feae495a0a30 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -414,10 +414,7 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *
414 if (L2TP_SKB_CB(skb)->has_seq) { 414 if (L2TP_SKB_CB(skb)->has_seq) {
415 /* Bump our Nr */ 415 /* Bump our Nr */
416 session->nr++; 416 session->nr++;
417 if (tunnel->version == L2TP_HDR_VER_2) 417 session->nr &= session->nr_max;
418 session->nr &= 0xffff;
419 else
420 session->nr &= 0xffffff;
421 418
422 l2tp_dbg(session, L2TP_MSG_SEQ, "%s: updated nr to %hu\n", 419 l2tp_dbg(session, L2TP_MSG_SEQ, "%s: updated nr to %hu\n",
423 session->name, session->nr); 420 session->name, session->nr);
@@ -542,6 +539,84 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk,
542 return __skb_checksum_complete(skb); 539 return __skb_checksum_complete(skb);
543} 540}
544 541
542static int l2tp_seq_check_rx_window(struct l2tp_session *session, u32 nr)
543{
544 u32 nws;
545
546 if (nr >= session->nr)
547 nws = nr - session->nr;
548 else
549 nws = (session->nr_max + 1) - (session->nr - nr);
550
551 return nws < session->nr_window_size;
552}
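The window test above is modular arithmetic over the NR space: the forward distance from the current expected nr to the received nr, taken with wrap-around, must fall inside the window. A standalone sketch using illustrative L2TPv2 values (nr_max = 0xffff, window = nr_max / 2, matching the session defaults set further down):

#include <stdio.h>

/* Wrap-around receive-window check, mirroring l2tp_seq_check_rx_window(). */
static const unsigned int nr_max = 0xffff;
static const unsigned int nr_window_size = 0xffff / 2;

static int seq_in_rx_window(unsigned int cur_nr, unsigned int nr)
{
	unsigned int nws;

	if (nr >= cur_nr)
		nws = nr - cur_nr;
	else
		nws = (nr_max + 1) - (cur_nr - nr);

	return nws < nr_window_size;
}

int main(void)
{
	printf("%d\n", seq_in_rx_window(100, 101));	/* 1: just ahead */
	printf("%d\n", seq_in_rx_window(100, 90));	/* 0: behind us */
	printf("%d\n", seq_in_rx_window(0xfffe, 5));	/* 1: wrapped */
	return 0;
}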
553
554/* If packet has sequence numbers, queue it if acceptable. Returns 0 if
555 * acceptable, else non-zero.
556 */
557static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb)
558{
559 if (!l2tp_seq_check_rx_window(session, L2TP_SKB_CB(skb)->ns)) {
560 /* Packet sequence number is outside allowed window.
561 * Discard it.
562 */
563 l2tp_dbg(session, L2TP_MSG_SEQ,
564 "%s: pkt %u len %d discarded, outside window, nr=%u\n",
565 session->name, L2TP_SKB_CB(skb)->ns,
566 L2TP_SKB_CB(skb)->length, session->nr);
567 goto discard;
568 }
569
570 if (session->reorder_timeout != 0) {
571 /* Packet reordering enabled. Add skb to session's
572 * reorder queue, in order of ns.
573 */
574 l2tp_recv_queue_skb(session, skb);
575 goto out;
576 }
577
578 /* Packet reordering disabled. Discard out-of-sequence packets, while
579 * tracking the number of in-sequence packets after the first OOS packet
580 * is seen. After nr_oos_count_max in-sequence packets, reset the
581 * sequence number to re-enable packet reception.
582 */
583 if (L2TP_SKB_CB(skb)->ns == session->nr) {
584 skb_queue_tail(&session->reorder_q, skb);
585 } else {
586 u32 nr_oos = L2TP_SKB_CB(skb)->ns;
587 u32 nr_next = (session->nr_oos + 1) & session->nr_max;
588
589 if (nr_oos == nr_next)
590 session->nr_oos_count++;
591 else
592 session->nr_oos_count = 0;
593
594 session->nr_oos = nr_oos;
595 if (session->nr_oos_count > session->nr_oos_count_max) {
596 session->reorder_skip = 1;
597 l2tp_dbg(session, L2TP_MSG_SEQ,
598 "%s: %d oos packets received. Resetting sequence numbers\n",
599 session->name, session->nr_oos_count);
600 }
601 if (!session->reorder_skip) {
602 atomic_long_inc(&session->stats.rx_seq_discards);
603 l2tp_dbg(session, L2TP_MSG_SEQ,
604 "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n",
605 session->name, L2TP_SKB_CB(skb)->ns,
606 L2TP_SKB_CB(skb)->length, session->nr,
607 skb_queue_len(&session->reorder_q));
608 goto discard;
609 }
610 skb_queue_tail(&session->reorder_q, skb);
611 }
612
613out:
614 return 0;
615
616discard:
617 return 1;
618}
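In the no-reorder path above, recovery works by counting: the first out-of-sequence packet resets nr_oos_count, each following packet whose ns is exactly nr_oos + 1 increments it, and once the count exceeds nr_oos_count_max the session gives up on its old position and resynchronizes. A compact simulation, assuming the default nr_oos_count_max of 4 set at session creation below:

#include <stdio.h>

int main(void)
{
	unsigned int nr_max = 0xffff, nr_oos = 0, nr_oos_count = 0;
	unsigned int nr_oos_count_max = 4, ns, reorder_skip = 0;

	/* peer jumped ahead: first OOS packet 1000, then 1001..1005 */
	for (ns = 1000; ns <= 1005 && !reorder_skip; ns++) {
		unsigned int nr_next = (nr_oos + 1) & nr_max;

		if (ns == nr_next)
			nr_oos_count++;		/* consecutive after OOS */
		else
			nr_oos_count = 0;	/* restart the count */

		nr_oos = ns;
		if (nr_oos_count > nr_oos_count_max)
			reorder_skip = 1;
	}
	printf("resynchronized at ns=%u\n", nr_oos);	/* 1005 */
	return 0;
}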
619
545/* Do receive processing of L2TP data frames. We handle both L2TPv2 620/* Do receive processing of L2TP data frames. We handle both L2TPv2
546 * and L2TPv3 data frames here. 621 * and L2TPv3 data frames here.
547 * 622 *
@@ -757,26 +832,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
757 * enabled. Saved L2TP protocol info is stored in skb->cb[]. 832 */
758 */ 833 */
759 if (L2TP_SKB_CB(skb)->has_seq) { 834 if (L2TP_SKB_CB(skb)->has_seq) {
760 if (session->reorder_timeout != 0) { 835 if (l2tp_recv_data_seq(session, skb))
761 /* Packet reordering enabled. Add skb to session's 836 goto discard;
762 * reorder queue, in order of ns.
763 */
764 l2tp_recv_queue_skb(session, skb);
765 } else {
766 /* Packet reordering disabled. Discard out-of-sequence
767 * packets
768 */
769 if (L2TP_SKB_CB(skb)->ns != session->nr) {
770 atomic_long_inc(&session->stats.rx_seq_discards);
771 l2tp_dbg(session, L2TP_MSG_SEQ,
772 "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n",
773 session->name, L2TP_SKB_CB(skb)->ns,
774 L2TP_SKB_CB(skb)->length, session->nr,
775 skb_queue_len(&session->reorder_q));
776 goto discard;
777 }
778 skb_queue_tail(&session->reorder_q, skb);
779 }
780 } else { 837 } else {
781 /* No sequence numbers. Add the skb to the tail of the 838 /* No sequence numbers. Add the skb to the tail of the
782 * reorder queue. This ensures that it will be 839 * reorder queue. This ensures that it will be
@@ -1812,6 +1869,15 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
1812 session->session_id = session_id; 1869 session->session_id = session_id;
1813 session->peer_session_id = peer_session_id; 1870 session->peer_session_id = peer_session_id;
1814 session->nr = 0; 1871 session->nr = 0;
1872 if (tunnel->version == L2TP_HDR_VER_2)
1873 session->nr_max = 0xffff;
1874 else
1875 session->nr_max = 0xffffff;
1876 session->nr_window_size = session->nr_max / 2;
1877 session->nr_oos_count_max = 4;
1878
1879 /* Use NR of first received packet */
1880 session->reorder_skip = 1;
1815 1881
1816 sprintf(&session->name[0], "sess %u/%u", 1882 sprintf(&session->name[0], "sess %u/%u",
1817 tunnel->tunnel_id, session->session_id); 1883 tunnel->tunnel_id, session->session_id);
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 485a490fd990..66a559b104b6 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -102,6 +102,11 @@ struct l2tp_session {
102 u32 nr; /* session NR state (receive) */ 102 u32 nr; /* session NR state (receive) */
103 u32 ns; /* session NR state (send) */ 103 u32 ns; /* session NR state (send) */
104 struct sk_buff_head reorder_q; /* receive reorder queue */ 104 struct sk_buff_head reorder_q; /* receive reorder queue */
105 u32 nr_max; /* max NR. Depends on tunnel */
106 u32 nr_window_size; /* NR window size */
107 u32 nr_oos; /* NR of last OOS packet */
108 int nr_oos_count; /* For OOS recovery */
109 int nr_oos_count_max;
105 struct hlist_node hlist; /* Hash list node */ 110 struct hlist_node hlist; /* Hash list node */
106 atomic_t ref_count; 111 atomic_t ref_count;
107 112
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 637a341c1e2d..5ebee2ded9e9 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -346,19 +346,19 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
346 skb_put(skb, 2); 346 skb_put(skb, 2);
347 347
348 /* Copy user data into skb */ 348 /* Copy user data into skb */
349 error = memcpy_fromiovec(skb->data, m->msg_iov, total_len); 349 error = memcpy_fromiovec(skb_put(skb, total_len), m->msg_iov,
350 total_len);
350 if (error < 0) { 351 if (error < 0) {
351 kfree_skb(skb); 352 kfree_skb(skb);
352 goto error_put_sess_tun; 353 goto error_put_sess_tun;
353 } 354 }
354 skb_put(skb, total_len);
355 355
356 l2tp_xmit_skb(session, skb, session->hdr_len); 356 l2tp_xmit_skb(session, skb, session->hdr_len);
357 357
358 sock_put(ps->tunnel_sock); 358 sock_put(ps->tunnel_sock);
359 sock_put(sk); 359 sock_put(sk);
360 360
361 return error; 361 return total_len;
362 362
363error_put_sess_tun: 363error_put_sess_tun:
364 sock_put(ps->tunnel_sock); 364 sock_put(ps->tunnel_sock);
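The sendmsg fix above is twofold: the payload is copied into the region returned by skb_put(), after the two PPP header bytes already reserved, rather than over skb->data from offset zero; and the success return value becomes total_len, as sendmsg semantics require. A userspace analogue of the put-then-copy pattern (struct buf and put() are stand-ins for the skb API; no bounds checking, sketch only):

#include <assert.h>
#include <stdio.h>
#include <string.h>

struct buf {
	unsigned char data[64];
	size_t len;
};

/* like skb_put(): reserve len bytes at the tail, return their start */
static unsigned char *put(struct buf *b, size_t len)
{
	unsigned char *tail = b->data + b->len;

	b->len += len;
	return tail;
}

int main(void)
{
	struct buf b = { .len = 0 };
	unsigned char *hdr = put(&b, 2);	/* PPP header bytes */

	hdr[0] = 0xff;
	hdr[1] = 0x03;
	/* copy payload into the freshly reserved tail, not over data[0] */
	memcpy(put(&b, 5), "hello", 5);

	assert(b.data[2] == 'h' && b.len == 7);
	printf("len=%zu, payload at offset 2\n", b.len);
	return 0;
}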
@@ -1793,7 +1793,8 @@ static const struct proto_ops pppol2tp_ops = {
1793 1793
1794static const struct pppox_proto pppol2tp_proto = { 1794static const struct pppox_proto pppol2tp_proto = {
1795 .create = pppol2tp_create, 1795 .create = pppol2tp_create,
1796 .ioctl = pppol2tp_ioctl 1796 .ioctl = pppol2tp_ioctl,
1797 .owner = THIS_MODULE,
1797}; 1798};
1798 1799
1799#ifdef CONFIG_L2TP_V3 1800#ifdef CONFIG_L2TP_V3
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 54563ad8aeb1..355cc3b6fa4d 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -154,6 +154,7 @@ static void lapb_t1timer_expiry(unsigned long param)
154 } else { 154 } else {
155 lapb->n2count++; 155 lapb->n2count++;
156 lapb_requeue_frames(lapb); 156 lapb_requeue_frames(lapb);
157 lapb_kick(lapb);
157 } 158 }
158 break; 159 break;
159 160
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 48aaa89253e0..6cba486353e8 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -321,12 +321,12 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
321 if (llc->dev) { 321 if (llc->dev) {
322 if (!addr->sllc_arphrd) 322 if (!addr->sllc_arphrd)
323 addr->sllc_arphrd = llc->dev->type; 323 addr->sllc_arphrd = llc->dev->type;
324 if (llc_mac_null(addr->sllc_mac)) 324 if (is_zero_ether_addr(addr->sllc_mac))
325 memcpy(addr->sllc_mac, llc->dev->dev_addr, 325 memcpy(addr->sllc_mac, llc->dev->dev_addr,
326 IFHWADDRLEN); 326 IFHWADDRLEN);
327 if (addr->sllc_arphrd != llc->dev->type || 327 if (addr->sllc_arphrd != llc->dev->type ||
328 !llc_mac_match(addr->sllc_mac, 328 !ether_addr_equal(addr->sllc_mac,
329 llc->dev->dev_addr)) { 329 llc->dev->dev_addr)) {
330 rc = -EINVAL; 330 rc = -EINVAL;
331 llc->dev = NULL; 331 llc->dev = NULL;
332 } 332 }
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 0d0d416dfab6..cd8724177965 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -478,8 +478,8 @@ static inline bool llc_estab_match(const struct llc_sap *sap,
478 478
479 return llc->laddr.lsap == laddr->lsap && 479 return llc->laddr.lsap == laddr->lsap &&
480 llc->daddr.lsap == daddr->lsap && 480 llc->daddr.lsap == daddr->lsap &&
481 llc_mac_match(llc->laddr.mac, laddr->mac) && 481 ether_addr_equal(llc->laddr.mac, laddr->mac) &&
482 llc_mac_match(llc->daddr.mac, daddr->mac); 482 ether_addr_equal(llc->daddr.mac, daddr->mac);
483} 483}
484 484
485/** 485/**
@@ -550,7 +550,7 @@ static inline bool llc_listener_match(const struct llc_sap *sap,
550 550
551 return sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN && 551 return sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN &&
552 llc->laddr.lsap == laddr->lsap && 552 llc->laddr.lsap == laddr->lsap &&
553 llc_mac_match(llc->laddr.mac, laddr->mac); 553 ether_addr_equal(llc->laddr.mac, laddr->mac);
554} 554}
555 555
556static struct sock *__llc_lookup_listener(struct llc_sap *sap, 556static struct sock *__llc_lookup_listener(struct llc_sap *sap,
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
index 7b4799cfbf8d..1a3c7e0f5d0d 100644
--- a/net/llc/llc_proc.c
+++ b/net/llc/llc_proc.c
@@ -147,7 +147,7 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v)
147 } 147 }
148 seq_printf(seq, "@%02X ", llc->sap->laddr.lsap); 148 seq_printf(seq, "@%02X ", llc->sap->laddr.lsap);
149 llc_ui_format_mac(seq, llc->daddr.mac); 149 llc_ui_format_mac(seq, llc->daddr.mac);
150 seq_printf(seq, "@%02X %8d %8d %2d %3d %4d\n", llc->daddr.lsap, 150 seq_printf(seq, "@%02X %8d %8d %2d %3u %4d\n", llc->daddr.lsap,
151 sk_wmem_alloc_get(sk), 151 sk_wmem_alloc_get(sk),
152 sk_rmem_alloc_get(sk) - llc->copied_seq, 152 sk_rmem_alloc_get(sk) - llc->copied_seq,
153 sk->sk_state, 153 sk->sk_state,
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 78be45cda5c1..e5850699098e 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -302,7 +302,7 @@ static inline bool llc_dgram_match(const struct llc_sap *sap,
302 302
303 return sk->sk_type == SOCK_DGRAM && 303 return sk->sk_type == SOCK_DGRAM &&
304 llc->laddr.lsap == laddr->lsap && 304 llc->laddr.lsap == laddr->lsap &&
305 llc_mac_match(llc->laddr.mac, laddr->mac); 305 ether_addr_equal(llc->laddr.mac, laddr->mac);
306} 306}
307 307
308/** 308/**
@@ -425,7 +425,7 @@ void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb)
425 llc_pdu_decode_da(skb, laddr.mac); 425 llc_pdu_decode_da(skb, laddr.mac);
426 llc_pdu_decode_dsap(skb, &laddr.lsap); 426 llc_pdu_decode_dsap(skb, &laddr.lsap);
427 427
428 if (llc_mac_multicast(laddr.mac)) { 428 if (is_multicast_ether_addr(laddr.mac)) {
429 llc_sap_mcast(sap, &laddr, skb); 429 llc_sap_mcast(sap, &laddr, skb);
430 kfree_skb(skb); 430 kfree_skb(skb);
431 } else { 431 } else {
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index 0785e95c9924..be7614b9ed27 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -85,7 +85,7 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch,
85 *cpos++ = *pos++ ^ e[i]; 85 *cpos++ = *pos++ ^ e[i];
86 } 86 }
87 87
88 for (i = 0; i < CCMP_MIC_LEN; i++) 88 for (i = 0; i < IEEE80211_CCMP_MIC_LEN; i++)
89 mic[i] = b[i] ^ s_0[i]; 89 mic[i] = b[i] ^ s_0[i];
90} 90}
91 91
@@ -123,7 +123,7 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch,
123 crypto_cipher_encrypt_one(tfm, a, a); 123 crypto_cipher_encrypt_one(tfm, a, a);
124 } 124 }
125 125
126 for (i = 0; i < CCMP_MIC_LEN; i++) { 126 for (i = 0; i < IEEE80211_CCMP_MIC_LEN; i++) {
127 if ((mic[i] ^ s_0[i]) != a[i]) 127 if ((mic[i] ^ s_0[i]) != a[i])
128 return -1; 128 return -1;
129 } 129 }
@@ -138,7 +138,7 @@ struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[])
138 138
139 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); 139 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
140 if (!IS_ERR(tfm)) 140 if (!IS_ERR(tfm))
141 crypto_cipher_setkey(tfm, key, ALG_CCMP_KEY_LEN); 141 crypto_cipher_setkey(tfm, key, WLAN_KEY_LEN_CCMP);
142 142
143 return tfm; 143 return tfm;
144} 144}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 1a89c80e6407..2e7855a1b10d 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -73,16 +73,19 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
73 struct ieee80211_local *local = sdata->local; 73 struct ieee80211_local *local = sdata->local;
74 74
75 if (ieee80211_sdata_running(sdata)) { 75 if (ieee80211_sdata_running(sdata)) {
76 u32 mask = MONITOR_FLAG_COOK_FRAMES |
77 MONITOR_FLAG_ACTIVE;
78
76 /* 79 /*
77 * Prohibit MONITOR_FLAG_COOK_FRAMES to be 80 * Prohibit MONITOR_FLAG_COOK_FRAMES and
78 * changed while the interface is up. 81 * MONITOR_FLAG_ACTIVE to be changed while the
82 * interface is up.
79 * Else we would need to add a lot of cruft 83 * Else we would need to add a lot of cruft
80 * to update everything: 84 * to update everything:
81 * cooked_mntrs, monitor and all fif_* counters 85 * cooked_mntrs, monitor and all fif_* counters
82 * reconfigure hardware 86 * reconfigure hardware
83 */ 87 */
84 if ((*flags & MONITOR_FLAG_COOK_FRAMES) != 88 if ((*flags & mask) != (sdata->u.mntr_flags & mask))
85 (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES))
86 return -EBUSY; 89 return -EBUSY;
87 90
88 ieee80211_adjust_monitor_flags(sdata, -1); 91 ieee80211_adjust_monitor_flags(sdata, -1);
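The -EBUSY test above compares only the bits selected by the mask, so monitor flags outside the mask remain changeable while the interface is up. The comparison reduced to its arithmetic (flag values here are illustrative, not the real MONITOR_FLAG_* constants):

#include <stdio.h>

int main(void)
{
	const unsigned int COOK = 0x1, ACTIVE = 0x2, OTHER = 0x4;
	const unsigned int mask = COOK | ACTIVE;
	unsigned int old_flags = COOK;
	unsigned int new_flags = COOK | OTHER;	/* only unmasked bit changed */

	if ((new_flags & mask) != (old_flags & mask))
		printf("-EBUSY\n");
	else
		printf("allowed: masked bits unchanged\n");
	return 0;
}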
@@ -392,9 +395,13 @@ void sta_set_rate_info_tx(struct sta_info *sta,
392 rinfo->nss = ieee80211_rate_get_vht_nss(rate); 395 rinfo->nss = ieee80211_rate_get_vht_nss(rate);
393 } else { 396 } else {
394 struct ieee80211_supported_band *sband; 397 struct ieee80211_supported_band *sband;
398 int shift = ieee80211_vif_get_shift(&sta->sdata->vif);
399 u16 brate;
400
395 sband = sta->local->hw.wiphy->bands[ 401 sband = sta->local->hw.wiphy->bands[
396 ieee80211_get_sdata_band(sta->sdata)]; 402 ieee80211_get_sdata_band(sta->sdata)];
397 rinfo->legacy = sband->bitrates[rate->idx].bitrate; 403 brate = sband->bitrates[rate->idx].bitrate;
404 rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
398 } 405 }
399 if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) 406 if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
400 rinfo->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; 407 rinfo->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
@@ -419,11 +426,13 @@ void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
419 rinfo->mcs = sta->last_rx_rate_idx; 426 rinfo->mcs = sta->last_rx_rate_idx;
420 } else { 427 } else {
421 struct ieee80211_supported_band *sband; 428 struct ieee80211_supported_band *sband;
429 int shift = ieee80211_vif_get_shift(&sta->sdata->vif);
430 u16 brate;
422 431
423 sband = sta->local->hw.wiphy->bands[ 432 sband = sta->local->hw.wiphy->bands[
424 ieee80211_get_sdata_band(sta->sdata)]; 433 ieee80211_get_sdata_band(sta->sdata)];
425 rinfo->legacy = 434 brate = sband->bitrates[sta->last_rx_rate_idx].bitrate;
426 sband->bitrates[sta->last_rx_rate_idx].bitrate; 435 rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
427 } 436 }
428 437
429 if (sta->last_rx_rate_flag & RX_FLAG_40MHZ) 438 if (sta->last_rx_rate_flag & RX_FLAG_40MHZ)
@@ -444,7 +453,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
444 struct ieee80211_local *local = sdata->local; 453 struct ieee80211_local *local = sdata->local;
445 struct timespec uptime; 454 struct timespec uptime;
446 u64 packets = 0; 455 u64 packets = 0;
447 int ac; 456 int i, ac;
448 457
449 sinfo->generation = sdata->local->sta_generation; 458 sinfo->generation = sdata->local->sta_generation;
450 459
@@ -488,6 +497,17 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
488 sinfo->signal = (s8)sta->last_signal; 497 sinfo->signal = (s8)sta->last_signal;
489 sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); 498 sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal);
490 } 499 }
500 if (sta->chains) {
501 sinfo->filled |= STATION_INFO_CHAIN_SIGNAL |
502 STATION_INFO_CHAIN_SIGNAL_AVG;
503
504 sinfo->chains = sta->chains;
505 for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) {
506 sinfo->chain_signal[i] = sta->chain_signal_last[i];
507 sinfo->chain_signal_avg[i] =
508 (s8) -ewma_read(&sta->chain_signal_avg[i]);
509 }
510 }
491 511
492 sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); 512 sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate);
493 sta_set_rate_info_rx(sta, &sinfo->rxrate); 513 sta_set_rate_info_rx(sta, &sinfo->rxrate);
@@ -652,6 +672,8 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy,
652 if (sta->sdata->dev != dev) 672 if (sta->sdata->dev != dev)
653 continue; 673 continue;
654 674
675 sinfo.filled = 0;
676 sta_set_sinfo(sta, &sinfo);
655 i = 0; 677 i = 0;
656 ADD_STA_STATS(sta); 678 ADD_STA_STATS(sta);
657 } 679 }
@@ -728,7 +750,7 @@ static void ieee80211_get_et_strings(struct wiphy *wiphy,
728 750
729 if (sset == ETH_SS_STATS) { 751 if (sset == ETH_SS_STATS) {
730 sz_sta_stats = sizeof(ieee80211_gstrings_sta_stats); 752 sz_sta_stats = sizeof(ieee80211_gstrings_sta_stats);
731 memcpy(data, *ieee80211_gstrings_sta_stats, sz_sta_stats); 753 memcpy(data, ieee80211_gstrings_sta_stats, sz_sta_stats);
732 } 754 }
733 drv_get_et_strings(sdata, sset, &(data[sz_sta_stats])); 755 drv_get_et_strings(sdata, sset, &(data[sz_sta_stats]));
734} 756}
@@ -840,8 +862,8 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
840 return 0; 862 return 0;
841} 863}
842 864
843static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, 865int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
844 struct cfg80211_beacon_data *params) 866 struct cfg80211_beacon_data *params)
845{ 867{
846 struct beacon_data *new, *old; 868 struct beacon_data *new, *old;
847 int new_head_len, new_tail_len; 869 int new_head_len, new_tail_len;
@@ -1004,6 +1026,12 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
1004 1026
1005 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1027 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1006 1028
1029 /* don't allow changing the beacon while CSA is in place - offset
1030 * of channel switch counter may change
1031 */
1032 if (sdata->vif.csa_active)
1033 return -EBUSY;
1034
1007 old = rtnl_dereference(sdata->u.ap.beacon); 1035 old = rtnl_dereference(sdata->u.ap.beacon);
1008 if (!old) 1036 if (!old)
1009 return -ENOENT; 1037 return -ENOENT;
@@ -1028,6 +1056,10 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
1028 return -ENOENT; 1056 return -ENOENT;
1029 old_probe_resp = rtnl_dereference(sdata->u.ap.probe_resp); 1057 old_probe_resp = rtnl_dereference(sdata->u.ap.probe_resp);
1030 1058
1059 /* abort any running channel switch */
1060 sdata->vif.csa_active = false;
1061 cancel_work_sync(&sdata->csa_finalize_work);
1062
1031 /* turn off carrier for this interface and dependent VLANs */ 1063 /* turn off carrier for this interface and dependent VLANs */
1032 list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) 1064 list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
1033 netif_carrier_off(vlan->dev); 1065 netif_carrier_off(vlan->dev);
@@ -1057,6 +1089,12 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
1057 clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); 1089 clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
1058 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); 1090 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
1059 1091
1092 if (sdata->wdev.cac_started) {
1093 cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
1094 cfg80211_cac_event(sdata->dev, NL80211_RADAR_CAC_ABORTED,
1095 GFP_KERNEL);
1096 }
1097
1060 drv_stop_ap(sdata->local, sdata); 1098 drv_stop_ap(sdata->local, sdata);
1061 1099
1062 /* free all potentially still buffered bcast frames */ 1100 /* free all potentially still buffered bcast frames */
@@ -1170,8 +1208,6 @@ static int sta_apply_parameters(struct ieee80211_local *local,
1170 struct station_parameters *params) 1208 struct station_parameters *params)
1171{ 1209{
1172 int ret = 0; 1210 int ret = 0;
1173 u32 rates;
1174 int i, j;
1175 struct ieee80211_supported_band *sband; 1211 struct ieee80211_supported_band *sband;
1176 struct ieee80211_sub_if_data *sdata = sta->sdata; 1212 struct ieee80211_sub_if_data *sdata = sta->sdata;
1177 enum ieee80211_band band = ieee80211_get_sdata_band(sdata); 1213 enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
@@ -1264,16 +1300,10 @@ static int sta_apply_parameters(struct ieee80211_local *local,
1264 sta->listen_interval = params->listen_interval; 1300 sta->listen_interval = params->listen_interval;
1265 1301
1266 if (params->supported_rates) { 1302 if (params->supported_rates) {
1267 rates = 0; 1303 ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef,
1268 1304 sband, params->supported_rates,
1269 for (i = 0; i < params->supported_rates_len; i++) { 1305 params->supported_rates_len,
1270 int rate = (params->supported_rates[i] & 0x7f) * 5; 1306 &sta->sta.supp_rates[band]);
1271 for (j = 0; j < sband->n_bitrates; j++) {
1272 if (sband->bitrates[j].bitrate == rate)
1273 rates |= BIT(j);
1274 }
1275 }
1276 sta->sta.supp_rates[band] = rates;
1277 } 1307 }
1278 1308
1279 if (params->ht_capa) 1309 if (params->ht_capa)
@@ -1735,6 +1765,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
1735 ifmsh->mesh_pp_id = setup->path_sel_proto; 1765 ifmsh->mesh_pp_id = setup->path_sel_proto;
1736 ifmsh->mesh_pm_id = setup->path_metric; 1766 ifmsh->mesh_pm_id = setup->path_metric;
1737 ifmsh->user_mpm = setup->user_mpm; 1767 ifmsh->user_mpm = setup->user_mpm;
1768 ifmsh->mesh_auth_id = setup->auth_id;
1738 ifmsh->security = IEEE80211_MESH_SEC_NONE; 1769 ifmsh->security = IEEE80211_MESH_SEC_NONE;
1739 if (setup->is_authenticated) 1770 if (setup->is_authenticated)
1740 ifmsh->security |= IEEE80211_MESH_SEC_AUTHED; 1771 ifmsh->security |= IEEE80211_MESH_SEC_AUTHED;
@@ -1744,6 +1775,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
1744 /* mcast rate setting in Mesh Node */ 1775 /* mcast rate setting in Mesh Node */
1745 memcpy(sdata->vif.bss_conf.mcast_rate, setup->mcast_rate, 1776 memcpy(sdata->vif.bss_conf.mcast_rate, setup->mcast_rate,
1746 sizeof(setup->mcast_rate)); 1777 sizeof(setup->mcast_rate));
1778 sdata->vif.bss_conf.basic_rates = setup->basic_rates;
1747 1779
1748 sdata->vif.bss_conf.beacon_int = setup->beacon_interval; 1780 sdata->vif.bss_conf.beacon_int = setup->beacon_interval;
1749 sdata->vif.bss_conf.dtim_period = setup->dtim_period; 1781 sdata->vif.bss_conf.dtim_period = setup->dtim_period;
@@ -1856,6 +1888,8 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy,
1856 if (_chg_mesh_attr(NL80211_MESHCONF_AWAKE_WINDOW, mask)) 1888 if (_chg_mesh_attr(NL80211_MESHCONF_AWAKE_WINDOW, mask))
1857 conf->dot11MeshAwakeWindowDuration = 1889 conf->dot11MeshAwakeWindowDuration =
1858 nconf->dot11MeshAwakeWindowDuration; 1890 nconf->dot11MeshAwakeWindowDuration;
1891 if (_chg_mesh_attr(NL80211_MESHCONF_PLINK_TIMEOUT, mask))
1892 conf->plink_timeout = nconf->plink_timeout;
1859 ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON); 1893 ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON);
1860 return 0; 1894 return 0;
1861} 1895}
@@ -1932,18 +1966,11 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
1932 } 1966 }
1933 1967
1934 if (params->basic_rates) { 1968 if (params->basic_rates) {
1935 int i, j; 1969 ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef,
1936 u32 rates = 0; 1970 wiphy->bands[band],
1937 struct ieee80211_supported_band *sband = wiphy->bands[band]; 1971 params->basic_rates,
1938 1972 params->basic_rates_len,
1939 for (i = 0; i < params->basic_rates_len; i++) { 1973 &sdata->vif.bss_conf.basic_rates);
1940 int rate = (params->basic_rates[i] & 0x7f) * 5;
1941 for (j = 0; j < sband->n_bitrates; j++) {
1942 if (sband->bitrates[j].bitrate == rate)
1943 rates |= BIT(j);
1944 }
1945 }
1946 sdata->vif.bss_conf.basic_rates = rates;
1947 changed |= BSS_CHANGED_BASIC_RATES; 1974 changed |= BSS_CHANGED_BASIC_RATES;
1948 } 1975 }
1949 1976
@@ -2275,14 +2302,25 @@ static void ieee80211_rfkill_poll(struct wiphy *wiphy)
2275} 2302}
2276 2303
2277#ifdef CONFIG_NL80211_TESTMODE 2304#ifdef CONFIG_NL80211_TESTMODE
2278static int ieee80211_testmode_cmd(struct wiphy *wiphy, void *data, int len) 2305static int ieee80211_testmode_cmd(struct wiphy *wiphy,
2306 struct wireless_dev *wdev,
2307 void *data, int len)
2279{ 2308{
2280 struct ieee80211_local *local = wiphy_priv(wiphy); 2309 struct ieee80211_local *local = wiphy_priv(wiphy);
2310 struct ieee80211_vif *vif = NULL;
2281 2311
2282 if (!local->ops->testmode_cmd) 2312 if (!local->ops->testmode_cmd)
2283 return -EOPNOTSUPP; 2313 return -EOPNOTSUPP;
2284 2314
2285 return local->ops->testmode_cmd(&local->hw, data, len); 2315 if (wdev) {
2316 struct ieee80211_sub_if_data *sdata;
2317
2318 sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
2319 if (sdata->flags & IEEE80211_SDATA_IN_DRIVER)
2320 vif = &sdata->vif;
2321 }
2322
2323 return local->ops->testmode_cmd(&local->hw, vif, data, len);
2286} 2324}
2287 2325
2288static int ieee80211_testmode_dump(struct wiphy *wiphy, 2326static int ieee80211_testmode_dump(struct wiphy *wiphy,
@@ -2306,7 +2344,7 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata,
2306 enum ieee80211_smps_mode old_req; 2344 enum ieee80211_smps_mode old_req;
2307 int err; 2345 int err;
2308 2346
2309 lockdep_assert_held(&sdata->u.mgd.mtx); 2347 lockdep_assert_held(&sdata->wdev.mtx);
2310 2348
2311 old_req = sdata->u.mgd.req_smps; 2349 old_req = sdata->u.mgd.req_smps;
2312 sdata->u.mgd.req_smps = smps_mode; 2350 sdata->u.mgd.req_smps = smps_mode;
@@ -2363,9 +2401,9 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
2363 local->dynamic_ps_forced_timeout = timeout; 2401 local->dynamic_ps_forced_timeout = timeout;
2364 2402
2365 /* no change, but if automatic follow powersave */ 2403 /* no change, but if automatic follow powersave */
2366 mutex_lock(&sdata->u.mgd.mtx); 2404 sdata_lock(sdata);
2367 __ieee80211_request_smps(sdata, sdata->u.mgd.req_smps); 2405 __ieee80211_request_smps(sdata, sdata->u.mgd.req_smps);
2368 mutex_unlock(&sdata->u.mgd.mtx); 2406 sdata_unlock(sdata);
2369 2407
2370 if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) 2408 if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)
2371 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); 2409 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
@@ -2760,6 +2798,178 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy,
2760 return 0; 2798 return 0;
2761} 2799}
2762 2800
2801static struct cfg80211_beacon_data *
2802cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
2803{
2804 struct cfg80211_beacon_data *new_beacon;
2805 u8 *pos;
2806 int len;
2807
2808 len = beacon->head_len + beacon->tail_len + beacon->beacon_ies_len +
2809 beacon->proberesp_ies_len + beacon->assocresp_ies_len +
2810 beacon->probe_resp_len;
2811
2812 new_beacon = kzalloc(sizeof(*new_beacon) + len, GFP_KERNEL);
2813 if (!new_beacon)
2814 return NULL;
2815
2816 pos = (u8 *)(new_beacon + 1);
2817 if (beacon->head_len) {
2818 new_beacon->head_len = beacon->head_len;
2819 new_beacon->head = pos;
2820 memcpy(pos, beacon->head, beacon->head_len);
2821 pos += beacon->head_len;
2822 }
2823 if (beacon->tail_len) {
2824 new_beacon->tail_len = beacon->tail_len;
2825 new_beacon->tail = pos;
2826 memcpy(pos, beacon->tail, beacon->tail_len);
2827 pos += beacon->tail_len;
2828 }
2829 if (beacon->beacon_ies_len) {
2830 new_beacon->beacon_ies_len = beacon->beacon_ies_len;
2831 new_beacon->beacon_ies = pos;
2832 memcpy(pos, beacon->beacon_ies, beacon->beacon_ies_len);
2833 pos += beacon->beacon_ies_len;
2834 }
2835 if (beacon->proberesp_ies_len) {
2836 new_beacon->proberesp_ies_len = beacon->proberesp_ies_len;
2837 new_beacon->proberesp_ies = pos;
2838 memcpy(pos, beacon->proberesp_ies, beacon->proberesp_ies_len);
2839 pos += beacon->proberesp_ies_len;
2840 }
2841 if (beacon->assocresp_ies_len) {
2842 new_beacon->assocresp_ies_len = beacon->assocresp_ies_len;
2843 new_beacon->assocresp_ies = pos;
2844 memcpy(pos, beacon->assocresp_ies, beacon->assocresp_ies_len);
2845 pos += beacon->assocresp_ies_len;
2846 }
2847 if (beacon->probe_resp_len) {
2848 new_beacon->probe_resp_len = beacon->probe_resp_len;
2849 new_beacon->probe_resp = pos;
2850 memcpy(pos, beacon->probe_resp, beacon->probe_resp_len);
2851 pos += beacon->probe_resp_len;
2852 }
2853
2854 return new_beacon;
2855}
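cfg80211_beacon_dup() uses the single-allocation idiom: one kzalloc covers the struct plus every variable-length member, each pointer is then fixed up to point into the trailing region, and one kfree() later releases the entire beacon. The same idiom in a reduced userspace sketch, with a hypothetical two-member struct blob:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct blob {
	const unsigned char *head;
	size_t head_len;
	const unsigned char *tail;
	size_t tail_len;
};

static struct blob *blob_dup(const struct blob *src)
{
	struct blob *dst;
	unsigned char *pos;

	/* one allocation: struct followed by both buffers */
	dst = calloc(1, sizeof(*dst) + src->head_len + src->tail_len);
	if (!dst)
		return NULL;

	pos = (unsigned char *)(dst + 1);
	if (src->head_len) {
		dst->head_len = src->head_len;
		dst->head = pos;		/* note: dst, not src */
		memcpy(pos, src->head, src->head_len);
		pos += src->head_len;
	}
	if (src->tail_len) {
		dst->tail_len = src->tail_len;
		dst->tail = pos;
		memcpy(pos, src->tail, src->tail_len);
	}
	return dst;
}

int main(void)
{
	struct blob src = { (const unsigned char *)"HEAD", 4,
			    (const unsigned char *)"TAIL", 4 };
	struct blob *dup = blob_dup(&src);

	if (!dup)
		return 1;
	printf("%.4s %.4s\n", (const char *)dup->head,
	       (const char *)dup->tail);
	free(dup);	/* one free releases struct and both buffers */
	return 0;
}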
2856
2857void ieee80211_csa_finalize_work(struct work_struct *work)
2858{
2859 struct ieee80211_sub_if_data *sdata =
2860 container_of(work, struct ieee80211_sub_if_data,
2861 csa_finalize_work);
2862 struct ieee80211_local *local = sdata->local;
2863 int err, changed;
2864
2865 if (!ieee80211_sdata_running(sdata))
2866 return;
2867
2868 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP))
2869 return;
2870
2871 sdata->radar_required = sdata->csa_radar_required;
2872 err = ieee80211_vif_change_channel(sdata, &local->csa_chandef,
2873 &changed);
2874 if (WARN_ON(err < 0))
2875 return;
2876
2877 err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon);
2878 if (err < 0)
2879 return;
2880
2881 changed |= err;
2882 kfree(sdata->u.ap.next_beacon);
2883 sdata->u.ap.next_beacon = NULL;
2884 sdata->vif.csa_active = false;
2885
2886 ieee80211_wake_queues_by_reason(&sdata->local->hw,
2887 IEEE80211_MAX_QUEUE_MAP,
2888 IEEE80211_QUEUE_STOP_REASON_CSA);
2889
2890 ieee80211_bss_info_change_notify(sdata, changed);
2891
2892 cfg80211_ch_switch_notify(sdata->dev, &local->csa_chandef);
2893}
2894
2895static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
2896 struct cfg80211_csa_settings *params)
2897{
2898 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
2899 struct ieee80211_local *local = sdata->local;
2900 struct ieee80211_chanctx_conf *chanctx_conf;
2901 struct ieee80211_chanctx *chanctx;
2902 int err, num_chanctx;
2903
2904 if (!list_empty(&local->roc_list) || local->scanning)
2905 return -EBUSY;
2906
2907 if (sdata->wdev.cac_started)
2908 return -EBUSY;
2909
2910 if (cfg80211_chandef_identical(&params->chandef,
2911 &sdata->vif.bss_conf.chandef))
2912 return -EINVAL;
2913
2914 rcu_read_lock();
2915 chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
2916 if (!chanctx_conf) {
2917 rcu_read_unlock();
2918 return -EBUSY;
2919 }
2920
2921 /* don't handle multi-VIF cases */
2922 chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf);
2923 if (chanctx->refcount > 1) {
2924 rcu_read_unlock();
2925 return -EBUSY;
2926 }
2927 num_chanctx = 0;
2928 list_for_each_entry_rcu(chanctx, &local->chanctx_list, list)
2929 num_chanctx++;
2930 rcu_read_unlock();
2931
2932 if (num_chanctx > 1)
2933 return -EBUSY;
2934
2935 /* don't allow another channel switch if one is already active. */
2936 if (sdata->vif.csa_active)
2937 return -EBUSY;
2938
2939 /* only handle AP for now. */
2940 switch (sdata->vif.type) {
2941 case NL80211_IFTYPE_AP:
2942 break;
2943 default:
2944 return -EOPNOTSUPP;
2945 }
2946
2947 sdata->u.ap.next_beacon = cfg80211_beacon_dup(&params->beacon_after);
2948 if (!sdata->u.ap.next_beacon)
2949 return -ENOMEM;
2950
2951 sdata->csa_counter_offset_beacon = params->counter_offset_beacon;
2952 sdata->csa_counter_offset_presp = params->counter_offset_presp;
2953 sdata->csa_radar_required = params->radar_required;
2954
2955 if (params->block_tx)
2956 ieee80211_stop_queues_by_reason(&local->hw,
2957 IEEE80211_MAX_QUEUE_MAP,
2958 IEEE80211_QUEUE_STOP_REASON_CSA);
2959
2960 err = ieee80211_assign_beacon(sdata, &params->beacon_csa);
2961 if (err < 0)
2962 return err;
2963
2964 local->csa_chandef = params->chandef;
2965 sdata->vif.csa_active = true;
2966
2967 ieee80211_bss_info_change_notify(sdata, err);
2968 drv_channel_switch_beacon(sdata, &params->chandef);
2969
2970 return 0;
2971}
2972
2763static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, 2973static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
2764 struct ieee80211_channel *chan, bool offchan, 2974 struct ieee80211_channel *chan, bool offchan,
2765 unsigned int wait, const u8 *buf, size_t len, 2975 unsigned int wait, const u8 *buf, size_t len,
@@ -2803,7 +3013,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
2803 !rcu_access_pointer(sdata->bss->beacon)) 3013 !rcu_access_pointer(sdata->bss->beacon))
2804 need_offchan = true; 3014 need_offchan = true;
2805 if (!ieee80211_is_action(mgmt->frame_control) || 3015 if (!ieee80211_is_action(mgmt->frame_control) ||
2806 mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) 3016 mgmt->u.action.category == WLAN_CATEGORY_PUBLIC ||
3017 mgmt->u.action.category == WLAN_CATEGORY_SELF_PROTECTED)
2807 break; 3018 break;
2808 rcu_read_lock(); 3019 rcu_read_lock();
2809 sta = sta_info_get(sdata, mgmt->da); 3020 sta = sta_info_get(sdata, mgmt->da);
@@ -2823,6 +3034,12 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
2823 return -EOPNOTSUPP; 3034 return -EOPNOTSUPP;
2824 } 3035 }
2825 3036
3037 /* configurations requiring offchan cannot work if no channel has been
3038 * specified
3039 */
3040 if (need_offchan && !chan)
3041 return -EINVAL;
3042
2826 mutex_lock(&local->mtx); 3043 mutex_lock(&local->mtx);
2827 3044
2828 /* Check if the operating channel is the requested channel */ 3045 /* Check if the operating channel is the requested channel */
@@ -2832,10 +3049,15 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
2832 rcu_read_lock(); 3049 rcu_read_lock();
2833 chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); 3050 chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
2834 3051
2835 if (chanctx_conf) 3052 if (chanctx_conf) {
2836 need_offchan = chan != chanctx_conf->def.chan; 3053 need_offchan = chan && (chan != chanctx_conf->def.chan);
2837 else 3054 } else if (!chan) {
3055 ret = -EINVAL;
3056 rcu_read_unlock();
3057 goto out_unlock;
3058 } else {
2838 need_offchan = true; 3059 need_offchan = true;
3060 }
2839 rcu_read_unlock(); 3061 rcu_read_unlock();
2840 } 3062 }
2841 3063
@@ -2895,19 +3117,8 @@ static void ieee80211_mgmt_frame_register(struct wiphy *wiphy,
2895 u16 frame_type, bool reg) 3117 u16 frame_type, bool reg)
2896{ 3118{
2897 struct ieee80211_local *local = wiphy_priv(wiphy); 3119 struct ieee80211_local *local = wiphy_priv(wiphy);
2898 struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
2899 3120
2900 switch (frame_type) { 3121 switch (frame_type) {
2901 case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_AUTH:
2902 if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
2903 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
2904
2905 if (reg)
2906 ifibss->auth_frame_registrations++;
2907 else
2908 ifibss->auth_frame_registrations--;
2909 }
2910 break;
2911 case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ: 3122 case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ:
2912 if (reg) 3123 if (reg)
2913 local->probe_req_reg++; 3124 local->probe_req_reg++;
@@ -3476,4 +3687,5 @@ struct cfg80211_ops mac80211_config_ops = {
3476 .get_et_strings = ieee80211_get_et_strings, 3687 .get_et_strings = ieee80211_get_et_strings,
3477 .get_channel = ieee80211_cfg_get_channel, 3688 .get_channel = ieee80211_cfg_get_channel,
3478 .start_radar_detection = ieee80211_start_radar_detection, 3689 .start_radar_detection = ieee80211_start_radar_detection,
3690 .channel_switch = ieee80211_channel_switch,
3479}; 3691};
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 03e8d2e3270e..3a4764b2869e 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -410,6 +410,64 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
410 return ret; 410 return ret;
411} 411}
412 412
413int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
414 const struct cfg80211_chan_def *chandef,
415 u32 *changed)
416{
417 struct ieee80211_local *local = sdata->local;
418 struct ieee80211_chanctx_conf *conf;
419 struct ieee80211_chanctx *ctx;
420 int ret;
421 u32 chanctx_changed = 0;
422
423 /* should never be called if not performing a channel switch. */
424 if (WARN_ON(!sdata->vif.csa_active))
425 return -EINVAL;
426
427 if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
428 IEEE80211_CHAN_DISABLED))
429 return -EINVAL;
430
431 mutex_lock(&local->chanctx_mtx);
432 conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
433 lockdep_is_held(&local->chanctx_mtx));
434 if (!conf) {
435 ret = -EINVAL;
436 goto out;
437 }
438
439 ctx = container_of(conf, struct ieee80211_chanctx, conf);
440 if (ctx->refcount != 1) {
441 ret = -EINVAL;
442 goto out;
443 }
444
445 if (sdata->vif.bss_conf.chandef.width != chandef->width) {
446 chanctx_changed = IEEE80211_CHANCTX_CHANGE_WIDTH;
447 *changed |= BSS_CHANGED_BANDWIDTH;
448 }
449
450 sdata->vif.bss_conf.chandef = *chandef;
451 ctx->conf.def = *chandef;
452
453 chanctx_changed |= IEEE80211_CHANCTX_CHANGE_CHANNEL;
454 drv_change_chanctx(local, ctx, chanctx_changed);
455
456 if (!local->use_chanctx) {
457 local->_oper_chandef = *chandef;
458 ieee80211_hw_config(local, 0);
459 }
460
461 ieee80211_recalc_chanctx_chantype(local, ctx);
462 ieee80211_recalc_smps_chanctx(local, ctx);
463 ieee80211_recalc_radar_chanctx(local, ctx);
464
465 ret = 0;
466 out:
467 mutex_unlock(&local->chanctx_mtx);
468 return ret;
469}
470
413int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, 471int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
414 const struct cfg80211_chan_def *chandef, 472 const struct cfg80211_chan_def *chandef,
415 u32 *changed) 473 u32 *changed)
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 14abcf44f974..cafe614ef93d 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -228,9 +228,9 @@ static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
228 if (sdata->vif.type != NL80211_IFTYPE_STATION) 228 if (sdata->vif.type != NL80211_IFTYPE_STATION)
229 return -EOPNOTSUPP; 229 return -EOPNOTSUPP;
230 230
231 mutex_lock(&sdata->u.mgd.mtx); 231 sdata_lock(sdata);
232 err = __ieee80211_request_smps(sdata, smps_mode); 232 err = __ieee80211_request_smps(sdata, smps_mode);
233 mutex_unlock(&sdata->u.mgd.mtx); 233 sdata_unlock(sdata);
234 234
235 return err; 235 return err;
236} 236}
@@ -313,16 +313,16 @@ static ssize_t ieee80211_if_parse_tkip_mic_test(
313 case NL80211_IFTYPE_STATION: 313 case NL80211_IFTYPE_STATION:
314 fc |= cpu_to_le16(IEEE80211_FCTL_TODS); 314 fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
315 /* BSSID SA DA */ 315 /* BSSID SA DA */
316 mutex_lock(&sdata->u.mgd.mtx); 316 sdata_lock(sdata);
317 if (!sdata->u.mgd.associated) { 317 if (!sdata->u.mgd.associated) {
318 mutex_unlock(&sdata->u.mgd.mtx); 318 sdata_unlock(sdata);
319 dev_kfree_skb(skb); 319 dev_kfree_skb(skb);
320 return -ENOTCONN; 320 return -ENOTCONN;
321 } 321 }
322 memcpy(hdr->addr1, sdata->u.mgd.associated->bssid, ETH_ALEN); 322 memcpy(hdr->addr1, sdata->u.mgd.associated->bssid, ETH_ALEN);
323 memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); 323 memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
324 memcpy(hdr->addr3, addr, ETH_ALEN); 324 memcpy(hdr->addr3, addr, ETH_ALEN);
325 mutex_unlock(&sdata->u.mgd.mtx); 325 sdata_unlock(sdata);
326 break; 326 break;
327 default: 327 default:
328 dev_kfree_skb(skb); 328 dev_kfree_skb(skb);
@@ -471,6 +471,8 @@ __IEEE80211_IF_FILE_W(tsf);
471IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); 471IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC);
472 472
473#ifdef CONFIG_MAC80211_MESH 473#ifdef CONFIG_MAC80211_MESH
474IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC);
475
474/* Mesh stats attributes */ 476/* Mesh stats attributes */
475IEEE80211_IF_FILE(fwded_mcast, u.mesh.mshstats.fwded_mcast, DEC); 477IEEE80211_IF_FILE(fwded_mcast, u.mesh.mshstats.fwded_mcast, DEC);
476IEEE80211_IF_FILE(fwded_unicast, u.mesh.mshstats.fwded_unicast, DEC); 478IEEE80211_IF_FILE(fwded_unicast, u.mesh.mshstats.fwded_unicast, DEC);
@@ -480,7 +482,6 @@ IEEE80211_IF_FILE(dropped_frames_congestion,
480 u.mesh.mshstats.dropped_frames_congestion, DEC); 482 u.mesh.mshstats.dropped_frames_congestion, DEC);
481IEEE80211_IF_FILE(dropped_frames_no_route, 483IEEE80211_IF_FILE(dropped_frames_no_route,
482 u.mesh.mshstats.dropped_frames_no_route, DEC); 484 u.mesh.mshstats.dropped_frames_no_route, DEC);
483IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC);
484 485
485/* Mesh parameters */ 486/* Mesh parameters */
486IEEE80211_IF_FILE(dot11MeshMaxRetries, 487IEEE80211_IF_FILE(dot11MeshMaxRetries,
@@ -583,6 +584,7 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata)
583static void add_mesh_files(struct ieee80211_sub_if_data *sdata) 584static void add_mesh_files(struct ieee80211_sub_if_data *sdata)
584{ 585{
585 DEBUGFS_ADD_MODE(tsf, 0600); 586 DEBUGFS_ADD_MODE(tsf, 0600);
587 DEBUGFS_ADD_MODE(estab_plinks, 0400);
586} 588}
587 589
588static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) 590static void add_mesh_stats(struct ieee80211_sub_if_data *sdata)
@@ -598,7 +600,6 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata)
598 MESHSTATS_ADD(dropped_frames_ttl); 600 MESHSTATS_ADD(dropped_frames_ttl);
599 MESHSTATS_ADD(dropped_frames_no_route); 601 MESHSTATS_ADD(dropped_frames_no_route);
600 MESHSTATS_ADD(dropped_frames_congestion); 602 MESHSTATS_ADD(dropped_frames_congestion);
601 MESHSTATS_ADD(estab_plinks);
602#undef MESHSTATS_ADD 603#undef MESHSTATS_ADD
603} 604}
604 605
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 44e201d60a13..19c54a44ed47 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -455,6 +455,15 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
455 DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count); 455 DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count);
456 DEBUGFS_ADD_COUNTER(wep_weak_iv_count, wep_weak_iv_count); 456 DEBUGFS_ADD_COUNTER(wep_weak_iv_count, wep_weak_iv_count);
457 457
458 if (sizeof(sta->driver_buffered_tids) == sizeof(u32))
459 debugfs_create_x32("driver_buffered_tids", 0400,
460 sta->debugfs.dir,
461 (u32 *)&sta->driver_buffered_tids);
462 else
463 debugfs_create_x64("driver_buffered_tids", 0400,
464 sta->debugfs.dir,
465 (u64 *)&sta->driver_buffered_tids);
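The sizeof() dispatch above keeps the exported debugfs file in step with the real storage width of driver_buffered_tids, an unsigned long that is 32-bit on some architectures and 64-bit on others. The same compile-time choice in miniature:

#include <stdio.h>

int main(void)
{
	/* stand-in for sta->driver_buffered_tids (unsigned long) */
	unsigned long driver_buffered_tids = 0;

	if (sizeof(driver_buffered_tids) == sizeof(unsigned int))
		printf("export via debugfs_create_x32()\n");
	else
		printf("export via debugfs_create_x64()\n");
	return 0;
}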
466
458 drv_sta_add_debugfs(local, sdata, &sta->sta, sta->debugfs.dir); 467 drv_sta_add_debugfs(local, sdata, &sta->sta, sta->debugfs.dir);
459} 468}
460 469
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 169664c122e2..b3ea11f3d526 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -146,7 +146,8 @@ static inline int drv_add_interface(struct ieee80211_local *local,
146 146
147 if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN || 147 if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
148 (sdata->vif.type == NL80211_IFTYPE_MONITOR && 148 (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
149 !(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)))) 149 !(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF) &&
150 !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))))
150 return -EINVAL; 151 return -EINVAL;
151 152
152 trace_drv_add_interface(local, sdata); 153 trace_drv_add_interface(local, sdata);
@@ -1071,4 +1072,17 @@ static inline void drv_ipv6_addr_change(struct ieee80211_local *local,
1071} 1072}
1072#endif 1073#endif
1073 1074
1075static inline void
1076drv_channel_switch_beacon(struct ieee80211_sub_if_data *sdata,
1077 struct cfg80211_chan_def *chandef)
1078{
1079 struct ieee80211_local *local = sdata->local;
1080
1081 if (local->ops->channel_switch_beacon) {
1082 trace_drv_channel_switch_beacon(local, sdata, chandef);
1083 local->ops->channel_switch_beacon(&local->hw, &sdata->vif,
1084 chandef);
1085 }
1086}
1087
1074#endif /* __MAC80211_DRIVER_OPS */ 1088#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index af8cee06e4f3..529bf58bc145 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -19,13 +19,14 @@
19#include "ieee80211_i.h" 19#include "ieee80211_i.h"
20#include "rate.h" 20#include "rate.h"
21 21
22static void __check_htcap_disable(struct ieee80211_sub_if_data *sdata, 22static void __check_htcap_disable(struct ieee80211_ht_cap *ht_capa,
23 struct ieee80211_ht_cap *ht_capa_mask,
23 struct ieee80211_sta_ht_cap *ht_cap, 24 struct ieee80211_sta_ht_cap *ht_cap,
24 u16 flag) 25 u16 flag)
25{ 26{
26 __le16 le_flag = cpu_to_le16(flag); 27 __le16 le_flag = cpu_to_le16(flag);
27 if (sdata->u.mgd.ht_capa_mask.cap_info & le_flag) { 28 if (ht_capa_mask->cap_info & le_flag) {
28 if (!(sdata->u.mgd.ht_capa.cap_info & le_flag)) 29 if (!(ht_capa->cap_info & le_flag))
29 ht_cap->cap &= ~flag; 30 ht_cap->cap &= ~flag;
30 } 31 }
31} 32}
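The refactored helper encodes one rule: a capability bit is force-cleared only when userspace set that bit in the override mask but left it clear in the override value; passing the capa/mask pair explicitly is what lets the IBSS path below reuse it. The rule in isolation (the flag value is illustrative):

#include <stdio.h>

static unsigned short apply_override(unsigned short cap,
				     unsigned short ovr,
				     unsigned short mask,
				     unsigned short flag)
{
	/* clear only if the user masked the bit in but left it unset */
	if ((mask & flag) && !(ovr & flag))
		cap &= ~flag;
	return cap;
}

int main(void)
{
	const unsigned short SGI_20 = 0x0020;	/* illustrative flag */

	/* masked and clear in the override: force-disabled */
	printf("%#x\n", apply_override(0x0020, 0x0000, 0x0020, SGI_20));
	/* not masked: hardware capability left alone */
	printf("%#x\n", apply_override(0x0020, 0x0000, 0x0000, SGI_20));
	return 0;
}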
@@ -33,13 +34,30 @@ static void __check_htcap_disable(struct ieee80211_sub_if_data *sdata,
33void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, 34void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
34 struct ieee80211_sta_ht_cap *ht_cap) 35 struct ieee80211_sta_ht_cap *ht_cap)
35{ 36{
36 u8 *scaps = (u8 *)(&sdata->u.mgd.ht_capa.mcs.rx_mask); 37 struct ieee80211_ht_cap *ht_capa, *ht_capa_mask;
37 u8 *smask = (u8 *)(&sdata->u.mgd.ht_capa_mask.mcs.rx_mask); 38 u8 *scaps, *smask;
38 int i; 39 int i;
39 40
40 if (!ht_cap->ht_supported) 41 if (!ht_cap->ht_supported)
41 return; 42 return;
42 43
44 switch (sdata->vif.type) {
45 case NL80211_IFTYPE_STATION:
46 ht_capa = &sdata->u.mgd.ht_capa;
47 ht_capa_mask = &sdata->u.mgd.ht_capa_mask;
48 break;
49 case NL80211_IFTYPE_ADHOC:
50 ht_capa = &sdata->u.ibss.ht_capa;
51 ht_capa_mask = &sdata->u.ibss.ht_capa_mask;
52 break;
53 default:
54 WARN_ON_ONCE(1);
55 return;
56 }
57
58 scaps = (u8 *)(&ht_capa->mcs.rx_mask);
59 smask = (u8 *)(&ht_capa_mask->mcs.rx_mask);
60
43 /* NOTE: If you add more over-rides here, update register_hw 61 /* NOTE: If you add more over-rides here, update register_hw
44 * ht_capa_mod_msk logic in main.c as well. 62 * ht_capa_mod_msk logic in main.c as well.
45 * And, if this method can ever change ht_cap.ht_supported, fix 63 * And, if this method can ever change ht_cap.ht_supported, fix
@@ -55,28 +73,32 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
55 } 73 }
56 74
57 /* Force removal of HT-40 capabilities? */ 75 /* Force removal of HT-40 capabilities? */
58 __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SUP_WIDTH_20_40); 76 __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
59 __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SGI_40); 77 IEEE80211_HT_CAP_SUP_WIDTH_20_40);
78 __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
79 IEEE80211_HT_CAP_SGI_40);
60 80
61 /* Allow user to disable SGI-20 (SGI-40 is handled above) */ 81 /* Allow user to disable SGI-20 (SGI-40 is handled above) */
62 __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SGI_20); 82 __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
83 IEEE80211_HT_CAP_SGI_20);
63 84
64 /* Allow user to disable the max-AMSDU bit. */ 85 /* Allow user to disable the max-AMSDU bit. */
65 __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_MAX_AMSDU); 86 __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
87 IEEE80211_HT_CAP_MAX_AMSDU);
66 88
67 /* Allow user to decrease AMPDU factor */ 89 /* Allow user to decrease AMPDU factor */
68 if (sdata->u.mgd.ht_capa_mask.ampdu_params_info & 90 if (ht_capa_mask->ampdu_params_info &
69 IEEE80211_HT_AMPDU_PARM_FACTOR) { 91 IEEE80211_HT_AMPDU_PARM_FACTOR) {
70 u8 n = sdata->u.mgd.ht_capa.ampdu_params_info 92 u8 n = ht_capa->ampdu_params_info &
71 & IEEE80211_HT_AMPDU_PARM_FACTOR; 93 IEEE80211_HT_AMPDU_PARM_FACTOR;
72 if (n < ht_cap->ampdu_factor) 94 if (n < ht_cap->ampdu_factor)
73 ht_cap->ampdu_factor = n; 95 ht_cap->ampdu_factor = n;
74 } 96 }
75 97
76 /* Allow the user to increase AMPDU density. */ 98 /* Allow the user to increase AMPDU density. */
77 if (sdata->u.mgd.ht_capa_mask.ampdu_params_info & 99 if (ht_capa_mask->ampdu_params_info &
78 IEEE80211_HT_AMPDU_PARM_DENSITY) { 100 IEEE80211_HT_AMPDU_PARM_DENSITY) {
79 u8 n = (sdata->u.mgd.ht_capa.ampdu_params_info & 101 u8 n = (ht_capa->ampdu_params_info &
80 IEEE80211_HT_AMPDU_PARM_DENSITY) 102 IEEE80211_HT_AMPDU_PARM_DENSITY)
81 >> IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT; 103 >> IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT;
82 if (n > ht_cap->ampdu_density) 104 if (n > ht_cap->ampdu_density)
@@ -112,7 +134,8 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
112 * we advertised a restricted capability set to. Override 134 * we advertised a restricted capability set to. Override
113 * our own capabilities and then use those below. 135 * our own capabilities and then use those below.
114 */ 136 */
115 if (sdata->vif.type == NL80211_IFTYPE_STATION && 137 if ((sdata->vif.type == NL80211_IFTYPE_STATION ||
138 sdata->vif.type == NL80211_IFTYPE_ADHOC) &&
116 !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) 139 !test_sta_flag(sta, WLAN_STA_TDLS_PEER))
117 ieee80211_apply_htcap_overrides(sdata, &own_cap); 140 ieee80211_apply_htcap_overrides(sdata, &own_cap);
118 141
@@ -281,13 +304,14 @@ void ieee80211_ba_session_work(struct work_struct *work)
281 sta, tid, WLAN_BACK_RECIPIENT, 304 sta, tid, WLAN_BACK_RECIPIENT,
282 WLAN_REASON_UNSPECIFIED, true); 305 WLAN_REASON_UNSPECIFIED, true);
283 306
307 spin_lock_bh(&sta->lock);
308
284 tid_tx = sta->ampdu_mlme.tid_start_tx[tid]; 309 tid_tx = sta->ampdu_mlme.tid_start_tx[tid];
285 if (tid_tx) { 310 if (tid_tx) {
286 /* 311 /*
287 * Assign it over to the normal tid_tx array 312 * Assign it over to the normal tid_tx array
288 * where it "goes live". 313 * where it "goes live".
289 */ 314 */
290 spin_lock_bh(&sta->lock);
291 315
292 sta->ampdu_mlme.tid_start_tx[tid] = NULL; 316 sta->ampdu_mlme.tid_start_tx[tid] = NULL;
293 /* could there be a race? */ 317 /* could there be a race? */
@@ -300,6 +324,7 @@ void ieee80211_ba_session_work(struct work_struct *work)
300 ieee80211_tx_ba_session_handle_start(sta, tid); 324 ieee80211_tx_ba_session_handle_start(sta, tid);
301 continue; 325 continue;
302 } 326 }
327 spin_unlock_bh(&sta->lock);
303 328
304 tid_tx = rcu_dereference_protected_tid_tx(sta, tid); 329 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
305 if (tid_tx && test_and_clear_bit(HT_AGG_STATE_WANT_STOP, 330 if (tid_tx && test_and_clear_bit(HT_AGG_STATE_WANT_STOP,
@@ -429,9 +454,9 @@ void ieee80211_request_smps_work(struct work_struct *work)
429 container_of(work, struct ieee80211_sub_if_data, 454 container_of(work, struct ieee80211_sub_if_data,
430 u.mgd.request_smps_work); 455 u.mgd.request_smps_work);
431 456
432 mutex_lock(&sdata->u.mgd.mtx); 457 sdata_lock(sdata);
433 __ieee80211_request_smps(sdata, sdata->u.mgd.driver_smps_mode); 458 __ieee80211_request_smps(sdata, sdata->u.mgd.driver_smps_mode);
434 mutex_unlock(&sdata->u.mgd.mtx); 459 sdata_unlock(sdata);
435} 460}
436 461
437void ieee80211_request_smps(struct ieee80211_vif *vif, 462void ieee80211_request_smps(struct ieee80211_vif *vif,
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 170f9a7fa319..a12afe77bb26 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -30,75 +30,27 @@
30 30
31#define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ) 31#define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ)
32#define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ) 32#define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ)
33#define IEEE80211_IBSS_RSN_INACTIVITY_LIMIT (10 * HZ)
33 34
34#define IEEE80211_IBSS_MAX_STA_ENTRIES 128 35#define IEEE80211_IBSS_MAX_STA_ENTRIES 128
35 36
36 37static struct beacon_data *
37static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, 38ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
38 const u8 *bssid, const int beacon_int, 39 const int beacon_int, const u32 basic_rates,
39 struct ieee80211_channel *chan, 40 const u16 capability, u64 tsf,
40 const u32 basic_rates, 41 struct cfg80211_chan_def *chandef,
41 const u16 capability, u64 tsf, 42 bool *have_higher_than_11mbit)
42 bool creator)
43{ 43{
44 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; 44 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
45 struct ieee80211_local *local = sdata->local; 45 struct ieee80211_local *local = sdata->local;
46 int rates, i; 46 int rates_n = 0, i, ri;
47 struct ieee80211_mgmt *mgmt; 47 struct ieee80211_mgmt *mgmt;
48 u8 *pos; 48 u8 *pos;
49 struct ieee80211_supported_band *sband; 49 struct ieee80211_supported_band *sband;
50 struct cfg80211_bss *bss; 50 u32 rate_flags, rates = 0, rates_added = 0;
51 u32 bss_change;
52 u8 supp_rates[IEEE80211_MAX_SUPP_RATES];
53 struct cfg80211_chan_def chandef;
54 struct beacon_data *presp; 51 struct beacon_data *presp;
55 int frame_len; 52 int frame_len;
56 53 int shift;
57 lockdep_assert_held(&ifibss->mtx);
58
59 /* Reset own TSF to allow time synchronization work. */
60 drv_reset_tsf(local, sdata);
61
62 if (!ether_addr_equal(ifibss->bssid, bssid))
63 sta_info_flush(sdata);
64
65 /* if merging, indicate to driver that we leave the old IBSS */
66 if (sdata->vif.bss_conf.ibss_joined) {
67 sdata->vif.bss_conf.ibss_joined = false;
68 sdata->vif.bss_conf.ibss_creator = false;
69 sdata->vif.bss_conf.enable_beacon = false;
70 netif_carrier_off(sdata->dev);
71 ieee80211_bss_info_change_notify(sdata,
72 BSS_CHANGED_IBSS |
73 BSS_CHANGED_BEACON_ENABLED);
74 }
75
76 presp = rcu_dereference_protected(ifibss->presp,
77 lockdep_is_held(&ifibss->mtx));
78 rcu_assign_pointer(ifibss->presp, NULL);
79 if (presp)
80 kfree_rcu(presp, rcu_head);
81
82 sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0;
83
84 cfg80211_chandef_create(&chandef, chan, ifibss->channel_type);
85 if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) {
86 chandef.width = NL80211_CHAN_WIDTH_20;
87 chandef.center_freq1 = chan->center_freq;
88 }
89
90 ieee80211_vif_release_channel(sdata);
91 if (ieee80211_vif_use_channel(sdata, &chandef,
92 ifibss->fixed_channel ?
93 IEEE80211_CHANCTX_SHARED :
94 IEEE80211_CHANCTX_EXCLUSIVE)) {
95 sdata_info(sdata, "Failed to join IBSS, no channel context\n");
96 return;
97 }
98
99 memcpy(ifibss->bssid, bssid, ETH_ALEN);
100
101 sband = local->hw.wiphy->bands[chan->band];
102 54
103 /* Build IBSS probe response */ 55 /* Build IBSS probe response */
104 frame_len = sizeof(struct ieee80211_hdr_3addr) + 56 frame_len = sizeof(struct ieee80211_hdr_3addr) +
@@ -113,7 +65,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
113 ifibss->ie_len; 65 ifibss->ie_len;
114 presp = kzalloc(sizeof(*presp) + frame_len, GFP_KERNEL); 66 presp = kzalloc(sizeof(*presp) + frame_len, GFP_KERNEL);
115 if (!presp) 67 if (!presp)
116 return; 68 return NULL;
117 69
118 presp->head = (void *)(presp + 1); 70 presp->head = (void *)(presp + 1);
119 71
@@ -134,21 +86,47 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
134 memcpy(pos, ifibss->ssid, ifibss->ssid_len); 86 memcpy(pos, ifibss->ssid, ifibss->ssid_len);
135 pos += ifibss->ssid_len; 87 pos += ifibss->ssid_len;
136 88
137 rates = min_t(int, 8, sband->n_bitrates); 89 sband = local->hw.wiphy->bands[chandef->chan->band];
90 rate_flags = ieee80211_chandef_rate_flags(chandef);
91 shift = ieee80211_chandef_get_shift(chandef);
92 rates_n = 0;
93 if (have_higher_than_11mbit)
94 *have_higher_than_11mbit = false;
95
96 for (i = 0; i < sband->n_bitrates; i++) {
97 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
98 continue;
99 if (sband->bitrates[i].bitrate > 110 &&
100 have_higher_than_11mbit)
101 *have_higher_than_11mbit = true;
102
103 rates |= BIT(i);
104 rates_n++;
105 }
106
138 *pos++ = WLAN_EID_SUPP_RATES; 107 *pos++ = WLAN_EID_SUPP_RATES;
139 *pos++ = rates; 108 *pos++ = min_t(int, 8, rates_n);
140 for (i = 0; i < rates; i++) { 109 for (ri = 0; ri < sband->n_bitrates; ri++) {
141 int rate = sband->bitrates[i].bitrate; 110 int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate,
111 5 * (1 << shift));
142 u8 basic = 0; 112 u8 basic = 0;
143 if (basic_rates & BIT(i)) 113 if (!(rates & BIT(ri)))
114 continue;
115
116 if (basic_rates & BIT(ri))
144 basic = 0x80; 117 basic = 0x80;
145 *pos++ = basic | (u8) (rate / 5); 118 *pos++ = basic | (u8) rate;
119 if (++rates_added == 8) {
120 ri++; /* continue at next rate for EXT_SUPP_RATES */
121 break;
122 }
146 } 123 }
147 124
148 if (sband->band == IEEE80211_BAND_2GHZ) { 125 if (sband->band == IEEE80211_BAND_2GHZ) {
149 *pos++ = WLAN_EID_DS_PARAMS; 126 *pos++ = WLAN_EID_DS_PARAMS;
150 *pos++ = 1; 127 *pos++ = 1;
151 *pos++ = ieee80211_frequency_to_channel(chan->center_freq); 128 *pos++ = ieee80211_frequency_to_channel(
129 chandef->chan->center_freq);
152 } 130 }
153 131
154 *pos++ = WLAN_EID_IBSS_PARAMS; 132 *pos++ = WLAN_EID_IBSS_PARAMS;
@@ -157,15 +135,20 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
157 *pos++ = 0; 135 *pos++ = 0;
158 *pos++ = 0; 136 *pos++ = 0;
159 137
160 if (sband->n_bitrates > 8) { 138 /* put the remaining rates in WLAN_EID_EXT_SUPP_RATES */
139 if (rates_n > 8) {
161 *pos++ = WLAN_EID_EXT_SUPP_RATES; 140 *pos++ = WLAN_EID_EXT_SUPP_RATES;
162 *pos++ = sband->n_bitrates - 8; 141 *pos++ = rates_n - 8;
163 for (i = 8; i < sband->n_bitrates; i++) { 142 for (; ri < sband->n_bitrates; ri++) {
164 int rate = sband->bitrates[i].bitrate; 143 int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate,
144 5 * (1 << shift));
165 u8 basic = 0; 145 u8 basic = 0;
166 if (basic_rates & BIT(i)) 146 if (!(rates & BIT(ri)))
147 continue;
148
149 if (basic_rates & BIT(ri))
167 basic = 0x80; 150 basic = 0x80;
168 *pos++ = basic | (u8) (rate / 5); 151 *pos++ = basic | (u8) rate;
169 } 152 }
170 } 153 }
171 154
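The Supported Rates element stores rates in 500 kb/s units, which is where the old `rate / 5` came from; struct ieee80211_rate keeps bitrates in 100 kb/s units, and on 5/10 MHz channels the PHY runs quarter/half clocked, so the IE byte becomes DIV_ROUND_UP(bitrate, 5 * (1 << shift)). Only eight values fit into WLAN_EID_SUPP_RATES; the loop stops there and the remainder continues in WLAN_EID_EXT_SUPP_RATES in the next hunk. A standalone sketch of the arithmetic for a few OFDM entries (userspace, DIV_ROUND_UP redefined locally; illustration only, not kernel API):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	/* OFDM bitrates in 100 kb/s units, as in struct ieee80211_rate */
	static const int bitrates[] = { 60, 90, 120, 180, 240, 360, 480, 540 };
	int shift, i;

	/* shift 0: 20 MHz, shift 1: 10 MHz, shift 2: 5 MHz */
	for (shift = 0; shift <= 2; shift++) {
		printf("shift %d:", shift);
		for (i = 0; i < 8; i++)
			printf(" 0x%02x",
			       DIV_ROUND_UP(bitrates[i], 5 * (1 << shift)));
		printf("\n");
	}
	return 0;
}

For example, the 6 Mb/s entry (bitrate 60) encodes as 0x0c at shift 0 but 0x03 at shift 2, i.e. 1.5 Mb/s effective on a 5 MHz channel.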
@@ -175,17 +158,23 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
175 } 158 }
176 159
177 /* add HT capability and information IEs */ 160 /* add HT capability and information IEs */
178 if (chandef.width != NL80211_CHAN_WIDTH_20_NOHT && 161 if (chandef->width != NL80211_CHAN_WIDTH_20_NOHT &&
162 chandef->width != NL80211_CHAN_WIDTH_5 &&
163 chandef->width != NL80211_CHAN_WIDTH_10 &&
179 sband->ht_cap.ht_supported) { 164 sband->ht_cap.ht_supported) {
180 pos = ieee80211_ie_build_ht_cap(pos, &sband->ht_cap, 165 struct ieee80211_sta_ht_cap ht_cap;
181 sband->ht_cap.cap); 166
167 memcpy(&ht_cap, &sband->ht_cap, sizeof(ht_cap));
168 ieee80211_apply_htcap_overrides(sdata, &ht_cap);
169
170 pos = ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap);
182 /* 171 /*
183 * Note: According to 802.11n-2009 9.13.3.1, HT Protection 172 * Note: According to 802.11n-2009 9.13.3.1, HT Protection
184 * field and RIFS Mode are reserved in IBSS mode, therefore 173 * field and RIFS Mode are reserved in IBSS mode, therefore
185 * keep them at 0 174 * keep them at 0
186 */ 175 */
187 pos = ieee80211_ie_build_ht_oper(pos, &sband->ht_cap, 176 pos = ieee80211_ie_build_ht_oper(pos, &sband->ht_cap,
188 &chandef, 0); 177 chandef, 0);
189 } 178 }
190 179
191 if (local->hw.queues >= IEEE80211_NUM_ACS) { 180 if (local->hw.queues >= IEEE80211_NUM_ACS) {
@@ -202,9 +191,97 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
202 191
203 presp->head_len = pos - presp->head; 192 presp->head_len = pos - presp->head;
204 if (WARN_ON(presp->head_len > frame_len)) 193 if (WARN_ON(presp->head_len > frame_len))
194 goto error;
195
196 return presp;
197error:
198 kfree(presp);
199 return NULL;
200}
201
202static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
203 const u8 *bssid, const int beacon_int,
204 struct cfg80211_chan_def *req_chandef,
205 const u32 basic_rates,
206 const u16 capability, u64 tsf,
207 bool creator)
208{
209 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
210 struct ieee80211_local *local = sdata->local;
211 struct ieee80211_supported_band *sband;
212 struct ieee80211_mgmt *mgmt;
213 struct cfg80211_bss *bss;
214 u32 bss_change;
215 struct cfg80211_chan_def chandef;
216 struct ieee80211_channel *chan;
217 struct beacon_data *presp;
218 enum nl80211_bss_scan_width scan_width;
219 bool have_higher_than_11mbit;
220
221 sdata_assert_lock(sdata);
222
223 /* Reset own TSF to allow time synchronization work. */
224 drv_reset_tsf(local, sdata);
225
226 if (!ether_addr_equal(ifibss->bssid, bssid))
227 sta_info_flush(sdata);
228
229 /* if merging, indicate to driver that we leave the old IBSS */
230 if (sdata->vif.bss_conf.ibss_joined) {
231 sdata->vif.bss_conf.ibss_joined = false;
232 sdata->vif.bss_conf.ibss_creator = false;
233 sdata->vif.bss_conf.enable_beacon = false;
234 netif_carrier_off(sdata->dev);
235 ieee80211_bss_info_change_notify(sdata,
236 BSS_CHANGED_IBSS |
237 BSS_CHANGED_BEACON_ENABLED);
238 }
239
240 presp = rcu_dereference_protected(ifibss->presp,
241 lockdep_is_held(&sdata->wdev.mtx));
242 rcu_assign_pointer(ifibss->presp, NULL);
243 if (presp)
244 kfree_rcu(presp, rcu_head);
245
246 sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0;
247
 248 /* make a copy of the chandef, as it could be modified below. */
249 chandef = *req_chandef;
250 chan = chandef.chan;
251 if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) {
252 if (chandef.width == NL80211_CHAN_WIDTH_5 ||
253 chandef.width == NL80211_CHAN_WIDTH_10 ||
254 chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
255 chandef.width == NL80211_CHAN_WIDTH_20) {
256 sdata_info(sdata,
257 "Failed to join IBSS, beacons forbidden\n");
258 return;
259 }
260 chandef.width = NL80211_CHAN_WIDTH_20;
261 chandef.center_freq1 = chan->center_freq;
262 }
263
264 ieee80211_vif_release_channel(sdata);
265 if (ieee80211_vif_use_channel(sdata, &chandef,
266 ifibss->fixed_channel ?
267 IEEE80211_CHANCTX_SHARED :
268 IEEE80211_CHANCTX_EXCLUSIVE)) {
269 sdata_info(sdata, "Failed to join IBSS, no channel context\n");
270 return;
271 }
272
273 memcpy(ifibss->bssid, bssid, ETH_ALEN);
274
275 sband = local->hw.wiphy->bands[chan->band];
276
277 presp = ieee80211_ibss_build_presp(sdata, beacon_int, basic_rates,
278 capability, tsf, &chandef,
279 &have_higher_than_11mbit);
280 if (!presp)
205 return; 281 return;
206 282
207 rcu_assign_pointer(ifibss->presp, presp); 283 rcu_assign_pointer(ifibss->presp, presp);
284 mgmt = (void *)presp->head;
208 285
209 sdata->vif.bss_conf.enable_beacon = true; 286 sdata->vif.bss_conf.enable_beacon = true;
210 sdata->vif.bss_conf.beacon_int = beacon_int; 287 sdata->vif.bss_conf.beacon_int = beacon_int;
@@ -234,18 +311,26 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
234 sdata->vif.bss_conf.use_short_slot = chan->band == IEEE80211_BAND_5GHZ; 311 sdata->vif.bss_conf.use_short_slot = chan->band == IEEE80211_BAND_5GHZ;
235 bss_change |= BSS_CHANGED_ERP_SLOT; 312 bss_change |= BSS_CHANGED_ERP_SLOT;
236 313
314 /* cf. IEEE 802.11 9.2.12 */
315 if (chan->band == IEEE80211_BAND_2GHZ && have_higher_than_11mbit)
316 sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
317 else
318 sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
319
237 sdata->vif.bss_conf.ibss_joined = true; 320 sdata->vif.bss_conf.ibss_joined = true;
238 sdata->vif.bss_conf.ibss_creator = creator; 321 sdata->vif.bss_conf.ibss_creator = creator;
239 ieee80211_bss_info_change_notify(sdata, bss_change); 322 ieee80211_bss_info_change_notify(sdata, bss_change);
240 323
241 ieee80211_sta_def_wmm_params(sdata, sband->n_bitrates, supp_rates); 324 ieee80211_set_wmm_default(sdata, true);
242 325
243 ifibss->state = IEEE80211_IBSS_MLME_JOINED; 326 ifibss->state = IEEE80211_IBSS_MLME_JOINED;
244 mod_timer(&ifibss->timer, 327 mod_timer(&ifibss->timer,
245 round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); 328 round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL));
246 329
247 bss = cfg80211_inform_bss_frame(local->hw.wiphy, chan, 330 scan_width = cfg80211_chandef_to_scan_width(&chandef);
248 mgmt, presp->head_len, 0, GFP_KERNEL); 331 bss = cfg80211_inform_bss_width_frame(local->hw.wiphy, chan,
332 scan_width, mgmt,
333 presp->head_len, 0, GFP_KERNEL);
249 cfg80211_put_bss(local->hw.wiphy, bss); 334 cfg80211_put_bss(local->hw.wiphy, bss);
250 netif_carrier_on(sdata->dev); 335 netif_carrier_on(sdata->dev);
251 cfg80211_ibss_joined(sdata->dev, ifibss->bssid, GFP_KERNEL); 336 cfg80211_ibss_joined(sdata->dev, ifibss->bssid, GFP_KERNEL);
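cfg80211_inform_bss_width_frame() replaces cfg80211_inform_bss_frame() so the scan entry records the width the frame was received on, letting nl80211 report 5/10 MHz IBSSes correctly. The cfg80211_chandef_to_scan_width() helper used above is presumably just this mapping (kernel context, include <net/cfg80211.h>; sketch, not the cfg80211 source):

static enum nl80211_bss_scan_width
chandef_to_scan_width(const struct cfg80211_chan_def *chandef)
{
	switch (chandef->width) {
	case NL80211_CHAN_WIDTH_5:
		return NL80211_BSS_CHAN_WIDTH_5;
	case NL80211_CHAN_WIDTH_10:
		return NL80211_BSS_CHAN_WIDTH_10;
	default:
		return NL80211_BSS_CHAN_WIDTH_20;
	}
}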
@@ -257,27 +342,60 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
257 struct cfg80211_bss *cbss = 342 struct cfg80211_bss *cbss =
258 container_of((void *)bss, struct cfg80211_bss, priv); 343 container_of((void *)bss, struct cfg80211_bss, priv);
259 struct ieee80211_supported_band *sband; 344 struct ieee80211_supported_band *sband;
345 struct cfg80211_chan_def chandef;
260 u32 basic_rates; 346 u32 basic_rates;
261 int i, j; 347 int i, j;
262 u16 beacon_int = cbss->beacon_interval; 348 u16 beacon_int = cbss->beacon_interval;
263 const struct cfg80211_bss_ies *ies; 349 const struct cfg80211_bss_ies *ies;
350 enum nl80211_channel_type chan_type;
264 u64 tsf; 351 u64 tsf;
352 u32 rate_flags;
353 int shift;
265 354
266 lockdep_assert_held(&sdata->u.ibss.mtx); 355 sdata_assert_lock(sdata);
267 356
268 if (beacon_int < 10) 357 if (beacon_int < 10)
269 beacon_int = 10; 358 beacon_int = 10;
270 359
360 switch (sdata->u.ibss.chandef.width) {
361 case NL80211_CHAN_WIDTH_20_NOHT:
362 case NL80211_CHAN_WIDTH_20:
363 case NL80211_CHAN_WIDTH_40:
364 chan_type = cfg80211_get_chandef_type(&sdata->u.ibss.chandef);
365 cfg80211_chandef_create(&chandef, cbss->channel, chan_type);
366 break;
367 case NL80211_CHAN_WIDTH_5:
368 case NL80211_CHAN_WIDTH_10:
369 cfg80211_chandef_create(&chandef, cbss->channel,
370 NL80211_CHAN_WIDTH_20_NOHT);
371 chandef.width = sdata->u.ibss.chandef.width;
372 break;
373 default:
374 /* fall back to 20 MHz for unsupported modes */
375 cfg80211_chandef_create(&chandef, cbss->channel,
376 NL80211_CHAN_WIDTH_20_NOHT);
377 break;
378 }
379
271 sband = sdata->local->hw.wiphy->bands[cbss->channel->band]; 380 sband = sdata->local->hw.wiphy->bands[cbss->channel->band];
381 rate_flags = ieee80211_chandef_rate_flags(&sdata->u.ibss.chandef);
382 shift = ieee80211_vif_get_shift(&sdata->vif);
272 383
273 basic_rates = 0; 384 basic_rates = 0;
274 385
275 for (i = 0; i < bss->supp_rates_len; i++) { 386 for (i = 0; i < bss->supp_rates_len; i++) {
276 int rate = (bss->supp_rates[i] & 0x7f) * 5; 387 int rate = bss->supp_rates[i] & 0x7f;
277 bool is_basic = !!(bss->supp_rates[i] & 0x80); 388 bool is_basic = !!(bss->supp_rates[i] & 0x80);
278 389
279 for (j = 0; j < sband->n_bitrates; j++) { 390 for (j = 0; j < sband->n_bitrates; j++) {
280 if (sband->bitrates[j].bitrate == rate) { 391 int brate;
392 if ((rate_flags & sband->bitrates[j].flags)
393 != rate_flags)
394 continue;
395
396 brate = DIV_ROUND_UP(sband->bitrates[j].bitrate,
397 5 * (1 << shift));
398 if (brate == rate) {
281 if (is_basic) 399 if (is_basic)
282 basic_rates |= BIT(j); 400 basic_rates |= BIT(j);
283 break; 401 break;
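Peers on narrow channels advertise their effective (scaled) rates, so matching against the band table now applies the same scaling instead of the old `bitrate == rate * 5` comparison. On a 10 MHz channel, for instance, the 12 Mb/s table entry matches an IE byte of 12 (6 Mb/s effective), which the unscaled comparison could never hit. A minimal check of that arithmetic (standalone illustration):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	int shift = 1;			/* 10 MHz channel */
	int bitrate = 120;		/* 12.0 Mb/s table entry */
	int ie_byte = 12;		/* peer's 6 Mb/s in 500 kb/s units */

	printf("match: %d\n",
	       DIV_ROUND_UP(bitrate, 5 * (1 << shift)) == (ie_byte & 0x7f));
	return 0;
}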
@@ -292,14 +410,13 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
292 410
293 __ieee80211_sta_join_ibss(sdata, cbss->bssid, 411 __ieee80211_sta_join_ibss(sdata, cbss->bssid,
294 beacon_int, 412 beacon_int,
295 cbss->channel, 413 &chandef,
296 basic_rates, 414 basic_rates,
297 cbss->capability, 415 cbss->capability,
298 tsf, false); 416 tsf, false);
299} 417}
300 418
301static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, 419static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta)
302 bool auth)
303 __acquires(RCU) 420 __acquires(RCU)
304{ 421{
305 struct ieee80211_sub_if_data *sdata = sta->sdata; 422 struct ieee80211_sub_if_data *sdata = sta->sdata;
@@ -321,26 +438,20 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta,
321 /* If it fails, maybe we raced another insertion? */ 438 /* If it fails, maybe we raced another insertion? */
322 if (sta_info_insert_rcu(sta)) 439 if (sta_info_insert_rcu(sta))
323 return sta_info_get(sdata, addr); 440 return sta_info_get(sdata, addr);
324 if (auth && !sdata->u.ibss.auth_frame_registrations) {
325 ibss_dbg(sdata,
326 "TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n",
327 sdata->vif.addr, addr, sdata->u.ibss.bssid);
328 ieee80211_send_auth(sdata, 1, WLAN_AUTH_OPEN, 0, NULL, 0,
329 addr, sdata->u.ibss.bssid, NULL, 0, 0, 0);
330 }
331 return sta; 441 return sta;
332} 442}
333 443
334static struct sta_info * 444static struct sta_info *
335ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, 445ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid,
336 const u8 *bssid, const u8 *addr, 446 const u8 *addr, u32 supp_rates)
337 u32 supp_rates, bool auth)
338 __acquires(RCU) 447 __acquires(RCU)
339{ 448{
340 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; 449 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
341 struct ieee80211_local *local = sdata->local; 450 struct ieee80211_local *local = sdata->local;
342 struct sta_info *sta; 451 struct sta_info *sta;
343 struct ieee80211_chanctx_conf *chanctx_conf; 452 struct ieee80211_chanctx_conf *chanctx_conf;
453 struct ieee80211_supported_band *sband;
454 enum nl80211_bss_scan_width scan_width;
344 int band; 455 int band;
345 456
346 /* 457 /*
@@ -369,6 +480,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
369 if (WARN_ON_ONCE(!chanctx_conf)) 480 if (WARN_ON_ONCE(!chanctx_conf))
370 return NULL; 481 return NULL;
371 band = chanctx_conf->def.chan->band; 482 band = chanctx_conf->def.chan->band;
483 scan_width = cfg80211_chandef_to_scan_width(&chanctx_conf->def);
372 rcu_read_unlock(); 484 rcu_read_unlock();
373 485
374 sta = sta_info_alloc(sdata, addr, GFP_KERNEL); 486 sta = sta_info_alloc(sdata, addr, GFP_KERNEL);
@@ -380,10 +492,11 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
380 sta->last_rx = jiffies; 492 sta->last_rx = jiffies;
381 493
382 /* make sure mandatory rates are always added */ 494 /* make sure mandatory rates are always added */
495 sband = local->hw.wiphy->bands[band];
383 sta->sta.supp_rates[band] = supp_rates | 496 sta->sta.supp_rates[band] = supp_rates |
384 ieee80211_mandatory_rates(local, band); 497 ieee80211_mandatory_rates(sband, scan_width);
385 498
386 return ieee80211_ibss_finish_sta(sta, auth); 499 return ieee80211_ibss_finish_sta(sta);
387} 500}
388 501
389static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata, 502static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata,
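ieee80211_mandatory_rates() now takes the band and a scan width rather than local + band, because the mandatory set changes on narrow channels: 5/10 MHz operation is OFDM-only, so the 2.4 GHz CCK basic set no longer applies there. A hedged sketch of the assumed selection logic, not the mac80211 source (kernel context, include <net/mac80211.h>):

static u32 ibss_mandatory_rates(struct ieee80211_supported_band *sband,
				enum nl80211_bss_scan_width scan_width)
{
	enum ieee80211_rate_flags flag;
	u32 rates = 0;
	int i;

	if (sband->band != IEEE80211_BAND_2GHZ)
		flag = IEEE80211_RATE_MANDATORY_A;	/* 5 GHz OFDM */
	else if (scan_width == NL80211_BSS_CHAN_WIDTH_5 ||
		 scan_width == NL80211_BSS_CHAN_WIDTH_10)
		flag = IEEE80211_RATE_MANDATORY_G;	/* no CCK on 5/10 MHz */
	else
		flag = IEEE80211_RATE_MANDATORY_B;	/* 1/2/5.5/11 Mb/s */

	for (i = 0; i < sband->n_bitrates; i++)
		if (sband->bitrates[i].flags & flag)
			rates |= BIT(i);
	return rates;
}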
@@ -405,10 +518,8 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata,
405 size_t len) 518 size_t len)
406{ 519{
407 u16 auth_alg, auth_transaction; 520 u16 auth_alg, auth_transaction;
408 struct sta_info *sta;
409 u8 deauth_frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
410 521
411 lockdep_assert_held(&sdata->u.ibss.mtx); 522 sdata_assert_lock(sdata);
412 523
413 if (len < 24 + 6) 524 if (len < 24 + 6)
414 return; 525 return;
@@ -423,22 +534,6 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata,
423 if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) 534 if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1)
424 return; 535 return;
425 536
426 sta_info_destroy_addr(sdata, mgmt->sa);
427 sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 0, false);
428 rcu_read_unlock();
429
430 /*
431 * if we have any problem in allocating the new station, we reply with a
432 * DEAUTH frame to tell the other end that we had a problem
433 */
434 if (!sta) {
435 ieee80211_send_deauth_disassoc(sdata, sdata->u.ibss.bssid,
436 IEEE80211_STYPE_DEAUTH,
437 WLAN_REASON_UNSPECIFIED, true,
438 deauth_frame_buf);
439 return;
440 }
441
442 /* 537 /*
443 * IEEE 802.11 standard does not require authentication in IBSS 538 * IEEE 802.11 standard does not require authentication in IBSS
444 * networks and most implementations do not seem to use it. 539 * networks and most implementations do not seem to use it.
@@ -463,6 +558,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
463 u64 beacon_timestamp, rx_timestamp; 558 u64 beacon_timestamp, rx_timestamp;
464 u32 supp_rates = 0; 559 u32 supp_rates = 0;
465 enum ieee80211_band band = rx_status->band; 560 enum ieee80211_band band = rx_status->band;
561 enum nl80211_bss_scan_width scan_width;
466 struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; 562 struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
467 bool rates_updated = false; 563 bool rates_updated = false;
468 564
@@ -484,16 +580,22 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
484 sta = sta_info_get(sdata, mgmt->sa); 580 sta = sta_info_get(sdata, mgmt->sa);
485 581
486 if (elems->supp_rates) { 582 if (elems->supp_rates) {
487 supp_rates = ieee80211_sta_get_rates(local, elems, 583 supp_rates = ieee80211_sta_get_rates(sdata, elems,
488 band, NULL); 584 band, NULL);
489 if (sta) { 585 if (sta) {
490 u32 prev_rates; 586 u32 prev_rates;
491 587
492 prev_rates = sta->sta.supp_rates[band]; 588 prev_rates = sta->sta.supp_rates[band];
493 /* make sure mandatory rates are always added */ 589 /* make sure mandatory rates are always added */
494 sta->sta.supp_rates[band] = supp_rates | 590 scan_width = NL80211_BSS_CHAN_WIDTH_20;
495 ieee80211_mandatory_rates(local, band); 591 if (rx_status->flag & RX_FLAG_5MHZ)
592 scan_width = NL80211_BSS_CHAN_WIDTH_5;
593 if (rx_status->flag & RX_FLAG_10MHZ)
594 scan_width = NL80211_BSS_CHAN_WIDTH_10;
496 595
596 sta->sta.supp_rates[band] = supp_rates |
597 ieee80211_mandatory_rates(sband,
598 scan_width);
497 if (sta->sta.supp_rates[band] != prev_rates) { 599 if (sta->sta.supp_rates[band] != prev_rates) {
498 ibss_dbg(sdata, 600 ibss_dbg(sdata,
499 "updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", 601 "updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n",
@@ -504,7 +606,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
504 } else { 606 } else {
505 rcu_read_unlock(); 607 rcu_read_unlock();
506 sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid, 608 sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid,
507 mgmt->sa, supp_rates, true); 609 mgmt->sa, supp_rates);
508 } 610 }
509 } 611 }
510 612
@@ -512,7 +614,9 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
512 set_sta_flag(sta, WLAN_STA_WME); 614 set_sta_flag(sta, WLAN_STA_WME);
513 615
514 if (sta && elems->ht_operation && elems->ht_cap_elem && 616 if (sta && elems->ht_operation && elems->ht_cap_elem &&
515 sdata->u.ibss.channel_type != NL80211_CHAN_NO_HT) { 617 sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT &&
618 sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_5 &&
619 sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_10) {
516 /* we both use HT */ 620 /* we both use HT */
517 struct ieee80211_ht_cap htcap_ie; 621 struct ieee80211_ht_cap htcap_ie;
518 struct cfg80211_chan_def chandef; 622 struct cfg80211_chan_def chandef;
@@ -527,8 +631,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
527 * fall back to HT20 if we don't use or use 631 * fall back to HT20 if we don't use or use
528 * the other extension channel 632 * the other extension channel
529 */ 633 */
530 if (cfg80211_get_chandef_type(&chandef) != 634 if (chandef.center_freq1 !=
531 sdata->u.ibss.channel_type) 635 sdata->u.ibss.chandef.center_freq1)
532 htcap_ie.cap_info &= 636 htcap_ie.cap_info &=
533 cpu_to_le16(~IEEE80211_HT_CAP_SUP_WIDTH_20_40); 637 cpu_to_le16(~IEEE80211_HT_CAP_SUP_WIDTH_20_40);
534 638
@@ -567,7 +671,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
567 671
568 /* different channel */ 672 /* different channel */
569 if (sdata->u.ibss.fixed_channel && 673 if (sdata->u.ibss.fixed_channel &&
570 sdata->u.ibss.channel != cbss->channel) 674 sdata->u.ibss.chandef.chan != cbss->channel)
571 goto put_bss; 675 goto put_bss;
572 676
573 /* different SSID */ 677 /* different SSID */
@@ -606,9 +710,9 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
606 "beacon TSF higher than local TSF - IBSS merge with BSSID %pM\n", 710 "beacon TSF higher than local TSF - IBSS merge with BSSID %pM\n",
607 mgmt->bssid); 711 mgmt->bssid);
608 ieee80211_sta_join_ibss(sdata, bss); 712 ieee80211_sta_join_ibss(sdata, bss);
609 supp_rates = ieee80211_sta_get_rates(local, elems, band, NULL); 713 supp_rates = ieee80211_sta_get_rates(sdata, elems, band, NULL);
610 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 714 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa,
611 supp_rates, true); 715 supp_rates);
612 rcu_read_unlock(); 716 rcu_read_unlock();
613 } 717 }
614 718
@@ -624,6 +728,8 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
624 struct ieee80211_local *local = sdata->local; 728 struct ieee80211_local *local = sdata->local;
625 struct sta_info *sta; 729 struct sta_info *sta;
626 struct ieee80211_chanctx_conf *chanctx_conf; 730 struct ieee80211_chanctx_conf *chanctx_conf;
731 struct ieee80211_supported_band *sband;
732 enum nl80211_bss_scan_width scan_width;
627 int band; 733 int band;
628 734
629 /* 735 /*
@@ -649,6 +755,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
649 return; 755 return;
650 } 756 }
651 band = chanctx_conf->def.chan->band; 757 band = chanctx_conf->def.chan->band;
758 scan_width = cfg80211_chandef_to_scan_width(&chanctx_conf->def);
652 rcu_read_unlock(); 759 rcu_read_unlock();
653 760
654 sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); 761 sta = sta_info_alloc(sdata, addr, GFP_ATOMIC);
@@ -658,8 +765,9 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
658 sta->last_rx = jiffies; 765 sta->last_rx = jiffies;
659 766
660 /* make sure mandatory rates are always added */ 767 /* make sure mandatory rates are always added */
768 sband = local->hw.wiphy->bands[band];
661 sta->sta.supp_rates[band] = supp_rates | 769 sta->sta.supp_rates[band] = supp_rates |
662 ieee80211_mandatory_rates(local, band); 770 ieee80211_mandatory_rates(sband, scan_width);
663 771
664 spin_lock(&ifibss->incomplete_lock); 772 spin_lock(&ifibss->incomplete_lock);
665 list_add(&sta->list, &ifibss->incomplete_stations); 773 list_add(&sta->list, &ifibss->incomplete_stations);
@@ -673,7 +781,7 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
673 int active = 0; 781 int active = 0;
674 struct sta_info *sta; 782 struct sta_info *sta;
675 783
676 lockdep_assert_held(&sdata->u.ibss.mtx); 784 sdata_assert_lock(sdata);
677 785
678 rcu_read_lock(); 786 rcu_read_lock();
679 787
@@ -691,6 +799,33 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
691 return active; 799 return active;
692} 800}
693 801
802static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata)
803{
804 struct ieee80211_local *local = sdata->local;
805 struct sta_info *sta, *tmp;
806 unsigned long exp_time = IEEE80211_IBSS_INACTIVITY_LIMIT;
807 unsigned long exp_rsn_time = IEEE80211_IBSS_RSN_INACTIVITY_LIMIT;
808
809 mutex_lock(&local->sta_mtx);
810
811 list_for_each_entry_safe(sta, tmp, &local->sta_list, list) {
812 if (sdata != sta->sdata)
813 continue;
814
815 if (time_after(jiffies, sta->last_rx + exp_time) ||
816 (time_after(jiffies, sta->last_rx + exp_rsn_time) &&
817 sta->sta_state != IEEE80211_STA_AUTHORIZED)) {
818 sta_dbg(sta->sdata, "expiring inactive %sSTA %pM\n",
819 sta->sta_state != IEEE80211_STA_AUTHORIZED ?
820 "not authorized " : "", sta->sta.addr);
821
822 WARN_ON(__sta_info_destroy(sta));
823 }
824 }
825
826 mutex_unlock(&local->sta_mtx);
827}
828
694/* 829/*
695 * This function is called with state == IEEE80211_IBSS_MLME_JOINED 830 * This function is called with state == IEEE80211_IBSS_MLME_JOINED
696 */ 831 */
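The dedicated expiry walk exists so peers that never complete RSN authorization are dropped on the shorter IEEE80211_IBSS_RSN_INACTIVITY_LIMIT, while authorized ones survive until IEEE80211_IBSS_INACTIVITY_LIMIT. time_after() is the wrap-safe jiffies comparison; stripped of its typechecks it is just a signed subtraction, e.g.:

#include <stdio.h>

/* core of linux/jiffies.h time_after(), minus the typechecks:
 * true when a is after b, correct across jiffies wraparound */
#define time_after(a, b)	((long)((b) - (a)) < 0)

int main(void)
{
	unsigned long jiffies = 5;			/* wrapped past 0 */
	unsigned long last_rx = (unsigned long)-10;	/* just before wrap */

	/* still true across the wrap: 5 is "after" last_rx + 3 */
	printf("%d\n", time_after(jiffies, last_rx + 3));
	return 0;
}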
@@ -698,13 +833,14 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
698static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) 833static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata)
699{ 834{
700 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; 835 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
836 enum nl80211_bss_scan_width scan_width;
701 837
702 lockdep_assert_held(&ifibss->mtx); 838 sdata_assert_lock(sdata);
703 839
704 mod_timer(&ifibss->timer, 840 mod_timer(&ifibss->timer,
705 round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); 841 round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL));
706 842
707 ieee80211_sta_expire(sdata, IEEE80211_IBSS_INACTIVITY_LIMIT); 843 ieee80211_ibss_sta_expire(sdata);
708 844
709 if (time_before(jiffies, ifibss->last_scan_completed + 845 if (time_before(jiffies, ifibss->last_scan_completed +
710 IEEE80211_IBSS_MERGE_INTERVAL)) 846 IEEE80211_IBSS_MERGE_INTERVAL))
@@ -719,8 +855,9 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata)
719 sdata_info(sdata, 855 sdata_info(sdata,
720 "No active IBSS STAs - trying to scan for other IBSS networks with same SSID (merge)\n"); 856 "No active IBSS STAs - trying to scan for other IBSS networks with same SSID (merge)\n");
721 857
858 scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef);
722 ieee80211_request_ibss_scan(sdata, ifibss->ssid, ifibss->ssid_len, 859 ieee80211_request_ibss_scan(sdata, ifibss->ssid, ifibss->ssid_len,
723 NULL); 860 NULL, scan_width);
724} 861}
725 862
726static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) 863static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
@@ -730,7 +867,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
730 u16 capability; 867 u16 capability;
731 int i; 868 int i;
732 869
733 lockdep_assert_held(&ifibss->mtx); 870 sdata_assert_lock(sdata);
734 871
735 if (ifibss->fixed_bssid) { 872 if (ifibss->fixed_bssid) {
736 memcpy(bssid, ifibss->bssid, ETH_ALEN); 873 memcpy(bssid, ifibss->bssid, ETH_ALEN);
@@ -755,7 +892,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
755 sdata->drop_unencrypted = 0; 892 sdata->drop_unencrypted = 0;
756 893
757 __ieee80211_sta_join_ibss(sdata, bssid, sdata->vif.bss_conf.beacon_int, 894 __ieee80211_sta_join_ibss(sdata, bssid, sdata->vif.bss_conf.beacon_int,
758 ifibss->channel, ifibss->basic_rates, 895 &ifibss->chandef, ifibss->basic_rates,
759 capability, 0, true); 896 capability, 0, true);
760} 897}
761 898
@@ -770,10 +907,11 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
770 struct cfg80211_bss *cbss; 907 struct cfg80211_bss *cbss;
771 struct ieee80211_channel *chan = NULL; 908 struct ieee80211_channel *chan = NULL;
772 const u8 *bssid = NULL; 909 const u8 *bssid = NULL;
910 enum nl80211_bss_scan_width scan_width;
773 int active_ibss; 911 int active_ibss;
774 u16 capability; 912 u16 capability;
775 913
776 lockdep_assert_held(&ifibss->mtx); 914 sdata_assert_lock(sdata);
777 915
778 active_ibss = ieee80211_sta_active_ibss(sdata); 916 active_ibss = ieee80211_sta_active_ibss(sdata);
779 ibss_dbg(sdata, "sta_find_ibss (active_ibss=%d)\n", active_ibss); 917 ibss_dbg(sdata, "sta_find_ibss (active_ibss=%d)\n", active_ibss);
@@ -787,7 +925,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
787 if (ifibss->fixed_bssid) 925 if (ifibss->fixed_bssid)
788 bssid = ifibss->bssid; 926 bssid = ifibss->bssid;
789 if (ifibss->fixed_channel) 927 if (ifibss->fixed_channel)
790 chan = ifibss->channel; 928 chan = ifibss->chandef.chan;
791 if (!is_zero_ether_addr(ifibss->bssid)) 929 if (!is_zero_ether_addr(ifibss->bssid))
792 bssid = ifibss->bssid; 930 bssid = ifibss->bssid;
793 cbss = cfg80211_get_bss(local->hw.wiphy, chan, bssid, 931 cbss = cfg80211_get_bss(local->hw.wiphy, chan, bssid,
@@ -811,6 +949,17 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
811 return; 949 return;
812 } 950 }
813 951
 952 /* if a fixed bssid and a fixed freq have been provided, create the IBSS
953 * directly and do not waste time scanning
954 */
955 if (ifibss->fixed_bssid && ifibss->fixed_channel) {
956 sdata_info(sdata, "Created IBSS using preconfigured BSSID %pM\n",
957 bssid);
958 ieee80211_sta_create_ibss(sdata);
959 return;
960 }
961
962
814 ibss_dbg(sdata, "sta_find_ibss: did not try to join ibss\n"); 963 ibss_dbg(sdata, "sta_find_ibss: did not try to join ibss\n");
815 964
816 /* Selected IBSS not found in current scan results - try to scan */ 965 /* Selected IBSS not found in current scan results - try to scan */
@@ -818,8 +967,10 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
818 IEEE80211_SCAN_INTERVAL)) { 967 IEEE80211_SCAN_INTERVAL)) {
819 sdata_info(sdata, "Trigger new scan to find an IBSS to join\n"); 968 sdata_info(sdata, "Trigger new scan to find an IBSS to join\n");
820 969
970 scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef);
821 ieee80211_request_ibss_scan(sdata, ifibss->ssid, 971 ieee80211_request_ibss_scan(sdata, ifibss->ssid,
822 ifibss->ssid_len, chan); 972 ifibss->ssid_len, chan,
973 scan_width);
823 } else { 974 } else {
824 int interval = IEEE80211_SCAN_INTERVAL; 975 int interval = IEEE80211_SCAN_INTERVAL;
825 976
@@ -843,10 +994,10 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
843 struct beacon_data *presp; 994 struct beacon_data *presp;
844 u8 *pos, *end; 995 u8 *pos, *end;
845 996
846 lockdep_assert_held(&ifibss->mtx); 997 sdata_assert_lock(sdata);
847 998
848 presp = rcu_dereference_protected(ifibss->presp, 999 presp = rcu_dereference_protected(ifibss->presp,
849 lockdep_is_held(&ifibss->mtx)); 1000 lockdep_is_held(&sdata->wdev.mtx));
850 1001
851 if (ifibss->state != IEEE80211_IBSS_MLME_JOINED || 1002 if (ifibss->state != IEEE80211_IBSS_MLME_JOINED ||
852 len < 24 + 2 || !presp) 1003 len < 24 + 2 || !presp)
@@ -930,7 +1081,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
930 mgmt = (struct ieee80211_mgmt *) skb->data; 1081 mgmt = (struct ieee80211_mgmt *) skb->data;
931 fc = le16_to_cpu(mgmt->frame_control); 1082 fc = le16_to_cpu(mgmt->frame_control);
932 1083
933 mutex_lock(&sdata->u.ibss.mtx); 1084 sdata_lock(sdata);
934 1085
935 if (!sdata->u.ibss.ssid_len) 1086 if (!sdata->u.ibss.ssid_len)
936 goto mgmt_out; /* not ready to merge yet */ 1087 goto mgmt_out; /* not ready to merge yet */
@@ -953,7 +1104,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
953 } 1104 }
954 1105
955 mgmt_out: 1106 mgmt_out:
956 mutex_unlock(&sdata->u.ibss.mtx); 1107 sdata_unlock(sdata);
957} 1108}
958 1109
959void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) 1110void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata)
@@ -961,7 +1112,7 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata)
961 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; 1112 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
962 struct sta_info *sta; 1113 struct sta_info *sta;
963 1114
964 mutex_lock(&ifibss->mtx); 1115 sdata_lock(sdata);
965 1116
966 /* 1117 /*
967 * Work could be scheduled after scan or similar 1118 * Work could be scheduled after scan or similar
@@ -978,7 +1129,7 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata)
978 list_del(&sta->list); 1129 list_del(&sta->list);
979 spin_unlock_bh(&ifibss->incomplete_lock); 1130 spin_unlock_bh(&ifibss->incomplete_lock);
980 1131
981 ieee80211_ibss_finish_sta(sta, true); 1132 ieee80211_ibss_finish_sta(sta);
982 rcu_read_unlock(); 1133 rcu_read_unlock();
983 spin_lock_bh(&ifibss->incomplete_lock); 1134 spin_lock_bh(&ifibss->incomplete_lock);
984 } 1135 }
@@ -997,7 +1148,7 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata)
997 } 1148 }
998 1149
999 out: 1150 out:
1000 mutex_unlock(&ifibss->mtx); 1151 sdata_unlock(sdata);
1001} 1152}
1002 1153
1003static void ieee80211_ibss_timer(unsigned long data) 1154static void ieee80211_ibss_timer(unsigned long data)
@@ -1014,7 +1165,6 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata)
1014 1165
1015 setup_timer(&ifibss->timer, ieee80211_ibss_timer, 1166 setup_timer(&ifibss->timer, ieee80211_ibss_timer,
1016 (unsigned long) sdata); 1167 (unsigned long) sdata);
1017 mutex_init(&ifibss->mtx);
1018 INIT_LIST_HEAD(&ifibss->incomplete_stations); 1168 INIT_LIST_HEAD(&ifibss->incomplete_stations);
1019 spin_lock_init(&ifibss->incomplete_lock); 1169 spin_lock_init(&ifibss->incomplete_lock);
1020} 1170}
@@ -1040,8 +1190,9 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
1040 struct cfg80211_ibss_params *params) 1190 struct cfg80211_ibss_params *params)
1041{ 1191{
1042 u32 changed = 0; 1192 u32 changed = 0;
1043 1193 u32 rate_flags;
1044 mutex_lock(&sdata->u.ibss.mtx); 1194 struct ieee80211_supported_band *sband;
1195 int i;
1045 1196
1046 if (params->bssid) { 1197 if (params->bssid) {
1047 memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN); 1198 memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN);
@@ -1052,14 +1203,20 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
1052 sdata->u.ibss.privacy = params->privacy; 1203 sdata->u.ibss.privacy = params->privacy;
1053 sdata->u.ibss.control_port = params->control_port; 1204 sdata->u.ibss.control_port = params->control_port;
1054 sdata->u.ibss.basic_rates = params->basic_rates; 1205 sdata->u.ibss.basic_rates = params->basic_rates;
1206
1207 /* fix basic_rates if channel does not support these rates */
1208 rate_flags = ieee80211_chandef_rate_flags(&params->chandef);
1209 sband = sdata->local->hw.wiphy->bands[params->chandef.chan->band];
1210 for (i = 0; i < sband->n_bitrates; i++) {
1211 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
1212 sdata->u.ibss.basic_rates &= ~BIT(i);
1213 }
1055 memcpy(sdata->vif.bss_conf.mcast_rate, params->mcast_rate, 1214 memcpy(sdata->vif.bss_conf.mcast_rate, params->mcast_rate,
1056 sizeof(params->mcast_rate)); 1215 sizeof(params->mcast_rate));
1057 1216
1058 sdata->vif.bss_conf.beacon_int = params->beacon_interval; 1217 sdata->vif.bss_conf.beacon_int = params->beacon_interval;
1059 1218
1060 sdata->u.ibss.channel = params->chandef.chan; 1219 sdata->u.ibss.chandef = params->chandef;
1061 sdata->u.ibss.channel_type =
1062 cfg80211_get_chandef_type(&params->chandef);
1063 sdata->u.ibss.fixed_channel = params->channel_fixed; 1220 sdata->u.ibss.fixed_channel = params->channel_fixed;
1064 1221
1065 if (params->ie) { 1222 if (params->ie) {
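A configured basic rate that the (possibly 5/10 MHz) channel cannot carry is masked out up front; otherwise the beacon would advertise a basic rate no peer could use. The test keeps a rate only if it carries every flag the chandef demands, assuming ieee80211_chandef_rate_flags() returns per-rate capability flags such as IEEE80211_RATE_SUPPORTS_5MHZ for narrow chandefs. A standalone sketch of the subset test (local stand-in flags, not kernel API):

#include <stdio.h>

#define BIT(n)			(1u << (n))
#define RATE_SUPPORTS_5MHZ	BIT(0)
#define RATE_SUPPORTS_10MHZ	BIT(1)

int main(void)
{
	/* flags a 5 MHz chandef would demand (assumed) */
	unsigned int rate_flags = RATE_SUPPORTS_5MHZ;
	/* CCK 1 Mb/s: no narrow-channel support; OFDM 6 Mb/s: both */
	unsigned int cck = 0;
	unsigned int ofdm = RATE_SUPPORTS_5MHZ | RATE_SUPPORTS_10MHZ;

	printf("cck kept:  %d\n", (rate_flags & cck) == rate_flags);
	printf("ofdm kept: %d\n", (rate_flags & ofdm) == rate_flags);
	return 0;
}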
@@ -1075,7 +1232,10 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
1075 memcpy(sdata->u.ibss.ssid, params->ssid, params->ssid_len); 1232 memcpy(sdata->u.ibss.ssid, params->ssid, params->ssid_len);
1076 sdata->u.ibss.ssid_len = params->ssid_len; 1233 sdata->u.ibss.ssid_len = params->ssid_len;
1077 1234
1078 mutex_unlock(&sdata->u.ibss.mtx); 1235 memcpy(&sdata->u.ibss.ht_capa, &params->ht_capa,
1236 sizeof(sdata->u.ibss.ht_capa));
1237 memcpy(&sdata->u.ibss.ht_capa_mask, &params->ht_capa_mask,
1238 sizeof(sdata->u.ibss.ht_capa_mask));
1079 1239
1080 /* 1240 /*
1081 * 802.11n-2009 9.13.3.1: In an IBSS, the HT Protection field is 1241 * 802.11n-2009 9.13.3.1: In an IBSS, the HT Protection field is
@@ -1112,8 +1272,6 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
1112 struct sta_info *sta; 1272 struct sta_info *sta;
1113 struct beacon_data *presp; 1273 struct beacon_data *presp;
1114 1274
1115 mutex_lock(&sdata->u.ibss.mtx);
1116
1117 active_ibss = ieee80211_sta_active_ibss(sdata); 1275 active_ibss = ieee80211_sta_active_ibss(sdata);
1118 1276
1119 if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) { 1277 if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) {
@@ -1122,7 +1280,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
1122 if (ifibss->privacy) 1280 if (ifibss->privacy)
1123 capability |= WLAN_CAPABILITY_PRIVACY; 1281 capability |= WLAN_CAPABILITY_PRIVACY;
1124 1282
1125 cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->channel, 1283 cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->chandef.chan,
1126 ifibss->bssid, ifibss->ssid, 1284 ifibss->bssid, ifibss->ssid,
1127 ifibss->ssid_len, WLAN_CAPABILITY_IBSS | 1285 ifibss->ssid_len, WLAN_CAPABILITY_IBSS |
1128 WLAN_CAPABILITY_PRIVACY, 1286 WLAN_CAPABILITY_PRIVACY,
@@ -1157,8 +1315,13 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
1157 /* remove beacon */ 1315 /* remove beacon */
1158 kfree(sdata->u.ibss.ie); 1316 kfree(sdata->u.ibss.ie);
1159 presp = rcu_dereference_protected(ifibss->presp, 1317 presp = rcu_dereference_protected(ifibss->presp,
1160 lockdep_is_held(&sdata->u.ibss.mtx)); 1318 lockdep_is_held(&sdata->wdev.mtx));
1161 RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); 1319 RCU_INIT_POINTER(sdata->u.ibss.presp, NULL);
1320
1321 /* on the next join, re-program HT parameters */
1322 memset(&ifibss->ht_capa, 0, sizeof(ifibss->ht_capa));
1323 memset(&ifibss->ht_capa_mask, 0, sizeof(ifibss->ht_capa_mask));
1324
1162 sdata->vif.bss_conf.ibss_joined = false; 1325 sdata->vif.bss_conf.ibss_joined = false;
1163 sdata->vif.bss_conf.ibss_creator = false; 1326 sdata->vif.bss_conf.ibss_creator = false;
1164 sdata->vif.bss_conf.enable_beacon = false; 1327 sdata->vif.bss_conf.enable_beacon = false;
@@ -1166,6 +1329,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
1166 clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); 1329 clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
1167 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | 1330 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED |
1168 BSS_CHANGED_IBSS); 1331 BSS_CHANGED_IBSS);
1332 ieee80211_vif_release_channel(sdata);
1169 synchronize_rcu(); 1333 synchronize_rcu();
1170 kfree(presp); 1334 kfree(presp);
1171 1335
@@ -1173,7 +1337,5 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
1173 1337
1174 del_timer_sync(&sdata->u.ibss.timer); 1338 del_timer_sync(&sdata->u.ibss.timer);
1175 1339
1176 mutex_unlock(&sdata->u.ibss.mtx);
1177
1178 return 0; 1340 return 0;
1179} 1341}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 44be28cfc6c4..b6186517ec56 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -53,9 +53,6 @@ struct ieee80211_local;
53 * increased memory use (about 2 kB of RAM per entry). */ 53 * increased memory use (about 2 kB of RAM per entry). */
54#define IEEE80211_FRAGMENT_MAX 4 54#define IEEE80211_FRAGMENT_MAX 4
55 55
56#define TU_TO_JIFFIES(x) (usecs_to_jiffies((x) * 1024))
57#define TU_TO_EXP_TIME(x) (jiffies + TU_TO_JIFFIES(x))
58
59/* power level hasn't been configured (or set to automatic) */ 56/* power level hasn't been configured (or set to automatic) */
60#define IEEE80211_UNSET_POWER_LEVEL INT_MIN 57#define IEEE80211_UNSET_POWER_LEVEL INT_MIN
61 58
@@ -94,6 +91,7 @@ struct ieee80211_bss {
94#define IEEE80211_MAX_SUPP_RATES 32 91#define IEEE80211_MAX_SUPP_RATES 32
95 u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; 92 u8 supp_rates[IEEE80211_MAX_SUPP_RATES];
96 size_t supp_rates_len; 93 size_t supp_rates_len;
94 struct ieee80211_rate *beacon_rate;
97 95
98 /* 96 /*
99 * During association, we save an ERP value from a probe response so 97 * During association, we save an ERP value from a probe response so
@@ -258,6 +256,8 @@ struct ieee80211_if_ap {
258 struct beacon_data __rcu *beacon; 256 struct beacon_data __rcu *beacon;
259 struct probe_resp __rcu *probe_resp; 257 struct probe_resp __rcu *probe_resp;
260 258
259 /* to be used after channel switch. */
260 struct cfg80211_beacon_data *next_beacon;
261 struct list_head vlans; 261 struct list_head vlans;
262 262
263 struct ps_data ps; 263 struct ps_data ps;
@@ -366,7 +366,7 @@ struct ieee80211_mgd_assoc_data {
366 u8 ssid_len; 366 u8 ssid_len;
367 u8 supp_rates_len; 367 u8 supp_rates_len;
368 bool wmm, uapsd; 368 bool wmm, uapsd;
369 bool have_beacon, need_beacon; 369 bool need_beacon;
370 bool synced; 370 bool synced;
371 bool timeout_started; 371 bool timeout_started;
372 372
@@ -394,7 +394,6 @@ struct ieee80211_if_managed {
394 bool nullfunc_failed; 394 bool nullfunc_failed;
395 bool connection_loss; 395 bool connection_loss;
396 396
397 struct mutex mtx;
398 struct cfg80211_bss *associated; 397 struct cfg80211_bss *associated;
399 struct ieee80211_mgd_auth_data *auth_data; 398 struct ieee80211_mgd_auth_data *auth_data;
400 struct ieee80211_mgd_assoc_data *assoc_data; 399 struct ieee80211_mgd_assoc_data *assoc_data;
@@ -405,6 +404,7 @@ struct ieee80211_if_managed {
405 404
406 bool powersave; /* powersave requested for this iface */ 405 bool powersave; /* powersave requested for this iface */
407 bool broken_ap; /* AP is broken -- turn off powersave */ 406 bool broken_ap; /* AP is broken -- turn off powersave */
407 bool have_beacon;
408 u8 dtim_period; 408 u8 dtim_period;
409 enum ieee80211_smps_mode req_smps, /* requested smps mode */ 409 enum ieee80211_smps_mode req_smps, /* requested smps mode */
410 driver_smps_mode; /* smps mode request */ 410 driver_smps_mode; /* smps mode request */
@@ -488,8 +488,6 @@ struct ieee80211_if_managed {
488struct ieee80211_if_ibss { 488struct ieee80211_if_ibss {
489 struct timer_list timer; 489 struct timer_list timer;
490 490
491 struct mutex mtx;
492
493 unsigned long last_scan_completed; 491 unsigned long last_scan_completed;
494 492
495 u32 basic_rates; 493 u32 basic_rates;
@@ -499,19 +497,20 @@ struct ieee80211_if_ibss {
499 bool privacy; 497 bool privacy;
500 498
501 bool control_port; 499 bool control_port;
502 unsigned int auth_frame_registrations;
503 500
504 u8 bssid[ETH_ALEN] __aligned(2); 501 u8 bssid[ETH_ALEN] __aligned(2);
505 u8 ssid[IEEE80211_MAX_SSID_LEN]; 502 u8 ssid[IEEE80211_MAX_SSID_LEN];
506 u8 ssid_len, ie_len; 503 u8 ssid_len, ie_len;
507 u8 *ie; 504 u8 *ie;
508 struct ieee80211_channel *channel; 505 struct cfg80211_chan_def chandef;
509 enum nl80211_channel_type channel_type;
510 506
511 unsigned long ibss_join_req; 507 unsigned long ibss_join_req;
512 /* probe response/beacon for IBSS */ 508 /* probe response/beacon for IBSS */
513 struct beacon_data __rcu *presp; 509 struct beacon_data __rcu *presp;
514 510
 511 struct ieee80211_ht_cap ht_capa; /* configured ht-cap overrides */
512 struct ieee80211_ht_cap ht_capa_mask; /* Valid parts of ht_capa */
513
515 spinlock_t incomplete_lock; 514 spinlock_t incomplete_lock;
516 struct list_head incomplete_stations; 515 struct list_head incomplete_stations;
517 516
@@ -545,6 +544,7 @@ struct ieee80211_if_mesh {
545 struct timer_list mesh_path_root_timer; 544 struct timer_list mesh_path_root_timer;
546 545
547 unsigned long wrkq_flags; 546 unsigned long wrkq_flags;
547 unsigned long mbss_changed;
548 548
549 u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN]; 549 u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN];
550 size_t mesh_id_len; 550 size_t mesh_id_len;
@@ -580,8 +580,6 @@ struct ieee80211_if_mesh {
580 bool accepting_plinks; 580 bool accepting_plinks;
581 int num_gates; 581 int num_gates;
582 struct beacon_data __rcu *beacon; 582 struct beacon_data __rcu *beacon;
583 /* just protects beacon updates for now */
584 struct mutex mtx;
585 const u8 *ie; 583 const u8 *ie;
586 u8 ie_len; 584 u8 ie_len;
587 enum { 585 enum {
@@ -717,6 +715,11 @@ struct ieee80211_sub_if_data {
717 715
718 struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS]; 716 struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS];
719 717
718 struct work_struct csa_finalize_work;
719 int csa_counter_offset_beacon;
720 int csa_counter_offset_presp;
721 bool csa_radar_required;
722
720 /* used to reconfigure hardware SM PS */ 723 /* used to reconfigure hardware SM PS */
721 struct work_struct recalc_smps; 724 struct work_struct recalc_smps;
722 725
@@ -778,6 +781,26 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p)
778 return container_of(p, struct ieee80211_sub_if_data, vif); 781 return container_of(p, struct ieee80211_sub_if_data, vif);
779} 782}
780 783
784static inline void sdata_lock(struct ieee80211_sub_if_data *sdata)
785 __acquires(&sdata->wdev.mtx)
786{
787 mutex_lock(&sdata->wdev.mtx);
788 __acquire(&sdata->wdev.mtx);
789}
790
791static inline void sdata_unlock(struct ieee80211_sub_if_data *sdata)
792 __releases(&sdata->wdev.mtx)
793{
794 mutex_unlock(&sdata->wdev.mtx);
795 __release(&sdata->wdev.mtx);
796}
797
798static inline void
799sdata_assert_lock(struct ieee80211_sub_if_data *sdata)
800{
801 lockdep_assert_held(&sdata->wdev.mtx);
802}
803
781static inline enum ieee80211_band 804static inline enum ieee80211_band
782ieee80211_get_sdata_band(struct ieee80211_sub_if_data *sdata) 805ieee80211_get_sdata_band(struct ieee80211_sub_if_data *sdata)
783{ 806{
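These wrappers collapse the per-mode mutexes (ibss, managed, mesh) into the wdev mutex. The __acquires/__releases annotations plus the explicit __acquire/__release calls keep sparse's context tracking balanced, since sparse cannot see through mutex_lock() here. A hypothetical call site after the conversion (kernel context, ieee80211_i.h assumed):

static void example_ibss_op(struct ieee80211_sub_if_data *sdata)
{
	sdata_lock(sdata);		/* takes sdata->wdev.mtx */
	/* ... touch sdata->u.ibss / u.mgd / u.mesh state here;
	 * callees use sdata_assert_lock() instead of re-taking it */
	sdata_unlock(sdata);
}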
@@ -793,6 +816,34 @@ ieee80211_get_sdata_band(struct ieee80211_sub_if_data *sdata)
793 return band; 816 return band;
794} 817}
795 818
819static inline int
820ieee80211_chandef_get_shift(struct cfg80211_chan_def *chandef)
821{
822 switch (chandef->width) {
823 case NL80211_CHAN_WIDTH_5:
824 return 2;
825 case NL80211_CHAN_WIDTH_10:
826 return 1;
827 default:
828 return 0;
829 }
830}
831
832static inline int
833ieee80211_vif_get_shift(struct ieee80211_vif *vif)
834{
835 struct ieee80211_chanctx_conf *chanctx_conf;
836 int shift = 0;
837
838 rcu_read_lock();
839 chanctx_conf = rcu_dereference(vif->chanctx_conf);
840 if (chanctx_conf)
841 shift = ieee80211_chandef_get_shift(&chanctx_conf->def);
842 rcu_read_unlock();
843
844 return shift;
845}
846
796enum sdata_queue_type { 847enum sdata_queue_type {
797 IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0, 848 IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0,
798 IEEE80211_SDATA_QUEUE_AGG_START = 1, 849 IEEE80211_SDATA_QUEUE_AGG_START = 1,
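The shift helpers centralize the "divide rates by 2^shift" rule that 5/10 MHz support threads through the rate IEs, rate matching, and frame-duration math (note the new shift parameter on ieee80211_frame_duration() later in this header diff). Effective throughput for a nominal table entry, as a standalone illustration:

#include <stdio.h>

int main(void)
{
	int bitrate = 540;	/* 54.0 Mb/s in 100 kb/s units */
	int shift;

	/* shift 0: 20 MHz, 1: 10 MHz (half clock), 2: 5 MHz (quarter) */
	for (shift = 0; shift <= 2; shift++)
		printf("shift %d: %d.%d Mb/s\n", shift,
		       (bitrate >> shift) / 10, (bitrate >> shift) % 10);
	return 0;
}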
@@ -1010,7 +1061,7 @@ struct ieee80211_local {
1010 struct cfg80211_ssid scan_ssid; 1061 struct cfg80211_ssid scan_ssid;
1011 struct cfg80211_scan_request *int_scan_req; 1062 struct cfg80211_scan_request *int_scan_req;
1012 struct cfg80211_scan_request *scan_req, *hw_scan_req; 1063 struct cfg80211_scan_request *scan_req, *hw_scan_req;
1013 struct ieee80211_channel *scan_channel; 1064 struct cfg80211_chan_def scan_chandef;
1014 enum ieee80211_band hw_scan_band; 1065 enum ieee80211_band hw_scan_band;
1015 int scan_channel_idx; 1066 int scan_channel_idx;
1016 int scan_ies_len; 1067 int scan_ies_len;
@@ -1047,7 +1098,6 @@ struct ieee80211_local {
1047 u32 dot11TransmittedFrameCount; 1098 u32 dot11TransmittedFrameCount;
1048 1099
1049#ifdef CONFIG_MAC80211_LEDS 1100#ifdef CONFIG_MAC80211_LEDS
1050 int tx_led_counter, rx_led_counter;
1051 struct led_trigger *tx_led, *rx_led, *assoc_led, *radio_led; 1101 struct led_trigger *tx_led, *rx_led, *assoc_led, *radio_led;
1052 struct tpt_led_trigger *tpt_led_trigger; 1102 struct tpt_led_trigger *tpt_led_trigger;
1053 char tx_led_name[32], rx_led_name[32], 1103 char tx_led_name[32], rx_led_name[32],
@@ -1290,7 +1340,8 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1290void ieee80211_scan_work(struct work_struct *work); 1340void ieee80211_scan_work(struct work_struct *work);
1291int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, 1341int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
1292 const u8 *ssid, u8 ssid_len, 1342 const u8 *ssid, u8 ssid_len,
1293 struct ieee80211_channel *chan); 1343 struct ieee80211_channel *chan,
1344 enum nl80211_bss_scan_width scan_width);
1294int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, 1345int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
1295 struct cfg80211_scan_request *req); 1346 struct cfg80211_scan_request *req);
1296void ieee80211_scan_cancel(struct ieee80211_local *local); 1347void ieee80211_scan_cancel(struct ieee80211_local *local);
@@ -1325,6 +1376,9 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free);
1325void ieee80211_sw_roc_work(struct work_struct *work); 1376void ieee80211_sw_roc_work(struct work_struct *work);
1326void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); 1377void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc);
1327 1378
1379/* channel switch handling */
1380void ieee80211_csa_finalize_work(struct work_struct *work);
1381
1328/* interface handling */ 1382/* interface handling */
1329int ieee80211_iface_init(void); 1383int ieee80211_iface_init(void);
1330void ieee80211_iface_exit(void); 1384void ieee80211_iface_exit(void);
@@ -1346,6 +1400,8 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local);
1346 1400
1347bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); 1401bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata);
1348void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); 1402void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata);
1403int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
1404 struct cfg80211_beacon_data *params);
1349 1405
1350static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) 1406static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata)
1351{ 1407{
@@ -1449,7 +1505,8 @@ extern void *mac80211_wiphy_privid; /* for wiphy privid */
1449u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, 1505u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
1450 enum nl80211_iftype type); 1506 enum nl80211_iftype type);
1451int ieee80211_frame_duration(enum ieee80211_band band, size_t len, 1507int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
1452 int rate, int erp, int short_preamble); 1508 int rate, int erp, int short_preamble,
1509 int shift);
1453void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx, 1510void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx,
1454 struct ieee80211_hdr *hdr, const u8 *tsc, 1511 struct ieee80211_hdr *hdr, const u8 *tsc,
1455 gfp_t gfp); 1512 gfp_t gfp);
@@ -1497,18 +1554,16 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata,
1497 ieee80211_tx_skb_tid(sdata, skb, 7); 1554 ieee80211_tx_skb_tid(sdata, skb, 7);
1498} 1555}
1499 1556
1500u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, bool action, 1557u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
1501 struct ieee802_11_elems *elems, 1558 struct ieee802_11_elems *elems,
1502 u64 filter, u32 crc); 1559 u64 filter, u32 crc);
1503static inline void ieee802_11_parse_elems(u8 *start, size_t len, bool action, 1560static inline void ieee802_11_parse_elems(const u8 *start, size_t len,
1561 bool action,
1504 struct ieee802_11_elems *elems) 1562 struct ieee802_11_elems *elems)
1505{ 1563{
1506 ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0); 1564 ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0);
1507} 1565}
1508 1566
1509u32 ieee80211_mandatory_rates(struct ieee80211_local *local,
1510 enum ieee80211_band band);
1511
1512void ieee80211_dynamic_ps_enable_work(struct work_struct *work); 1567void ieee80211_dynamic_ps_enable_work(struct work_struct *work);
1513void ieee80211_dynamic_ps_disable_work(struct work_struct *work); 1568void ieee80211_dynamic_ps_disable_work(struct work_struct *work);
1514void ieee80211_dynamic_ps_timer(unsigned long data); 1569void ieee80211_dynamic_ps_timer(unsigned long data);
@@ -1555,7 +1610,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
1555int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, 1610int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
1556 size_t buffer_len, const u8 *ie, size_t ie_len, 1611 size_t buffer_len, const u8 *ie, size_t ie_len,
1557 enum ieee80211_band band, u32 rate_mask, 1612 enum ieee80211_band band, u32 rate_mask,
1558 u8 channel); 1613 struct cfg80211_chan_def *chandef);
1559struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, 1614struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
1560 u8 *dst, u32 ratemask, 1615 u8 *dst, u32 ratemask,
1561 struct ieee80211_channel *chan, 1616 struct ieee80211_channel *chan,
@@ -1568,10 +1623,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
1568 u32 ratemask, bool directed, u32 tx_flags, 1623 u32 ratemask, bool directed, u32 tx_flags,
1569 struct ieee80211_channel *channel, bool scan); 1624 struct ieee80211_channel *channel, bool scan);
1570 1625
1571void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, 1626u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
1572 const size_t supp_rates_len,
1573 const u8 *supp_rates);
1574u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
1575 struct ieee802_11_elems *elems, 1627 struct ieee802_11_elems *elems,
1576 enum ieee80211_band band, u32 *basic_rates); 1628 enum ieee80211_band band, u32 *basic_rates);
1577int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, 1629int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata,
@@ -1588,6 +1640,9 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
1588 u16 prot_mode); 1640 u16 prot_mode);
1589u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, 1641u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
1590 u32 cap); 1642 u32 cap);
1643int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
1644 const struct ieee80211_supported_band *sband,
1645 const u8 *srates, int srates_len, u32 *rates);
1591int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, 1646int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
1592 struct sk_buff *skb, bool need_basic, 1647 struct sk_buff *skb, bool need_basic,
1593 enum ieee80211_band band); 1648 enum ieee80211_band band);
@@ -1608,6 +1663,11 @@ int __must_check
1608ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, 1663ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
1609 const struct cfg80211_chan_def *chandef, 1664 const struct cfg80211_chan_def *chandef,
1610 u32 *changed); 1665 u32 *changed);
1666/* NOTE: only use ieee80211_vif_change_channel() for channel switch */
1667int __must_check
1668ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
1669 const struct cfg80211_chan_def *chandef,
1670 u32 *changed);
1611void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata); 1671void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata);
1612void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata); 1672void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata);
1613void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, 1673void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 98d20c0f6fed..fcecd633514e 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -54,7 +54,7 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata)
54 return false; 54 return false;
55 } 55 }
56 56
57 power = chanctx_conf->def.chan->max_power; 57 power = ieee80211_chandef_max_power(&chanctx_conf->def);
58 rcu_read_unlock(); 58 rcu_read_unlock();
59 59
60 if (sdata->user_power_level != IEEE80211_UNSET_POWER_LEVEL) 60 if (sdata->user_power_level != IEEE80211_UNSET_POWER_LEVEL)
@@ -159,7 +159,8 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu)
159 return 0; 159 return 0;
160} 160}
161 161
162static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr) 162static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr,
163 bool check_dup)
163{ 164{
164 struct ieee80211_local *local = sdata->local; 165 struct ieee80211_local *local = sdata->local;
165 struct ieee80211_sub_if_data *iter; 166 struct ieee80211_sub_if_data *iter;
@@ -180,13 +181,16 @@ static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr)
180 ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) | 181 ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) |
181 ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8); 182 ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8);
182 183
184 if (!check_dup)
185 return ret;
183 186
184 mutex_lock(&local->iflist_mtx); 187 mutex_lock(&local->iflist_mtx);
185 list_for_each_entry(iter, &local->interfaces, list) { 188 list_for_each_entry(iter, &local->interfaces, list) {
186 if (iter == sdata) 189 if (iter == sdata)
187 continue; 190 continue;
188 191
189 if (iter->vif.type == NL80211_IFTYPE_MONITOR) 192 if (iter->vif.type == NL80211_IFTYPE_MONITOR &&
193 !(iter->u.mntr_flags & MONITOR_FLAG_ACTIVE))
190 continue; 194 continue;
191 195
192 m = iter->vif.addr; 196 m = iter->vif.addr;
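For the duplicate-address check the 6-byte MAC is packed big-endian into a u64 so interfaces can be compared (and masked) with plain integer operations; the hunk above only shows bytes 2 to 5, the first two bytes are packed the same way. A standalone sketch of the packing:

#include <stdio.h>

typedef unsigned long long u64;

static u64 mac_to_u64(const unsigned char *m)
{
	return ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) |
	       ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) |
	       ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8);
}

int main(void)
{
	const unsigned char mac[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };

	printf("%012llx\n", mac_to_u64(mac));	/* prints 020000000001 */
	return 0;
}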
@@ -208,12 +212,17 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct sockaddr *sa = addr;
+	bool check_dup = true;
 	int ret;
 
 	if (ieee80211_sdata_running(sdata))
 		return -EBUSY;
 
-	ret = ieee80211_verify_mac(sdata, sa->sa_data);
+	if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+	    !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))
+		check_dup = false;
+
+	ret = ieee80211_verify_mac(sdata, sa->sa_data, check_dup);
 	if (ret)
 		return ret;
 
@@ -265,6 +274,12 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
 		if (iftype == NL80211_IFTYPE_ADHOC &&
 		    nsdata->vif.type == NL80211_IFTYPE_ADHOC)
 			return -EBUSY;
+		/*
+		 * will not add another interface while any channel
+		 * switch is active.
+		 */
+		if (nsdata->vif.csa_active)
+			return -EBUSY;
 
 		/*
 		 * The remaining checks are only performed for interfaces
@@ -293,12 +308,13 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
 	return 0;
 }
 
-static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata)
+static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata,
+				  enum nl80211_iftype iftype)
 {
 	int n_queues = sdata->local->hw.queues;
 	int i;
 
-	if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) {
+	if (iftype != NL80211_IFTYPE_P2P_DEVICE) {
 		for (i = 0; i < IEEE80211_NUM_ACS; i++) {
 			if (WARN_ON_ONCE(sdata->vif.hw_queue[i] ==
					 IEEE80211_INVAL_HW_QUEUE))
@@ -309,8 +325,9 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata)
 		}
 	}
 
-	if ((sdata->vif.type != NL80211_IFTYPE_AP &&
-	     sdata->vif.type != NL80211_IFTYPE_MESH_POINT) ||
+	if ((iftype != NL80211_IFTYPE_AP &&
+	     iftype != NL80211_IFTYPE_P2P_GO &&
+	     iftype != NL80211_IFTYPE_MESH_POINT) ||
 	    !(sdata->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)) {
 		sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE;
 		return 0;
@@ -393,7 +410,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
 		return ret;
 	}
 
-	ret = ieee80211_check_queues(sdata);
+	ret = ieee80211_check_queues(sdata, NL80211_IFTYPE_MONITOR);
 	if (ret) {
 		kfree(sdata);
 		return ret;
@@ -545,7 +562,11 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 			break;
 		}
 
-		if (local->monitors == 0 && local->open_count == 0) {
+		if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) {
+			res = drv_add_interface(local, sdata);
+			if (res)
+				goto err_stop;
+		} else if (local->monitors == 0 && local->open_count == 0) {
 			res = ieee80211_add_virtual_monitor(local);
 			if (res)
 				goto err_stop;
@@ -573,7 +594,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 		res = drv_add_interface(local, sdata);
 		if (res)
 			goto err_stop;
-		res = ieee80211_check_queues(sdata);
+		res = ieee80211_check_queues(sdata,
+			ieee80211_vif_type_p2p(&sdata->vif));
 		if (res)
 			goto err_del_interface;
 	}
@@ -791,6 +813,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 	cancel_work_sync(&local->dynamic_ps_enable_work);
 
 	cancel_work_sync(&sdata->recalc_smps);
+	sdata->vif.csa_active = false;
+	cancel_work_sync(&sdata->csa_finalize_work);
 
 	cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
 
@@ -923,7 +947,11 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 		mutex_lock(&local->mtx);
 		ieee80211_recalc_idle(local);
 		mutex_unlock(&local->mtx);
-		break;
+
+		if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))
+			break;
+
+		/* fall through */
 	default:
 		if (going_down)
 			drv_remove_interface(local, sdata);
@@ -1072,7 +1100,7 @@ static const struct net_device_ops ieee80211_monitorif_ops = {
 	.ndo_start_xmit		= ieee80211_monitor_start_xmit,
 	.ndo_set_rx_mode	= ieee80211_set_multicast_list,
 	.ndo_change_mtu		= ieee80211_change_mtu,
-	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_set_mac_address	= ieee80211_change_mac,
 	.ndo_select_queue	= ieee80211_monitor_select_queue,
 };
 
@@ -1250,6 +1278,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 	skb_queue_head_init(&sdata->skb_queue);
 	INIT_WORK(&sdata->work, ieee80211_iface_work);
 	INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work);
+	INIT_WORK(&sdata->csa_finalize_work, ieee80211_csa_finalize_work);
 
 	switch (type) {
 	case NL80211_IFTYPE_P2P_GO:
@@ -1363,14 +1392,14 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
 
 	ret = drv_change_interface(local, sdata, internal_type, p2p);
 	if (ret)
-		type = sdata->vif.type;
+		type = ieee80211_vif_type_p2p(&sdata->vif);
 
 	/*
 	 * Ignore return value here, there's not much we can do since
 	 * the driver changed the interface type internally already.
 	 * The warnings will hopefully make driver authors fix it :-)
 	 */
-	ieee80211_check_queues(sdata);
+	ieee80211_check_queues(sdata, type);
 
 	ieee80211_setup_sdata(sdata, type);
 
@@ -1747,10 +1776,9 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
 }
 
 static int netdev_notify(struct notifier_block *nb,
-			 unsigned long state,
-			 void *ndev)
+			 unsigned long state, void *ptr)
 {
-	struct net_device *dev = ndev;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct ieee80211_sub_if_data *sdata;
 
 	if (state != NETDEV_CHANGENAME)
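
Aside: the duplicate-address check in ieee80211_verify_mac() above folds each
6-byte MAC into the low 48 bits of a u64 so that interfaces can be compared
(and masked against the wiphy address mask) with plain integer arithmetic. A
minimal stand-alone sketch of that packing; the m[0]/m[1] terms fall outside
the quoted diff context, so their shifts here are inferred:

	#include <stdint.h>

	/* Pack a MAC address into a u64, matching the shifts visible in
	 * ieee80211_verify_mac(); byte 0 ends up most significant. */
	static uint64_t mac_to_u64(const uint8_t m[6])
	{
		return ((uint64_t)m[0] << 5*8) | ((uint64_t)m[1] << 4*8) |
		       ((uint64_t)m[2] << 3*8) | ((uint64_t)m[3] << 2*8) |
		       ((uint64_t)m[4] << 1*8) | ((uint64_t)m[5] << 0*8);
	}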
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 67059b88fea5..620677e897bd 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -93,6 +93,9 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 
 	might_sleep();
 
+	if (key->flags & KEY_FLAG_TAINTED)
+		return -EINVAL;
+
 	if (!key->local->ops->set_key)
 		goto out_unsupported;
 
@@ -335,12 +338,12 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
 	switch (cipher) {
 	case WLAN_CIPHER_SUITE_WEP40:
 	case WLAN_CIPHER_SUITE_WEP104:
-		key->conf.iv_len = WEP_IV_LEN;
-		key->conf.icv_len = WEP_ICV_LEN;
+		key->conf.iv_len = IEEE80211_WEP_IV_LEN;
+		key->conf.icv_len = IEEE80211_WEP_ICV_LEN;
 		break;
 	case WLAN_CIPHER_SUITE_TKIP:
-		key->conf.iv_len = TKIP_IV_LEN;
-		key->conf.icv_len = TKIP_ICV_LEN;
+		key->conf.iv_len = IEEE80211_TKIP_IV_LEN;
+		key->conf.icv_len = IEEE80211_TKIP_ICV_LEN;
 		if (seq) {
 			for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
 				key->u.tkip.rx[i].iv32 =
@@ -352,13 +355,13 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
 		spin_lock_init(&key->u.tkip.txlock);
 		break;
 	case WLAN_CIPHER_SUITE_CCMP:
-		key->conf.iv_len = CCMP_HDR_LEN;
-		key->conf.icv_len = CCMP_MIC_LEN;
+		key->conf.iv_len = IEEE80211_CCMP_HDR_LEN;
+		key->conf.icv_len = IEEE80211_CCMP_MIC_LEN;
 		if (seq) {
 			for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++)
-				for (j = 0; j < CCMP_PN_LEN; j++)
+				for (j = 0; j < IEEE80211_CCMP_PN_LEN; j++)
 					key->u.ccmp.rx_pn[i][j] =
-						seq[CCMP_PN_LEN - j - 1];
+						seq[IEEE80211_CCMP_PN_LEN - j - 1];
 		}
 		/*
 		 * Initialize AES key state here as an optimization so that
@@ -375,9 +378,9 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
 		key->conf.iv_len = 0;
 		key->conf.icv_len = sizeof(struct ieee80211_mmie);
 		if (seq)
-			for (j = 0; j < CMAC_PN_LEN; j++)
+			for (j = 0; j < IEEE80211_CMAC_PN_LEN; j++)
 				key->u.aes_cmac.rx_pn[j] =
-					seq[CMAC_PN_LEN - j - 1];
+					seq[IEEE80211_CMAC_PN_LEN - j - 1];
 		/*
 		 * Initialize AES key state here as an optimization so that
 		 * it does not need to be initialized for every packet.
@@ -455,6 +458,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
 		      struct ieee80211_sub_if_data *sdata,
 		      struct sta_info *sta)
 {
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_key *old_key;
 	int idx, ret;
 	bool pairwise;
@@ -484,10 +488,13 @@ int ieee80211_key_link(struct ieee80211_key *key,
 
 	ieee80211_debugfs_key_add(key);
 
-	ret = ieee80211_key_enable_hw_accel(key);
-
-	if (ret)
-		ieee80211_key_free(key, true);
+	if (!local->wowlan) {
+		ret = ieee80211_key_enable_hw_accel(key);
+		if (ret)
+			ieee80211_key_free(key, true);
+	} else {
+		ret = 0;
+	}
 
 	mutex_unlock(&sdata->local->key_mtx);
 
@@ -540,7 +547,7 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw,
 			void *iter_data)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct ieee80211_key *key;
+	struct ieee80211_key *key, *tmp;
 	struct ieee80211_sub_if_data *sdata;
 
 	ASSERT_RTNL();
@@ -548,13 +555,14 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw,
 	mutex_lock(&local->key_mtx);
 	if (vif) {
 		sdata = vif_to_sdata(vif);
-		list_for_each_entry(key, &sdata->key_list, list)
+		list_for_each_entry_safe(key, tmp, &sdata->key_list, list)
 			iter(hw, &sdata->vif,
 			     key->sta ? &key->sta->sta : NULL,
 			     &key->conf, iter_data);
 	} else {
 		list_for_each_entry(sdata, &local->interfaces, list)
-			list_for_each_entry(key, &sdata->key_list, list)
+			list_for_each_entry_safe(key, tmp,
+						 &sdata->key_list, list)
 				iter(hw, &sdata->vif,
 				     key->sta ? &key->sta->sta : NULL,
 				     &key->conf, iter_data);
@@ -740,14 +748,146 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf,
 			pn = key->u.ccmp.rx_pn[IEEE80211_NUM_TIDS];
 		else
 			pn = key->u.ccmp.rx_pn[tid];
-		memcpy(seq->ccmp.pn, pn, CCMP_PN_LEN);
+		memcpy(seq->ccmp.pn, pn, IEEE80211_CCMP_PN_LEN);
 		break;
 	case WLAN_CIPHER_SUITE_AES_CMAC:
 		if (WARN_ON(tid != 0))
 			return;
 		pn = key->u.aes_cmac.rx_pn;
-		memcpy(seq->aes_cmac.pn, pn, CMAC_PN_LEN);
+		memcpy(seq->aes_cmac.pn, pn, IEEE80211_CMAC_PN_LEN);
 		break;
 	}
 }
 EXPORT_SYMBOL(ieee80211_get_key_rx_seq);
+
+void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf,
+			      struct ieee80211_key_seq *seq)
+{
+	struct ieee80211_key *key;
+	u64 pn64;
+
+	key = container_of(keyconf, struct ieee80211_key, conf);
+
+	switch (key->conf.cipher) {
+	case WLAN_CIPHER_SUITE_TKIP:
+		key->u.tkip.tx.iv32 = seq->tkip.iv32;
+		key->u.tkip.tx.iv16 = seq->tkip.iv16;
+		break;
+	case WLAN_CIPHER_SUITE_CCMP:
+		pn64 = (u64)seq->ccmp.pn[5] |
+		       ((u64)seq->ccmp.pn[4] << 8) |
+		       ((u64)seq->ccmp.pn[3] << 16) |
+		       ((u64)seq->ccmp.pn[2] << 24) |
+		       ((u64)seq->ccmp.pn[1] << 32) |
+		       ((u64)seq->ccmp.pn[0] << 40);
+		atomic64_set(&key->u.ccmp.tx_pn, pn64);
+		break;
+	case WLAN_CIPHER_SUITE_AES_CMAC:
+		pn64 = (u64)seq->aes_cmac.pn[5] |
+		       ((u64)seq->aes_cmac.pn[4] << 8) |
+		       ((u64)seq->aes_cmac.pn[3] << 16) |
+		       ((u64)seq->aes_cmac.pn[2] << 24) |
+		       ((u64)seq->aes_cmac.pn[1] << 32) |
+		       ((u64)seq->aes_cmac.pn[0] << 40);
+		atomic64_set(&key->u.aes_cmac.tx_pn, pn64);
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(ieee80211_set_key_tx_seq);
+
+void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf,
+			      int tid, struct ieee80211_key_seq *seq)
+{
+	struct ieee80211_key *key;
+	u8 *pn;
+
+	key = container_of(keyconf, struct ieee80211_key, conf);
+
+	switch (key->conf.cipher) {
+	case WLAN_CIPHER_SUITE_TKIP:
+		if (WARN_ON(tid < 0 || tid >= IEEE80211_NUM_TIDS))
+			return;
+		key->u.tkip.rx[tid].iv32 = seq->tkip.iv32;
+		key->u.tkip.rx[tid].iv16 = seq->tkip.iv16;
+		break;
+	case WLAN_CIPHER_SUITE_CCMP:
+		if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS))
+			return;
+		if (tid < 0)
+			pn = key->u.ccmp.rx_pn[IEEE80211_NUM_TIDS];
+		else
+			pn = key->u.ccmp.rx_pn[tid];
+		memcpy(pn, seq->ccmp.pn, IEEE80211_CCMP_PN_LEN);
+		break;
+	case WLAN_CIPHER_SUITE_AES_CMAC:
+		if (WARN_ON(tid != 0))
+			return;
+		pn = key->u.aes_cmac.rx_pn;
+		memcpy(pn, seq->aes_cmac.pn, IEEE80211_CMAC_PN_LEN);
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(ieee80211_set_key_rx_seq);
+
+void ieee80211_remove_key(struct ieee80211_key_conf *keyconf)
+{
+	struct ieee80211_key *key;
+
+	key = container_of(keyconf, struct ieee80211_key, conf);
+
+	assert_key_lock(key->local);
+
+	/*
+	 * if key was uploaded, we assume the driver will/has remove(d)
+	 * it, so adjust bookkeeping accordingly
+	 */
+	if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
+		key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
+
+		if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) ||
+		      (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) ||
+		      (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)))
+			increment_tailroom_need_count(key->sdata);
+	}
+
+	ieee80211_key_free(key, false);
+}
+EXPORT_SYMBOL_GPL(ieee80211_remove_key);
+
+struct ieee80211_key_conf *
+ieee80211_gtk_rekey_add(struct ieee80211_vif *vif,
+			struct ieee80211_key_conf *keyconf)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_key *key;
+	int err;
+
+	if (WARN_ON(!local->wowlan))
+		return ERR_PTR(-EINVAL);
+
+	if (WARN_ON(vif->type != NL80211_IFTYPE_STATION))
+		return ERR_PTR(-EINVAL);
+
+	key = ieee80211_key_alloc(keyconf->cipher, keyconf->keyidx,
+				  keyconf->keylen, keyconf->key,
+				  0, NULL);
+	if (IS_ERR(key))
+		return ERR_PTR(PTR_ERR(key));
+
+	if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED)
+		key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;
+
+	err = ieee80211_key_link(key, sdata, NULL);
+	if (err)
+		return ERR_PTR(err);
+
+	return &key->conf;
+}
+EXPORT_SYMBOL_GPL(ieee80211_gtk_rekey_add);
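
Aside: taken together, the new exports form a small WoWLAN resume API: with
ieee80211_iter_keys() now safe against the callback freeing keys, a driver can
walk its keys on resume, push back the PNs its firmware advanced via
ieee80211_set_key_tx_seq()/ieee80211_set_key_rx_seq(), drop keys the firmware
invalidated with ieee80211_remove_key(), and install a firmware-derived GTK
with ieee80211_gtk_rekey_add(). A hypothetical driver-side sketch, using only
the signatures visible above; fw_fetch_key_seq() is invented here and stands
in for whatever mechanism a real driver uses to read counters out of firmware:

	static void drv_resume_key_iter(struct ieee80211_hw *hw,
					struct ieee80211_vif *vif,
					struct ieee80211_sta *sta,
					struct ieee80211_key_conf *key,
					void *data)
	{
		struct ieee80211_key_seq seq;
		int tid;

		/* invented helper: ask firmware for this key's counters */
		if (fw_fetch_key_seq(hw->priv, key, &seq))
			return;

		switch (key->cipher) {
		case WLAN_CIPHER_SUITE_TKIP:
		case WLAN_CIPHER_SUITE_CCMP:
			/* simplistic: a real driver reads per-TID RX PNs */
			for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++)
				ieee80211_set_key_rx_seq(key, tid, &seq);
			ieee80211_set_key_tx_seq(key, &seq);
			break;
		case WLAN_CIPHER_SUITE_AES_CMAC:
			ieee80211_set_key_rx_seq(key, 0, &seq);
			ieee80211_set_key_tx_seq(key, &seq);
			break;
		}
	}

	/* called from the driver's .resume(), with the RTNL held as
	 * ieee80211_iter_keys() asserts */
	ieee80211_iter_keys(hw, vif, drv_resume_key_iter, NULL);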
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index e8de3e6d7804..036d57e76a5e 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -19,17 +19,6 @@
 #define NUM_DEFAULT_KEYS 4
 #define NUM_DEFAULT_MGMT_KEYS 2
 
-#define WEP_IV_LEN		4
-#define WEP_ICV_LEN		4
-#define ALG_CCMP_KEY_LEN	16
-#define CCMP_HDR_LEN		8
-#define CCMP_MIC_LEN		8
-#define CCMP_TK_LEN		16
-#define CCMP_PN_LEN		6
-#define TKIP_IV_LEN		8
-#define TKIP_ICV_LEN		4
-#define CMAC_PN_LEN		6
-
 struct ieee80211_local;
 struct ieee80211_sub_if_data;
 struct sta_info;
@@ -93,13 +82,13 @@ struct ieee80211_key {
 			 * frames and the last counter is used with Robust
 			 * Management frames.
 			 */
-			u8 rx_pn[IEEE80211_NUM_TIDS + 1][CCMP_PN_LEN];
+			u8 rx_pn[IEEE80211_NUM_TIDS + 1][IEEE80211_CCMP_PN_LEN];
 			struct crypto_cipher *tfm;
 			u32 replays; /* dot11RSNAStatsCCMPReplays */
 		} ccmp;
 		struct {
 			atomic64_t tx_pn;
-			u8 rx_pn[CMAC_PN_LEN];
+			u8 rx_pn[IEEE80211_CMAC_PN_LEN];
 			struct crypto_cipher *tfm;
 			u32 replays; /* dot11RSNAStatsCMACReplays */
 			u32 icverrors; /* dot11RSNAStatsCMACICVErrors */
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index bcffa6903129..e2b836446af3 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -12,27 +12,22 @@
 #include <linux/export.h>
 #include "led.h"
 
+#define MAC80211_BLINK_DELAY	50 /* ms */
+
 void ieee80211_led_rx(struct ieee80211_local *local)
 {
+	unsigned long led_delay = MAC80211_BLINK_DELAY;
 	if (unlikely(!local->rx_led))
 		return;
-	if (local->rx_led_counter++ % 2 == 0)
-		led_trigger_event(local->rx_led, LED_OFF);
-	else
-		led_trigger_event(local->rx_led, LED_FULL);
+	led_trigger_blink_oneshot(local->rx_led, &led_delay, &led_delay, 0);
 }
 
-/* q is 1 if a packet was enqueued, 0 if it has been transmitted */
-void ieee80211_led_tx(struct ieee80211_local *local, int q)
+void ieee80211_led_tx(struct ieee80211_local *local)
 {
+	unsigned long led_delay = MAC80211_BLINK_DELAY;
 	if (unlikely(!local->tx_led))
 		return;
-	/* not sure how this is supposed to work ... */
-	local->tx_led_counter += 2*q-1;
-	if (local->tx_led_counter % 2 == 0)
-		led_trigger_event(local->tx_led, LED_OFF);
-	else
-		led_trigger_event(local->tx_led, LED_FULL);
+	led_trigger_blink_oneshot(local->tx_led, &led_delay, &led_delay, 0);
 }
 
 void ieee80211_led_assoc(struct ieee80211_local *local, bool associated)
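
Aside: led_trigger_blink_oneshot() is the generic LED-core helper the rewrite
above switches to. It fires a single on/off cycle (50 ms each way here, per
MAC80211_BLINK_DELAY) and copes with being re-triggered while a blink is still
in flight, which is presumably why the old per-packet counter bookkeeping
could go away. Hedged usage sketch, with trig standing in for a trigger such
as local->rx_led or local->tx_led:

	static void blink_once(struct led_trigger *trig)
	{
		unsigned long on = 50, off = 50;	/* ms */

		/* final argument 0: start with the "on" phase, no invert */
		led_trigger_blink_oneshot(trig, &on, &off, 0);
	}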
diff --git a/net/mac80211/led.h b/net/mac80211/led.h
index e0275d9befa8..89f4344f13b9 100644
--- a/net/mac80211/led.h
+++ b/net/mac80211/led.h
@@ -13,7 +13,7 @@
 
 #ifdef CONFIG_MAC80211_LEDS
 void ieee80211_led_rx(struct ieee80211_local *local);
-void ieee80211_led_tx(struct ieee80211_local *local, int q);
+void ieee80211_led_tx(struct ieee80211_local *local);
 void ieee80211_led_assoc(struct ieee80211_local *local,
			 bool associated);
 void ieee80211_led_radio(struct ieee80211_local *local,
@@ -27,7 +27,7 @@ void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,
 static inline void ieee80211_led_rx(struct ieee80211_local *local)
 {
 }
-static inline void ieee80211_led_tx(struct ieee80211_local *local, int q)
+static inline void ieee80211_led_tx(struct ieee80211_local *local)
 {
 }
 static inline void ieee80211_led_assoc(struct ieee80211_local *local,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 8a7bfc47d577..21d5d44444d0 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -102,17 +102,8 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
 
 	offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
 
-	if (local->scan_channel) {
-		chandef.chan = local->scan_channel;
-		/* If scanning on oper channel, use whatever channel-type
-		 * is currently in use.
-		 */
-		if (chandef.chan == local->_oper_chandef.chan) {
-			chandef = local->_oper_chandef;
-		} else {
-			chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
-			chandef.center_freq1 = chandef.chan->center_freq;
-		}
+	if (local->scan_chandef.chan) {
+		chandef = local->scan_chandef;
 	} else if (local->tmp_channel) {
 		chandef.chan = local->tmp_channel;
 		chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
@@ -151,7 +142,7 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
 		changed |= IEEE80211_CONF_CHANGE_SMPS;
 	}
 
-	power = chandef.chan->max_power;
+	power = ieee80211_chandef_max_power(&chandef);
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -331,7 +322,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
 		return NOTIFY_DONE;
 
 	ifmgd = &sdata->u.mgd;
-	mutex_lock(&ifmgd->mtx);
+	sdata_lock(sdata);
 
 	/* Copy the addresses to the bss_conf list */
 	ifa = idev->ifa_list;
@@ -349,7 +340,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
 		ieee80211_bss_info_change_notify(sdata,
						 BSS_CHANGED_ARP_FILTER);
 
-	mutex_unlock(&ifmgd->mtx);
+	sdata_unlock(sdata);
 
 	return NOTIFY_DONE;
 }
@@ -686,8 +677,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		return -EINVAL;
 
 #ifdef CONFIG_PM
-	if ((hw->wiphy->wowlan.flags || hw->wiphy->wowlan.n_patterns) &&
-	    (!local->ops->suspend || !local->ops->resume))
+	if (hw->wiphy->wowlan && (!local->ops->suspend || !local->ops->resume))
 		return -EINVAL;
 #endif
 
@@ -902,9 +892,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	if (!local->ops->remain_on_channel)
 		local->hw.wiphy->max_remain_on_channel_duration = 5000;
 
-	if (local->ops->sched_scan_start)
-		local->hw.wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN;
-
 	/* mac80211 based drivers don't support internal TDLS setup */
 	if (local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)
 		local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP;
@@ -921,7 +908,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		hw->queues = IEEE80211_MAX_QUEUES;
 
 	local->workqueue =
-		alloc_ordered_workqueue(wiphy_name(local->hw.wiphy), 0);
+		alloc_ordered_workqueue("%s", 0, wiphy_name(local->hw.wiphy));
 	if (!local->workqueue) {
 		result = -ENOMEM;
 		goto fail_workqueue;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 6952760881c8..707ac61d63e5 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -62,7 +62,6 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
 			struct ieee802_11_elems *ie)
 {
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
-	struct ieee80211_local *local = sdata->local;
 	u32 basic_rates = 0;
 	struct cfg80211_chan_def sta_chan_def;
 
@@ -85,7 +84,7 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
 	     (ifmsh->mesh_auth_id == ie->mesh_config->meshconf_auth)))
 		return false;
 
-	ieee80211_sta_get_rates(local, ie, ieee80211_get_sdata_band(sdata),
+	ieee80211_sta_get_rates(sdata, ie, ieee80211_get_sdata_band(sdata),
				&basic_rates);
 
 	if (sdata->vif.bss_conf.basic_rates != basic_rates)
@@ -271,11 +270,12 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata,
 	*pos++ = ifmsh->mesh_auth_id;
 	/* Mesh Formation Info - number of neighbors */
 	neighbors = atomic_read(&ifmsh->estab_plinks);
-	/* Number of neighbor mesh STAs or 15 whichever is smaller */
-	neighbors = (neighbors > 15) ? 15 : neighbors;
+	neighbors = min_t(int, neighbors, IEEE80211_MAX_MESH_PEERINGS);
 	*pos++ = neighbors << 1;
 	/* Mesh capability */
-	*pos = IEEE80211_MESHCONF_CAPAB_FORWARDING;
+	*pos = 0x00;
+	*pos |= ifmsh->mshcfg.dot11MeshForwarding ?
+			IEEE80211_MESHCONF_CAPAB_FORWARDING : 0x00;
 	*pos |= ifmsh->accepting_plinks ?
			IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00;
 	/* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */
@@ -417,7 +417,9 @@ int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata,
 
 	sband = local->hw.wiphy->bands[band];
 	if (!sband->ht_cap.ht_supported ||
-	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT)
+	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
+	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
+	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
 		return 0;
 
 	if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_cap))
@@ -573,7 +575,7 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	u32 changed;
 
-	ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT);
+	ieee80211_sta_expire(sdata, ifmsh->mshcfg.plink_timeout * HZ);
 	mesh_path_expire(sdata);
 
 	changed = mesh_accept_plinks_update(sdata);
@@ -697,38 +699,38 @@ out_free:
 }
 
 static int
-ieee80211_mesh_rebuild_beacon(struct ieee80211_if_mesh *ifmsh)
+ieee80211_mesh_rebuild_beacon(struct ieee80211_sub_if_data *sdata)
 {
 	struct beacon_data *old_bcn;
 	int ret;
 
-	mutex_lock(&ifmsh->mtx);
-
-	old_bcn = rcu_dereference_protected(ifmsh->beacon,
-					    lockdep_is_held(&ifmsh->mtx));
-	ret = ieee80211_mesh_build_beacon(ifmsh);
+	old_bcn = rcu_dereference_protected(sdata->u.mesh.beacon,
+					    lockdep_is_held(&sdata->wdev.mtx));
+	ret = ieee80211_mesh_build_beacon(&sdata->u.mesh);
 	if (ret)
 		/* just reuse old beacon */
-		goto out;
+		return ret;
 
 	if (old_bcn)
 		kfree_rcu(old_bcn, rcu_head);
-out:
-	mutex_unlock(&ifmsh->mtx);
-	return ret;
+	return 0;
 }
 
 void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata,
				       u32 changed)
 {
-	if (sdata->vif.bss_conf.enable_beacon &&
-	    (changed & (BSS_CHANGED_BEACON |
-			BSS_CHANGED_HT |
-			BSS_CHANGED_BASIC_RATES |
-			BSS_CHANGED_BEACON_INT)))
-		if (ieee80211_mesh_rebuild_beacon(&sdata->u.mesh))
-			return;
-	ieee80211_bss_info_change_notify(sdata, changed);
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+	unsigned long bits = changed;
+	u32 bit;
+
+	if (!bits)
+		return;
+
+	/* if we race with running work, worst case this work becomes a noop */
+	for_each_set_bit(bit, &bits, sizeof(changed) * BITS_PER_BYTE)
+		set_bit(bit, &ifmsh->mbss_changed);
+	set_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags);
+	ieee80211_queue_work(&sdata->local->hw, &sdata->work);
 }
 
 int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
@@ -740,7 +742,6 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 		      BSS_CHANGED_HT |
 		      BSS_CHANGED_BASIC_RATES |
 		      BSS_CHANGED_BEACON_INT;
-	enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
 
 	local->fif_other_bss++;
 	/* mesh ifaces must set allmulti to forward mcast traffic */
@@ -748,7 +749,6 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 	ieee80211_configure_filter(local);
 
 	ifmsh->mesh_cc_id = 0;	/* Disabled */
-	ifmsh->mesh_auth_id = 0;	/* Disabled */
 	/* register sync ops from extensible synchronization framework */
 	ifmsh->sync_ops = ieee80211_mesh_sync_ops_get(ifmsh->mesh_sp_id);
 	ifmsh->adjusting_tbtt = false;
@@ -759,8 +759,6 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 	sdata->vif.bss_conf.ht_operation_mode =
		ifmsh->mshcfg.ht_opmode;
 	sdata->vif.bss_conf.enable_beacon = true;
-	sdata->vif.bss_conf.basic_rates =
-		ieee80211_mandatory_rates(local, band);
 
 	changed |= ieee80211_mps_local_status_update(sdata);
 
@@ -788,12 +786,10 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 	sdata->vif.bss_conf.enable_beacon = false;
 	clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
 	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
-	mutex_lock(&ifmsh->mtx);
 	bcn = rcu_dereference_protected(ifmsh->beacon,
-					lockdep_is_held(&ifmsh->mtx));
+					lockdep_is_held(&sdata->wdev.mtx));
 	rcu_assign_pointer(ifmsh->beacon, NULL);
 	kfree_rcu(bcn, rcu_head);
-	mutex_unlock(&ifmsh->mtx);
 
 	/* flush STAs and mpaths on this iface */
 	sta_info_flush(sdata);
@@ -806,14 +802,10 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 	del_timer_sync(&sdata->u.mesh.housekeeping_timer);
 	del_timer_sync(&sdata->u.mesh.mesh_path_root_timer);
 	del_timer_sync(&sdata->u.mesh.mesh_path_timer);
-	/*
-	 * If the timer fired while we waited for it, it will have
-	 * requeued the work. Now the work will be running again
-	 * but will not rearm the timer again because it checks
-	 * whether the interface is running, which, at this point,
-	 * it no longer is.
-	 */
-	cancel_work_sync(&sdata->work);
+
+	/* clear any mesh work (for next join) we may have accrued */
+	ifmsh->wrkq_flags = 0;
+	ifmsh->mbss_changed = 0;
 
 	local->fif_other_bss--;
 	atomic_dec(&local->iff_allmultis);
@@ -840,6 +832,9 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,
 
 	ieee802_11_parse_elems(pos, len - baselen, false, &elems);
 
+	if (!elems.mesh_id)
+		return;
+
 	/* 802.11-2012 10.1.4.3.2 */
 	if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) &&
 	     !is_broadcast_ether_addr(mgmt->da)) ||
@@ -954,6 +949,12 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_mgmt *mgmt;
 	u16 stype;
 
+	sdata_lock(sdata);
+
+	/* mesh already went down */
+	if (!sdata->wdev.mesh_id_len)
+		goto out;
+
 	rx_status = IEEE80211_SKB_RXCB(skb);
 	mgmt = (struct ieee80211_mgmt *) skb->data;
 	stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE;
@@ -971,12 +972,42 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 		ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status);
 		break;
 	}
+out:
+	sdata_unlock(sdata);
+}
+
+static void mesh_bss_info_changed(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+	u32 bit, changed = 0;
+
+	for_each_set_bit(bit, &ifmsh->mbss_changed,
+			 sizeof(changed) * BITS_PER_BYTE) {
+		clear_bit(bit, &ifmsh->mbss_changed);
+		changed |= BIT(bit);
+	}
+
+	if (sdata->vif.bss_conf.enable_beacon &&
+	    (changed & (BSS_CHANGED_BEACON |
+			BSS_CHANGED_HT |
+			BSS_CHANGED_BASIC_RATES |
+			BSS_CHANGED_BEACON_INT)))
+		if (ieee80211_mesh_rebuild_beacon(sdata))
+			return;
+
+	ieee80211_bss_info_change_notify(sdata, changed);
 }
 
 void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 
+	sdata_lock(sdata);
+
+	/* mesh already went down */
+	if (!sdata->wdev.mesh_id_len)
+		goto out;
+
 	if (ifmsh->preq_queue_len &&
 	    time_after(jiffies,
		       ifmsh->last_preq + msecs_to_jiffies(ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval)))
@@ -996,6 +1027,11 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata)
 
 	if (test_and_clear_bit(MESH_WORK_DRIFT_ADJUST, &ifmsh->wrkq_flags))
 		mesh_sync_adjust_tbtt(sdata);
+
+	if (test_and_clear_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags))
+		mesh_bss_info_changed(sdata);
+out:
+	sdata_unlock(sdata);
 }
 
 void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local)
@@ -1041,7 +1077,6 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 	spin_lock_init(&ifmsh->mesh_preq_queue_lock);
 	spin_lock_init(&ifmsh->sync_offset_lock);
 	RCU_INIT_POINTER(ifmsh->beacon, NULL);
-	mutex_init(&ifmsh->mtx);
 
 	sdata->vif.bss_conf.bssid = zero_addr;
 }
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index da158774eebb..2bc7fd2f787d 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -58,6 +58,7 @@ enum mesh_path_flags {
  * @MESH_WORK_ROOT: the mesh root station needs to send a frame
  * @MESH_WORK_DRIFT_ADJUST: time to compensate for clock drift relative to other
  * mesh nodes
+ * @MESH_WORK_MBSS_CHANGED: rebuild beacon and notify driver of BSS changes
  */
 enum mesh_deferred_task_flags {
 	MESH_WORK_HOUSEKEEPING,
@@ -65,6 +66,7 @@ enum mesh_deferred_task_flags {
 	MESH_WORK_GROW_MPP_TABLE,
 	MESH_WORK_ROOT,
 	MESH_WORK_DRIFT_ADJUST,
+	MESH_WORK_MBSS_CHANGED,
 };
 
 /**
@@ -188,7 +190,6 @@ struct mesh_rmc {
 	u32 idx_mask;
 };
 
-#define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ)
 #define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ)
 
 #define MESH_PATH_EXPIRE (600 * HZ)
@@ -324,14 +325,14 @@ static inline
 u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata)
 {
 	atomic_inc(&sdata->u.mesh.estab_plinks);
-	return mesh_accept_plinks_update(sdata);
+	return mesh_accept_plinks_update(sdata) | BSS_CHANGED_BEACON;
 }
 
 static inline
 u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata)
 {
 	atomic_dec(&sdata->u.mesh.estab_plinks);
-	return mesh_accept_plinks_update(sdata);
+	return mesh_accept_plinks_update(sdata) | BSS_CHANGED_BEACON;
 }
 
 static inline int mesh_plink_free_count(struct ieee80211_sub_if_data *sdata)
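
Aside: the mesh rework above replaces the private ifmsh->mtx with the
sdata/wdev lock and defers beacon updates instead of rebuilding them inline:
producers set bits in ifmsh->mbss_changed plus MESH_WORK_MBSS_CHANGED, and
ieee80211_mesh_work() drains them under sdata_lock(). Stripped of the
mac80211 specifics, the hand-off pattern is sketched below; the names used
here are generic stand-ins:

	static unsigned long pending;	/* plays the role of mbss_changed */

	/* producer side: may run concurrently with the work item */
	static void post_changes(u32 changed)
	{
		unsigned long bits = changed;
		u32 bit;

		for_each_set_bit(bit, &bits, sizeof(changed) * BITS_PER_BYTE)
			set_bit(bit, &pending);
		/* then queue the work, cf. ieee80211_queue_work() */
	}

	/* consumer side: runs in the work item */
	static u32 drain_changes(void)
	{
		u32 bit, changed = 0;

		for_each_set_bit(bit, &pending, sizeof(changed) * BITS_PER_BYTE) {
			clear_bit(bit, &pending);
			changed |= BIT(bit);
		}
		return changed;
	}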
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 09bebed99416..6b65d5055f5b 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -154,8 +154,14 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
 	u16 ht_opmode;
 	bool non_ht_sta = false, ht20_sta = false;
 
-	if (sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT)
+	switch (sdata->vif.bss_conf.chandef.width) {
+	case NL80211_CHAN_WIDTH_20_NOHT:
+	case NL80211_CHAN_WIDTH_5:
+	case NL80211_CHAN_WIDTH_10:
 		return 0;
+	default:
+		break;
+	}
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(sta, &local->sta_list, list) {
@@ -373,7 +379,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
 	u32 rates, basic_rates = 0, changed = 0;
 
 	sband = local->hw.wiphy->bands[band];
-	rates = ieee80211_sta_get_rates(local, elems, band, &basic_rates);
+	rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates);
 
 	spin_lock_bh(&sta->lock);
 	sta->last_rx = jiffies;
diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c
index 3b7bfc01ee36..22290a929b94 100644
--- a/net/mac80211/mesh_ps.c
+++ b/net/mac80211/mesh_ps.c
@@ -229,6 +229,10 @@ void ieee80211_mps_sta_status_update(struct sta_info *sta)
 	enum nl80211_mesh_power_mode pm;
 	bool do_buffer;
 
+	/* For non-assoc STA, prevent buffering or frame transmission */
+	if (sta->sta_state < IEEE80211_STA_ASSOC)
+		return;
+
 	/*
 	 * use peer-specific power mode if peering is established and the
 	 * peer's power mode is known
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a8c2130c8ba4..86e4ad56b573 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -31,10 +31,12 @@
 #include "led.h"
 
 #define IEEE80211_AUTH_TIMEOUT (HZ / 5)
+#define IEEE80211_AUTH_TIMEOUT_LONG (HZ / 2)
 #define IEEE80211_AUTH_TIMEOUT_SHORT (HZ / 10)
 #define IEEE80211_AUTH_MAX_TRIES 3
 #define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5)
 #define IEEE80211_ASSOC_TIMEOUT (HZ / 5)
+#define IEEE80211_ASSOC_TIMEOUT_LONG (HZ / 2)
 #define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10)
 #define IEEE80211_ASSOC_MAX_TRIES 3
 
@@ -91,41 +93,6 @@ MODULE_PARM_DESC(probe_wait_ms,
 #define IEEE80211_SIGNAL_AVE_MIN_COUNT	4
 
 /*
- * All cfg80211 functions have to be called outside a locked
- * section so that they can acquire a lock themselves... This
- * is much simpler than queuing up things in cfg80211, but we
- * do need some indirection for that here.
- */
-enum rx_mgmt_action {
-	/* no action required */
-	RX_MGMT_NONE,
-
-	/* caller must call cfg80211_send_deauth() */
-	RX_MGMT_CFG80211_DEAUTH,
-
-	/* caller must call cfg80211_send_disassoc() */
-	RX_MGMT_CFG80211_DISASSOC,
-
-	/* caller must call cfg80211_send_rx_auth() */
-	RX_MGMT_CFG80211_RX_AUTH,
-
-	/* caller must call cfg80211_send_rx_assoc() */
-	RX_MGMT_CFG80211_RX_ASSOC,
-
-	/* caller must call cfg80211_send_assoc_timeout() */
-	RX_MGMT_CFG80211_ASSOC_TIMEOUT,
-
-	/* used when a processed beacon causes a deauth */
-	RX_MGMT_CFG80211_TX_DEAUTH,
-};
-
-/* utils */
-static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd)
-{
-	lockdep_assert_held(&ifmgd->mtx);
-}
-
-/*
  * We can have multiple work items (and connection probing)
  * scheduling this timer, but we need to take care to only
  * reschedule it when it should fire _earlier_ than it was
@@ -135,13 +102,14 @@ static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd)
  * has happened -- the work that runs from this timer will
  * do that.
  */
-static void run_again(struct ieee80211_if_managed *ifmgd, unsigned long timeout)
+static void run_again(struct ieee80211_sub_if_data *sdata,
+		      unsigned long timeout)
 {
-	ASSERT_MGD_MTX(ifmgd);
+	sdata_assert_lock(sdata);
 
-	if (!timer_pending(&ifmgd->timer) ||
-	    time_before(timeout, ifmgd->timer.expires))
-		mod_timer(&ifmgd->timer, timeout);
+	if (!timer_pending(&sdata->u.mgd.timer) ||
+	    time_before(timeout, sdata->u.mgd.timer.expires))
+		mod_timer(&sdata->u.mgd.timer, timeout);
 }
 
 void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata)
@@ -224,6 +192,12 @@ static u32 chandef_downgrade(struct cfg80211_chan_def *c)
 		c->width = NL80211_CHAN_WIDTH_20_NOHT;
 		ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
 		break;
+	case NL80211_CHAN_WIDTH_5:
+	case NL80211_CHAN_WIDTH_10:
+		WARN_ON_ONCE(1);
+		/* keep c->width */
+		ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
+		break;
 	}
 
 	WARN_ON_ONCE(!cfg80211_chandef_valid(c));
@@ -237,8 +211,9 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 			     struct ieee80211_channel *channel,
 			     const struct ieee80211_ht_operation *ht_oper,
 			     const struct ieee80211_vht_operation *vht_oper,
-			     struct cfg80211_chan_def *chandef, bool verbose)
+			     struct cfg80211_chan_def *chandef, bool tracking)
 {
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	struct cfg80211_chan_def vht_chandef;
 	u32 ht_cfreq, ret;
 
@@ -257,7 +232,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 	ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan,
						  channel->band);
 	/* check that channel matches the right operating channel */
-	if (channel->center_freq != ht_cfreq) {
+	if (!tracking && channel->center_freq != ht_cfreq) {
 		/*
 		 * It's possible that some APs are confused here;
 		 * Netgear WNDR3700 sometimes reports 4 higher than
@@ -265,11 +240,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 		 * since we look at probe response/beacon data here
 		 * it should be OK.
 		 */
-		if (verbose)
-			sdata_info(sdata,
-				   "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n",
-				   channel->center_freq, ht_cfreq,
-				   ht_oper->primary_chan, channel->band);
+		sdata_info(sdata,
+			   "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n",
+			   channel->center_freq, ht_cfreq,
+			   ht_oper->primary_chan, channel->band);
 		ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
 		goto out;
 	}
@@ -323,7 +297,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 						channel->band);
 		break;
 	default:
-		if (verbose)
+		if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
 			sdata_info(sdata,
				   "AP VHT operation IE has invalid channel width (%d), disable VHT\n",
				   vht_oper->chan_width);
@@ -332,7 +306,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 	}
 
 	if (!cfg80211_chandef_valid(&vht_chandef)) {
-		if (verbose)
+		if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
 			sdata_info(sdata,
				   "AP VHT information is invalid, disable VHT\n");
 		ret = IEEE80211_STA_DISABLE_VHT;
@@ -345,7 +319,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 	}
 
 	if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) {
-		if (verbose)
+		if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
 			sdata_info(sdata,
				   "AP VHT information doesn't match HT, disable VHT\n");
 		ret = IEEE80211_STA_DISABLE_VHT;
@@ -361,18 +335,27 @@ out:
 	if (ret & IEEE80211_STA_DISABLE_VHT)
 		vht_chandef = *chandef;
 
+	/*
+	 * Ignore the DISABLED flag when we're already connected and only
+	 * tracking the APs beacon for bandwidth changes - otherwise we
+	 * might get disconnected here if we connect to an AP, update our
+	 * regulatory information based on the AP's country IE and the
+	 * information we have is wrong/outdated and disables the channel
+	 * that we're actually using for the connection to the AP.
+	 */
 	while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
-					IEEE80211_CHAN_DISABLED)) {
+					tracking ? 0 :
+						   IEEE80211_CHAN_DISABLED)) {
 		if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) {
 			ret = IEEE80211_STA_DISABLE_HT |
 			      IEEE80211_STA_DISABLE_VHT;
-			goto out;
+			break;
 		}
 
 		ret |= chandef_downgrade(chandef);
 	}
 
-	if (chandef->width != vht_chandef.width && verbose)
+	if (chandef->width != vht_chandef.width && !tracking)
 		sdata_info(sdata,
			   "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n");
 
@@ -412,7 +395,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 
 	/* calculate new channel (type) based on HT/VHT operation IEs */
 	flags = ieee80211_determine_chantype(sdata, sband, chan, ht_oper,
-					     vht_oper, &chandef, false);
+					     vht_oper, &chandef, true);
 
 	/*
 	 * Downgrade the new channel if we associated with restricted
@@ -506,27 +489,6 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 
 /* frame sending functions */
 
-static int ieee80211_compatible_rates(const u8 *supp_rates, int supp_rates_len,
-				      struct ieee80211_supported_band *sband,
-				      u32 *rates)
-{
-	int i, j, count;
-	*rates = 0;
-	count = 0;
-	for (i = 0; i < supp_rates_len; i++) {
-		int rate = (supp_rates[i] & 0x7F) * 5;
-
-		for (j = 0; j < sband->n_bitrates; j++)
-			if (sband->bitrates[j].bitrate == rate) {
-				*rates |= BIT(j);
-				count++;
-				break;
-			}
-	}
-
-	return count;
-}
-
 static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata,
				struct sk_buff *skb, u8 ap_ht_param,
				struct ieee80211_supported_band *sband,
@@ -645,14 +607,14 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_mgmt *mgmt;
 	u8 *pos, qos_info;
 	size_t offset = 0, noffset;
-	int i, count, rates_len, supp_rates_len;
+	int i, count, rates_len, supp_rates_len, shift;
 	u16 capab;
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_chanctx_conf *chanctx_conf;
 	struct ieee80211_channel *chan;
-	u32 rates = 0;
+	u32 rate_flags, rates = 0;
 
-	lockdep_assert_held(&ifmgd->mtx);
+	sdata_assert_lock(sdata);
 
 	rcu_read_lock();
 	chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
@@ -661,8 +623,10 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		return;
 	}
 	chan = chanctx_conf->def.chan;
+	rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
 	rcu_read_unlock();
 	sband = local->hw.wiphy->bands[chan->band];
+	shift = ieee80211_vif_get_shift(&sdata->vif);
 
 	if (assoc_data->supp_rates_len) {
 		/*
@@ -671,17 +635,24 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
671 * in the association request (e.g. D-Link DAP 1353 in 635 * in the association request (e.g. D-Link DAP 1353 in
672 * b-only mode)... 636 * b-only mode)...
673 */ 637 */
674 rates_len = ieee80211_compatible_rates(assoc_data->supp_rates, 638 rates_len = ieee80211_parse_bitrates(&chanctx_conf->def, sband,
675 assoc_data->supp_rates_len, 639 assoc_data->supp_rates,
676 sband, &rates); 640 assoc_data->supp_rates_len,
641 &rates);
677 } else { 642 } else {
678 /* 643 /*
679 * In case AP not provide any supported rates information 644 * In case AP not provide any supported rates information
680 * before association, we send information element(s) with 645 * before association, we send information element(s) with
681 * all rates that we support. 646 * all rates that we support.
682 */ 647 */
683 rates = ~0; 648 rates_len = 0;
684 rates_len = sband->n_bitrates; 649 for (i = 0; i < sband->n_bitrates; i++) {
650 if ((rate_flags & sband->bitrates[i].flags)
651 != rate_flags)
652 continue;
653 rates |= BIT(i);
654 rates_len++;
655 }
685 } 656 }
686 657
687 skb = alloc_skb(local->hw.extra_tx_headroom + 658 skb = alloc_skb(local->hw.extra_tx_headroom +
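When the AP supplied no rates, the fallback now filters by rate_flags instead of advertising everything: a rate survives only if its flags contain every flag the current chandef requires (for example the 5/10 MHz support flags). A standalone sketch of that selection, with the kernel names swapped for plain C:

	#include <stdint.h>

	/* keep bitrate i only if it supports everything the channel requires */
	static int build_rates_mask(const uint32_t *bitrate_flags, int n_bitrates,
				    uint32_t required_flags, uint32_t *rates)
	{
		int i, rates_len = 0;

		*rates = 0;
		for (i = 0; i < n_bitrates; i++) {
			if ((required_flags & bitrate_flags[i]) != required_flags)
				continue;	/* unusable at this bandwidth */
			*rates |= 1u << i;
			rates_len++;
		}
		return rates_len;
	}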
@@ -758,8 +729,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 	count = 0;
 	for (i = 0; i < sband->n_bitrates; i++) {
 		if (BIT(i) & rates) {
-			int rate = sband->bitrates[i].bitrate;
-			*pos++ = (u8) (rate / 5);
+			int rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
+						5 * (1 << shift));
+			*pos++ = (u8) rate;
 			if (++count == 8)
 				break;
 		}
@@ -772,8 +744,10 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 
 	for (i++; i < sband->n_bitrates; i++) {
 		if (BIT(i) & rates) {
-			int rate = sband->bitrates[i].bitrate;
-			*pos++ = (u8) (rate / 5);
+			int rate;
+			rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
+					    5 * (1 << shift));
+			*pos++ = (u8) rate;
 		}
 	}
 	}
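These two loops encode the selected rates into the (Extended) Supported Rates IE. A worked sketch of the arithmetic: sband bitrates are stored in units of 100 kb/s, the IE wants units of 500 kb/s, and on a half- or quarter-rate channel (shift 1 or 2) the on-air rate shrinks by 2^shift, so both conversions collapse into one round-up division:

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	/* e.g. 6 Mb/s is stored as 60; with shift == 2 (a 5 MHz channel)
	 * DIV_ROUND_UP(60, 5 * 4) == 3, i.e. 1.5 Mb/s in IE units -- exactly
	 * the quarter rate the narrow channel actually delivers.
	 */
	static unsigned char ie_rate(int bitrate_100kbps, int shift)
	{
		return (unsigned char)DIV_ROUND_UP(bitrate_100kbps,
						   5 * (1 << shift));
	}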
@@ -784,7 +758,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		*pos++ = WLAN_EID_PWR_CAPABILITY;
 		*pos++ = 2;
 		*pos++ = 0; /* min tx power */
-		*pos++ = chan->max_power; /* max tx power */
+		/* max tx power */
+		*pos++ = ieee80211_chandef_max_power(&chanctx_conf->def);
 
 		/* 2. supported channels */
 		/* TODO: get this in reg domain format */
@@ -914,6 +889,10 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
 
 	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
 					IEEE80211_TX_INTFL_OFFCHAN_TX_OK;
+
+	if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
+		IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+
 	if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL |
 			    IEEE80211_STA_CONNECTION_POLL))
 		IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_USE_MINRATE;
@@ -962,7 +941,7 @@ static void ieee80211_chswitch_work(struct work_struct *work)
 	if (!ieee80211_sdata_running(sdata))
 		return;
 
-	mutex_lock(&ifmgd->mtx);
+	sdata_lock(sdata);
 	if (!ifmgd->associated)
 		goto out;
 
@@ -985,7 +964,7 @@ static void ieee80211_chswitch_work(struct work_struct *work)
 					IEEE80211_QUEUE_STOP_REASON_CSA);
  out:
 	ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
-	mutex_unlock(&ifmgd->mtx);
+	sdata_unlock(sdata);
 }
 
 void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success)
@@ -1036,7 +1015,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 	const struct ieee80211_ht_operation *ht_oper;
 	int secondary_channel_offset = -1;
 
-	ASSERT_MGD_MTX(ifmgd);
+	sdata_assert_lock(sdata);
 
 	if (!cbss)
 		return;
@@ -1134,6 +1113,15 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 	case -1:
 		cfg80211_chandef_create(&new_chandef, new_chan,
 					NL80211_CHAN_NO_HT);
+		/* keep width for 5/10 MHz channels */
+		switch (sdata->vif.bss_conf.chandef.width) {
+		case NL80211_CHAN_WIDTH_5:
+		case NL80211_CHAN_WIDTH_10:
+			new_chandef.width = sdata->vif.bss_conf.chandef.width;
+			break;
+		default:
+			break;
+		}
 		break;
 	}
 
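cfg80211_chandef_create() with NL80211_CHAN_NO_HT always produces a 20 MHz definition, which would silently widen a 5/10 MHz BSS after a channel switch; the new switch above copies the old width instead. A sketch of the resulting rule, where cur_width stands for sdata->vif.bss_conf.chandef.width:

	cfg80211_chandef_create(&new_chandef, new_chan, NL80211_CHAN_NO_HT);
	/* a 5/10 MHz BSS stays 5/10 MHz across the switch */
	if (cur_width == NL80211_CHAN_WIDTH_5 ||
	    cur_width == NL80211_CHAN_WIDTH_10)
		new_chandef.width = cur_width;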
@@ -1390,6 +1378,9 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata)
 			  IEEE80211_STA_CONNECTION_POLL))
 		return false;
 
+	if (!mgd->have_beacon)
+		return false;
+
 	rcu_read_lock();
 	sta = sta_info_get(sdata, mgd->bssid);
 	if (sta)
@@ -1798,7 +1789,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_led_assoc(local, 1);
 
-	if (sdata->u.mgd.assoc_data->have_beacon) {
+	if (sdata->u.mgd.have_beacon) {
 		/*
 		 * If the AP is buggy we may get here with no DTIM period
 		 * known, so assume it's 1 which is the only safe assumption
@@ -1806,8 +1797,10 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 		 * probably just won't work at all.
 		 */
 		bss_conf->dtim_period = sdata->u.mgd.dtim_period ?: 1;
-		bss_info_changed |= BSS_CHANGED_DTIM_PERIOD;
+		bss_conf->beacon_rate = bss->beacon_rate;
+		bss_info_changed |= BSS_CHANGED_BEACON_INFO;
 	} else {
+		bss_conf->beacon_rate = NULL;
 		bss_conf->dtim_period = 0;
 	}
 
@@ -1842,7 +1835,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_local *local = sdata->local;
 	u32 changed = 0;
 
-	ASSERT_MGD_MTX(ifmgd);
+	sdata_assert_lock(sdata);
 
 	if (WARN_ON_ONCE(tx && !frame_buf))
 		return;
@@ -1930,6 +1923,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	del_timer_sync(&sdata->u.mgd.chswitch_timer);
 
 	sdata->vif.bss_conf.dtim_period = 0;
+	sdata->vif.bss_conf.beacon_rate = NULL;
+
+	ifmgd->have_beacon = false;
 
 	ifmgd->flags = 0;
 	ieee80211_vif_release_channel(sdata);
@@ -2051,7 +2047,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
 	}
 
 	ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms);
-	run_again(ifmgd, ifmgd->probe_timeout);
+	run_again(sdata, ifmgd->probe_timeout);
 	if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
 		ieee80211_flush_queues(sdata->local, sdata);
 }
@@ -2065,7 +2061,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
 	if (!ieee80211_sdata_running(sdata))
 		return;
 
-	mutex_lock(&ifmgd->mtx);
+	sdata_lock(sdata);
 
 	if (!ifmgd->associated)
 		goto out;
@@ -2119,7 +2115,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
 	ifmgd->probe_send_count = 0;
 	ieee80211_mgd_probe_ap_send(sdata);
  out:
-	mutex_unlock(&ifmgd->mtx);
+	sdata_unlock(sdata);
 }
 
 struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
@@ -2135,7 +2131,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
 	if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
 		return NULL;
 
-	ASSERT_MGD_MTX(ifmgd);
+	sdata_assert_lock(sdata);
 
 	if (ifmgd->associated)
 		cbss = ifmgd->associated;
@@ -2168,9 +2164,9 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
 
-	mutex_lock(&ifmgd->mtx);
+	sdata_lock(sdata);
 	if (!ifmgd->associated) {
-		mutex_unlock(&ifmgd->mtx);
+		sdata_unlock(sdata);
 		return;
 	}
 
@@ -2181,13 +2177,10 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
 	ieee80211_wake_queues_by_reason(&sdata->local->hw,
 					IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_CSA);
-	mutex_unlock(&ifmgd->mtx);
 
-	/*
-	 * must be outside lock due to cfg80211,
-	 * but that's not a problem.
-	 */
-	cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN);
+	cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
+			      IEEE80211_DEAUTH_FRAME_LEN);
+	sdata_unlock(sdata);
 }
 
 static void ieee80211_beacon_connection_loss_work(struct work_struct *work)
@@ -2254,7 +2247,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data;
 
-	lockdep_assert_held(&sdata->u.mgd.mtx);
+	sdata_assert_lock(sdata);
 
 	if (!assoc) {
 		sta_info_destroy_addr(sdata, auth_data->bss->bssid);
@@ -2295,27 +2288,26 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
 			    auth_data->key_idx, tx_flags);
 }
 
-static enum rx_mgmt_action __must_check
-ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
-		       struct ieee80211_mgmt *mgmt, size_t len)
+static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
+				   struct ieee80211_mgmt *mgmt, size_t len)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	u8 bssid[ETH_ALEN];
 	u16 auth_alg, auth_transaction, status_code;
 	struct sta_info *sta;
 
-	lockdep_assert_held(&ifmgd->mtx);
+	sdata_assert_lock(sdata);
 
 	if (len < 24 + 6)
-		return RX_MGMT_NONE;
+		return;
 
 	if (!ifmgd->auth_data || ifmgd->auth_data->done)
-		return RX_MGMT_NONE;
+		return;
 
 	memcpy(bssid, ifmgd->auth_data->bss->bssid, ETH_ALEN);
 
 	if (!ether_addr_equal(bssid, mgmt->bssid))
-		return RX_MGMT_NONE;
+		return;
 
 	auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg);
 	auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction);
@@ -2327,14 +2319,15 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 			   mgmt->sa, auth_alg, ifmgd->auth_data->algorithm,
 			   auth_transaction,
 			   ifmgd->auth_data->expected_transaction);
-		return RX_MGMT_NONE;
+		return;
 	}
 
 	if (status_code != WLAN_STATUS_SUCCESS) {
 		sdata_info(sdata, "%pM denied authentication (status %d)\n",
 			   mgmt->sa, status_code);
 		ieee80211_destroy_auth_data(sdata, false);
-		return RX_MGMT_CFG80211_RX_AUTH;
+		cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
+		return;
 	}
 
 	switch (ifmgd->auth_data->algorithm) {
@@ -2347,20 +2340,20 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 		if (ifmgd->auth_data->expected_transaction != 4) {
 			ieee80211_auth_challenge(sdata, mgmt, len);
 			/* need another frame */
-			return RX_MGMT_NONE;
+			return;
 		}
 		break;
 	default:
 		WARN_ONCE(1, "invalid auth alg %d",
 			  ifmgd->auth_data->algorithm);
-		return RX_MGMT_NONE;
+		return;
 	}
 
 	sdata_info(sdata, "authenticated\n");
 	ifmgd->auth_data->done = true;
 	ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC;
 	ifmgd->auth_data->timeout_started = true;
-	run_again(ifmgd, ifmgd->auth_data->timeout);
+	run_again(sdata, ifmgd->auth_data->timeout);
 
 	if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE &&
 	    ifmgd->auth_data->expected_transaction != 2) {
@@ -2368,7 +2361,8 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 		 * Report auth frame to user space for processing since another
 		 * round of Authentication frames is still needed.
 		 */
-		return RX_MGMT_CFG80211_RX_AUTH;
+		cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
+		return;
 	}
 
 	/* move station state to auth */
@@ -2384,30 +2378,29 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 	}
 	mutex_unlock(&sdata->local->sta_mtx);
 
-	return RX_MGMT_CFG80211_RX_AUTH;
+	cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
+	return;
  out_err:
 	mutex_unlock(&sdata->local->sta_mtx);
 	/* ignore frame -- wait for timeout */
-	return RX_MGMT_NONE;
 }
 
 
-static enum rx_mgmt_action __must_check
-ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
-			 struct ieee80211_mgmt *mgmt, size_t len)
+static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
+				     struct ieee80211_mgmt *mgmt, size_t len)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	const u8 *bssid = NULL;
 	u16 reason_code;
 
-	lockdep_assert_held(&ifmgd->mtx);
+	sdata_assert_lock(sdata);
 
 	if (len < 24 + 2)
-		return RX_MGMT_NONE;
+		return;
 
 	if (!ifmgd->associated ||
 	    !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
-		return RX_MGMT_NONE;
+		return;
 
 	bssid = ifmgd->associated->bssid;
 
@@ -2418,25 +2411,24 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_set_disassoc(sdata, 0, 0, false, NULL);
 
-	return RX_MGMT_CFG80211_DEAUTH;
+	cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
 }
 
 
-static enum rx_mgmt_action __must_check
-ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
-			   struct ieee80211_mgmt *mgmt, size_t len)
+static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
+				       struct ieee80211_mgmt *mgmt, size_t len)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	u16 reason_code;
 
-	lockdep_assert_held(&ifmgd->mtx);
+	sdata_assert_lock(sdata);
 
 	if (len < 24 + 2)
-		return RX_MGMT_NONE;
+		return;
 
 	if (!ifmgd->associated ||
 	    !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
-		return RX_MGMT_NONE;
+		return;
 
 	reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code);
 
@@ -2445,22 +2437,23 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_set_disassoc(sdata, 0, 0, false, NULL);
 
-	return RX_MGMT_CFG80211_DISASSOC;
+	cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
 }
 
 static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
 				u8 *supp_rates, unsigned int supp_rates_len,
 				u32 *rates, u32 *basic_rates,
 				bool *have_higher_than_11mbit,
-				int *min_rate, int *min_rate_index)
+				int *min_rate, int *min_rate_index,
+				int shift, u32 rate_flags)
 {
 	int i, j;
 
 	for (i = 0; i < supp_rates_len; i++) {
-		int rate = (supp_rates[i] & 0x7f) * 5;
+		int rate = supp_rates[i] & 0x7f;
 		bool is_basic = !!(supp_rates[i] & 0x80);
 
-		if (rate > 110)
+		if ((rate * 5 * (1 << shift)) > 110)
 			*have_higher_than_11mbit = true;
 
 		/*
@@ -2476,12 +2469,20 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
 			continue;
 
 		for (j = 0; j < sband->n_bitrates; j++) {
-			if (sband->bitrates[j].bitrate == rate) {
+			struct ieee80211_rate *br;
+			int brate;
+
+			br = &sband->bitrates[j];
+			if ((rate_flags & br->flags) != rate_flags)
+				continue;
+
+			brate = DIV_ROUND_UP(br->bitrate, (1 << shift) * 5);
+			if (brate == rate) {
 				*rates |= BIT(j);
 				if (is_basic)
 					*basic_rates |= BIT(j);
-				if (rate < *min_rate) {
-					*min_rate = rate;
+				if ((rate * 5) < *min_rate) {
+					*min_rate = rate * 5;
 					*min_rate_index = j;
 				}
 				break;
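ieee80211_get_rates() now performs the inverse of the TX-side encoding: the received IE byte is already in shifted 500 kb/s units, so each band table entry is scaled down with the same round-up division before comparing, after skipping rates the channel width cannot carry. A standalone sketch of the match:

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	/* band_rates[] in 100 kb/s units; returns the matching index or -1.
	 * Round-up on both ends is what keeps e.g. 5.5 Mb/s (55) matchable
	 * on a 10 MHz channel: DIV_ROUND_UP(55, 10) == 6 on both sides.
	 */
	static int match_ie_rate(const int *band_rates, int n,
				 unsigned char ie_rate, int shift)
	{
		int j;

		for (j = 0; j < n; j++)
			if (DIV_ROUND_UP(band_rates[j],
					 5 * (1 << shift)) == ie_rate)
				return j;
		return -1;
	}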
@@ -2495,7 +2496,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data;
 
-	lockdep_assert_held(&sdata->u.mgd.mtx);
+	sdata_assert_lock(sdata);
 
 	if (!assoc) {
 		sta_info_destroy_addr(sdata, assoc_data->bss->bssid);
@@ -2522,8 +2523,11 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 	u16 capab_info, aid;
 	struct ieee802_11_elems elems;
 	struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
+	const struct cfg80211_bss_ies *bss_ies = NULL;
+	struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
 	u32 changed = 0;
 	int err;
+	bool ret;
 
 	/* AssocResp and ReassocResp have identical structure */
 
@@ -2555,21 +2559,86 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 	ifmgd->aid = aid;
 
 	/*
+	 * Some APs are erroneously not including some information in their
+	 * (re)association response frames. Try to recover by using the data
+	 * from the beacon or probe response. This seems to afflict mobile
+	 * 2G/3G/4G wifi routers, reported models include the "Onda PN51T",
+	 * "Vodafone PocketWiFi 2", "ZTE MF60" and a similar T-Mobile device.
+	 */
+	if ((assoc_data->wmm && !elems.wmm_param) ||
+	    (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
+	     (!elems.ht_cap_elem || !elems.ht_operation)) ||
+	    (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
+	     (!elems.vht_cap_elem || !elems.vht_operation))) {
+		const struct cfg80211_bss_ies *ies;
+		struct ieee802_11_elems bss_elems;
+
+		rcu_read_lock();
+		ies = rcu_dereference(cbss->ies);
+		if (ies)
+			bss_ies = kmemdup(ies, sizeof(*ies) + ies->len,
+					  GFP_ATOMIC);
+		rcu_read_unlock();
+		if (!bss_ies)
+			return false;
+
+		ieee802_11_parse_elems(bss_ies->data, bss_ies->len,
+				       false, &bss_elems);
+		if (assoc_data->wmm &&
+		    !elems.wmm_param && bss_elems.wmm_param) {
+			elems.wmm_param = bss_elems.wmm_param;
+			sdata_info(sdata,
+				   "AP bug: WMM param missing from AssocResp\n");
+		}
+
+		/*
+		 * Also check if we requested HT/VHT, otherwise the AP doesn't
+		 * have to include the IEs in the (re)association response.
+		 */
+		if (!elems.ht_cap_elem && bss_elems.ht_cap_elem &&
+		    !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
+			elems.ht_cap_elem = bss_elems.ht_cap_elem;
+			sdata_info(sdata,
+				   "AP bug: HT capability missing from AssocResp\n");
+		}
+		if (!elems.ht_operation && bss_elems.ht_operation &&
+		    !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
+			elems.ht_operation = bss_elems.ht_operation;
+			sdata_info(sdata,
+				   "AP bug: HT operation missing from AssocResp\n");
+		}
+		if (!elems.vht_cap_elem && bss_elems.vht_cap_elem &&
+		    !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) {
+			elems.vht_cap_elem = bss_elems.vht_cap_elem;
+			sdata_info(sdata,
+				   "AP bug: VHT capa missing from AssocResp\n");
+		}
+		if (!elems.vht_operation && bss_elems.vht_operation &&
+		    !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) {
+			elems.vht_operation = bss_elems.vht_operation;
+			sdata_info(sdata,
+				   "AP bug: VHT operation missing from AssocResp\n");
+		}
+	}
+
+	/*
 	 * We previously checked these in the beacon/probe response, so
 	 * they should be present here. This is just a safety net.
 	 */
 	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
 	    (!elems.wmm_param || !elems.ht_cap_elem || !elems.ht_operation)) {
 		sdata_info(sdata,
-			   "HT AP is missing WMM params or HT capability/operation in AssocResp\n");
-		return false;
+			   "HT AP is missing WMM params or HT capability/operation\n");
+		ret = false;
+		goto out;
 	}
 
 	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
 	    (!elems.vht_cap_elem || !elems.vht_operation)) {
 		sdata_info(sdata,
-			   "VHT AP is missing VHT capability/operation in AssocResp\n");
-		return false;
+			   "VHT AP is missing VHT capability/operation\n");
+		ret = false;
+		goto out;
 	}
 
 	mutex_lock(&sdata->local->sta_mtx);
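The workaround above has to parse IEs that belong to the scan entry, and those hang off an RCU-protected pointer, so they may be freed as soon as the read-side critical section ends. Hence the copy-out shape (real kernel APIs; the error handling mirrors the hunk):

	rcu_read_lock();
	ies = rcu_dereference(cbss->ies);
	if (ies)
		bss_ies = kmemdup(ies, sizeof(*ies) + ies->len, GFP_ATOMIC);
	rcu_read_unlock();
	if (!bss_ies)
		return false;	/* treat allocation failure like a bad response */

The private copy can then be parsed at leisure and is kfree()d on every exit path, which is why the function's returns are rewritten into "ret = ...; goto out;" form below.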
@@ -2580,7 +2649,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 	sta = sta_info_get(sdata, cbss->bssid);
 	if (WARN_ON(!sta)) {
 		mutex_unlock(&sdata->local->sta_mtx);
-		return false;
+		ret = false;
+		goto out;
 	}
 
 	sband = local->hw.wiphy->bands[ieee80211_get_sdata_band(sdata)];
@@ -2633,7 +2703,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 			    sta->sta.addr);
 		WARN_ON(__sta_info_destroy(sta));
 		mutex_unlock(&sdata->local->sta_mtx);
-		return false;
+		ret = false;
+		goto out;
 	}
 
 	mutex_unlock(&sdata->local->sta_mtx);
@@ -2673,13 +2744,15 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 	ieee80211_sta_rx_notify(sdata, (struct ieee80211_hdr *)mgmt);
 	ieee80211_sta_reset_beacon_monitor(sdata);
 
-	return true;
+	ret = true;
+ out:
+	kfree(bss_ies);
+	return ret;
 }
 
-static enum rx_mgmt_action __must_check
-ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
-			     struct ieee80211_mgmt *mgmt, size_t len,
-			     struct cfg80211_bss **bss)
+static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
+					 struct ieee80211_mgmt *mgmt,
+					 size_t len)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
@@ -2687,13 +2760,14 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	struct ieee802_11_elems elems;
 	u8 *pos;
 	bool reassoc;
+	struct cfg80211_bss *bss;
 
-	lockdep_assert_held(&ifmgd->mtx);
+	sdata_assert_lock(sdata);
 
 	if (!assoc_data)
-		return RX_MGMT_NONE;
+		return;
 	if (!ether_addr_equal(assoc_data->bss->bssid, mgmt->bssid))
-		return RX_MGMT_NONE;
+		return;
 
 	/*
 	 * AssocResp and ReassocResp have identical structure, so process both
@@ -2701,7 +2775,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	 */
 
 	if (len < 24 + 6)
-		return RX_MGMT_NONE;
+		return;
 
 	reassoc = ieee80211_is_reassoc_req(mgmt->frame_control);
 	capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
@@ -2728,22 +2802,22 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 		assoc_data->timeout = jiffies + msecs_to_jiffies(ms);
 		assoc_data->timeout_started = true;
 		if (ms > IEEE80211_ASSOC_TIMEOUT)
-			run_again(ifmgd, assoc_data->timeout);
-		return RX_MGMT_NONE;
+			run_again(sdata, assoc_data->timeout);
+		return;
 	}
 
-	*bss = assoc_data->bss;
+	bss = assoc_data->bss;
 
 	if (status_code != WLAN_STATUS_SUCCESS) {
 		sdata_info(sdata, "%pM denied association (code=%d)\n",
 			   mgmt->sa, status_code);
 		ieee80211_destroy_assoc_data(sdata, false);
 	} else {
-		if (!ieee80211_assoc_success(sdata, *bss, mgmt, len)) {
+		if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) {
 			/* oops -- internal error -- send timeout for now */
 			ieee80211_destroy_assoc_data(sdata, false);
-			cfg80211_put_bss(sdata->local->hw.wiphy, *bss);
-			return RX_MGMT_CFG80211_ASSOC_TIMEOUT;
+			cfg80211_assoc_timeout(sdata->dev, bss);
+			return;
 		}
 		sdata_info(sdata, "associated\n");
 
@@ -2755,7 +2829,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 		ieee80211_destroy_assoc_data(sdata, true);
 	}
 
-	return RX_MGMT_CFG80211_RX_ASSOC;
+	cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len);
 }
 
 static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
@@ -2767,23 +2841,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	int freq;
 	struct ieee80211_bss *bss;
 	struct ieee80211_channel *channel;
-	bool need_ps = false;
 
-	lockdep_assert_held(&sdata->u.mgd.mtx);
-
-	if ((sdata->u.mgd.associated &&
-	     ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) ||
-	    (sdata->u.mgd.assoc_data &&
-	     ether_addr_equal(mgmt->bssid,
-			      sdata->u.mgd.assoc_data->bss->bssid))) {
-		/* not previously set so we may need to recalc */
-		need_ps = sdata->u.mgd.associated && !sdata->u.mgd.dtim_period;
-
-		if (elems->tim && !elems->parse_error) {
-			const struct ieee80211_tim_ie *tim_ie = elems->tim;
-			sdata->u.mgd.dtim_period = tim_ie->dtim_period;
-		}
-	}
+	sdata_assert_lock(sdata);
 
 	if (elems->ds_params)
 		freq = ieee80211_channel_to_frequency(elems->ds_params[0],
@@ -2798,22 +2857,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 
 	bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems,
 					channel);
-	if (bss)
+	if (bss) {
 		ieee80211_rx_bss_put(local, bss);
-
-	if (!sdata->u.mgd.associated ||
-	    !ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid))
-		return;
-
-	if (need_ps) {
-		mutex_lock(&local->iflist_mtx);
-		ieee80211_recalc_ps(local, -1);
-		mutex_unlock(&local->iflist_mtx);
+		sdata->vif.bss_conf.beacon_rate = bss->beacon_rate;
 	}
-
-	ieee80211_sta_process_chanswitch(sdata, rx_status->mactime,
-					 elems, true);
-
 }
 
 
@@ -2828,7 +2875,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
 
 	ifmgd = &sdata->u.mgd;
 
-	ASSERT_MGD_MTX(ifmgd);
+	sdata_assert_lock(sdata);
 
 	if (!ether_addr_equal(mgmt->da, sdata->vif.addr))
 		return; /* ignore ProbeResp to foreign address */
@@ -2853,7 +2900,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
 		ifmgd->auth_data->tries = 0;
 		ifmgd->auth_data->timeout = jiffies;
 		ifmgd->auth_data->timeout_started = true;
-		run_again(ifmgd, ifmgd->auth_data->timeout);
+		run_again(sdata, ifmgd->auth_data->timeout);
 	}
 }
 
@@ -2878,10 +2925,9 @@ static const u64 care_about_ies =
 	(1ULL << WLAN_EID_HT_CAPABILITY) |
 	(1ULL << WLAN_EID_HT_OPERATION);
 
-static enum rx_mgmt_action
-ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
-			 struct ieee80211_mgmt *mgmt, size_t len,
-			 u8 *deauth_buf, struct ieee80211_rx_status *rx_status)
+static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
+				     struct ieee80211_mgmt *mgmt, size_t len,
+				     struct ieee80211_rx_status *rx_status)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
@@ -2896,24 +2942,25 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	u8 erp_value = 0;
 	u32 ncrc;
 	u8 *bssid;
+	u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN];
 
-	lockdep_assert_held(&ifmgd->mtx);
+	sdata_assert_lock(sdata);
 
 	/* Process beacon from the current BSS */
 	baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt;
 	if (baselen > len)
-		return RX_MGMT_NONE;
+		return;
 
 	rcu_read_lock();
 	chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
 	if (!chanctx_conf) {
 		rcu_read_unlock();
-		return RX_MGMT_NONE;
+		return;
 	}
 
 	if (rx_status->freq != chanctx_conf->def.chan->center_freq) {
 		rcu_read_unlock();
-		return RX_MGMT_NONE;
+		return;
 	}
 	chan = chanctx_conf->def.chan;
 	rcu_read_unlock();
@@ -2924,7 +2971,11 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 					  len - baselen, false, &elems);
 
 	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
-	ifmgd->assoc_data->have_beacon = true;
+	if (elems.tim && !elems.parse_error) {
+		const struct ieee80211_tim_ie *tim_ie = elems.tim;
+		ifmgd->dtim_period = tim_ie->dtim_period;
+	}
+	ifmgd->have_beacon = true;
 	ifmgd->assoc_data->need_beacon = false;
 	if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) {
 		sdata->vif.bss_conf.sync_tsf =
@@ -2940,13 +2991,13 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 		/* continue assoc process */
 		ifmgd->assoc_data->timeout = jiffies;
 		ifmgd->assoc_data->timeout_started = true;
-		run_again(ifmgd, ifmgd->assoc_data->timeout);
-		return RX_MGMT_NONE;
+		run_again(sdata, ifmgd->assoc_data->timeout);
+		return;
 	}
 
 	if (!ifmgd->associated ||
 	    !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
-		return RX_MGMT_NONE;
+		return;
 	bssid = ifmgd->associated->bssid;
 
 	/* Track average RSSI from the Beacon frames of the current AP */
@@ -3092,12 +3143,15 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	}
 
 	if (ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid)
-		return RX_MGMT_NONE;
+		return;
 	ifmgd->beacon_crc = ncrc;
 	ifmgd->beacon_crc_valid = true;
 
 	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
 
+	ieee80211_sta_process_chanswitch(sdata, rx_status->mactime,
+					 &elems, true);
+
 	if (ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
 				     elems.wmm_param_len))
 		changed |= BSS_CHANGED_QOS;
@@ -3106,7 +3160,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	 * If we haven't had a beacon before, tell the driver about the
 	 * DTIM period (and beacon timing if desired) now.
 	 */
-	if (!bss_conf->dtim_period) {
+	if (!ifmgd->have_beacon) {
 		/* a few bogus AP send dtim_period = 0 or no TIM IE */
 		if (elems.tim)
 			bss_conf->dtim_period = elems.tim->dtim_period ?: 1;
@@ -3125,7 +3179,14 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 			sdata->vif.bss_conf.sync_dtim_count = 0;
 		}
 
-		changed |= BSS_CHANGED_DTIM_PERIOD;
+		changed |= BSS_CHANGED_BEACON_INFO;
+		ifmgd->have_beacon = true;
+
+		mutex_lock(&local->iflist_mtx);
+		ieee80211_recalc_ps(local, -1);
+		mutex_unlock(&local->iflist_mtx);
+
+		ieee80211_recalc_ps_vif(sdata);
 	}
 
 	if (elems.erp_info) {
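BSS_CHANGED_DTIM_PERIOD disappears here; the new have_beacon flag latches once per association and gates a single BSS_CHANGED_BEACON_INFO notification that now covers both the DTIM period and the beacon rate. A hypothetical driver-side sketch of the contract this implies (drv_bss_info_changed() and program_dtim() are illustrative names, not real APIs):

	static void drv_bss_info_changed(struct ieee80211_bss_conf *info,
					 u32 changed)
	{
		if (changed & BSS_CHANGED_BEACON_INFO) {
			program_dtim(info->dtim_period);	/* never 0 here */
			/* info->beacon_rate stays NULL until a beacon is seen */
		}
	}

Powersave is recalculated at the same point because ieee80211_powersave_allowed() now refuses to doze before the first beacon, when the DTIM schedule is still unknown.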
@@ -3147,7 +3208,9 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 		ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
 				       WLAN_REASON_DEAUTH_LEAVING,
 				       true, deauth_buf);
-		return RX_MGMT_CFG80211_TX_DEAUTH;
+		cfg80211_tx_mlme_mgmt(sdata->dev, deauth_buf,
+				      sizeof(deauth_buf));
+		return;
 	}
 
 	if (sta && elems.opmode_notif)
@@ -3164,19 +3227,13 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 					       elems.pwr_constr_elem);
 
 	ieee80211_bss_info_change_notify(sdata, changed);
-
-	return RX_MGMT_NONE;
 }
 
 void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 				  struct sk_buff *skb)
 {
-	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	struct ieee80211_rx_status *rx_status;
 	struct ieee80211_mgmt *mgmt;
-	struct cfg80211_bss *bss = NULL;
-	enum rx_mgmt_action rma = RX_MGMT_NONE;
-	u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN];
 	u16 fc;
 	struct ieee802_11_elems elems;
 	int ies_len;
@@ -3185,28 +3242,27 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 	mgmt = (struct ieee80211_mgmt *) skb->data;
 	fc = le16_to_cpu(mgmt->frame_control);
 
-	mutex_lock(&ifmgd->mtx);
+	sdata_lock(sdata);
 
 	switch (fc & IEEE80211_FCTL_STYPE) {
 	case IEEE80211_STYPE_BEACON:
-		rma = ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len,
-					       deauth_buf, rx_status);
+		ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, rx_status);
 		break;
 	case IEEE80211_STYPE_PROBE_RESP:
 		ieee80211_rx_mgmt_probe_resp(sdata, skb);
 		break;
 	case IEEE80211_STYPE_AUTH:
-		rma = ieee80211_rx_mgmt_auth(sdata, mgmt, skb->len);
+		ieee80211_rx_mgmt_auth(sdata, mgmt, skb->len);
 		break;
 	case IEEE80211_STYPE_DEAUTH:
-		rma = ieee80211_rx_mgmt_deauth(sdata, mgmt, skb->len);
+		ieee80211_rx_mgmt_deauth(sdata, mgmt, skb->len);
 		break;
 	case IEEE80211_STYPE_DISASSOC:
-		rma = ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len);
+		ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len);
 		break;
 	case IEEE80211_STYPE_ASSOC_RESP:
 	case IEEE80211_STYPE_REASSOC_RESP:
-		rma = ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, &bss);
+		ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len);
 		break;
 	case IEEE80211_STYPE_ACTION:
 		if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) {
@@ -3252,34 +3308,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 		}
 		break;
 	}
-	mutex_unlock(&ifmgd->mtx);
-
-	switch (rma) {
-	case RX_MGMT_NONE:
-		/* no action */
-		break;
-	case RX_MGMT_CFG80211_DEAUTH:
-		cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len);
-		break;
-	case RX_MGMT_CFG80211_DISASSOC:
-		cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len);
-		break;
-	case RX_MGMT_CFG80211_RX_AUTH:
-		cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, skb->len);
-		break;
-	case RX_MGMT_CFG80211_RX_ASSOC:
-		cfg80211_send_rx_assoc(sdata->dev, bss, (u8 *)mgmt, skb->len);
-		break;
-	case RX_MGMT_CFG80211_ASSOC_TIMEOUT:
-		cfg80211_send_assoc_timeout(sdata->dev, mgmt->bssid);
-		break;
-	case RX_MGMT_CFG80211_TX_DEAUTH:
-		cfg80211_send_deauth(sdata->dev, deauth_buf,
-				     sizeof(deauth_buf));
-		break;
-	default:
-		WARN(1, "unexpected: %d", rma);
-	}
+	sdata_unlock(sdata);
 }
 
 static void ieee80211_sta_timer(unsigned long data)
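This deletion is the payoff of the locking change: the enum rx_mgmt_action indirection existed only so cfg80211 could be called after dropping ifmgd->mtx. With the shared wdev mutex each handler reports in place, roughly:

	/* sketch: validation and state teardown elided */
	static void rx_deauth(struct net_device *dev, const u8 *frame,
			      size_t len)
	{
		cfg80211_rx_mlme_mgmt(dev, frame, len);	/* fine under sdata_lock() */
	}

and the per-frame-type cfg80211_send_*() entry points collapse into the small set used above: cfg80211_rx_mlme_mgmt(), cfg80211_tx_mlme_mgmt(), cfg80211_rx_assoc_resp(), cfg80211_auth_timeout() and cfg80211_assoc_timeout().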
@@ -3293,20 +3322,13 @@ static void ieee80211_sta_timer(unsigned long data)
 static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
 					  u8 *bssid, u8 reason, bool tx)
 {
-	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
 
 	ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason,
 			       tx, frame_buf);
-	mutex_unlock(&ifmgd->mtx);
-
-	/*
-	 * must be outside lock due to cfg80211,
-	 * but that's not a problem.
-	 */
-	cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN);
 
-	mutex_lock(&ifmgd->mtx);
+	cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
+			      IEEE80211_DEAUTH_FRAME_LEN);
 }
 
 static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata)
@@ -3316,7 +3338,7 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_mgd_auth_data *auth_data = ifmgd->auth_data;
 	u32 tx_flags = 0;
 
-	lockdep_assert_held(&ifmgd->mtx);
+	sdata_assert_lock(sdata);
 
 	if (WARN_ON_ONCE(!auth_data))
 		return -EINVAL;
@@ -3388,10 +3410,13 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata)
 
 	if (tx_flags == 0) {
 		auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT;
-		ifmgd->auth_data->timeout_started = true;
-		run_again(ifmgd, auth_data->timeout);
+		auth_data->timeout_started = true;
+		run_again(sdata, auth_data->timeout);
 	} else {
-		auth_data->timeout_started = false;
+		auth_data->timeout =
+			round_jiffies_up(jiffies + IEEE80211_AUTH_TIMEOUT_LONG);
+		auth_data->timeout_started = true;
+		run_again(sdata, auth_data->timeout);
 	}
 
 	return 0;
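Previously the else branch armed no timer at all and relied on the TX-status path to start one; if that report never arrived, authentication simply hung. Now both branches arm a timer, with the status path expected to shorten it. round_jiffies_up() suits the backstop because the exact deadline does not matter and rounding up to a whole tick boundary lets an idle system batch timer wakeups without ever firing early:

	/* sketch: coalesce an imprecise deadline onto a whole-second boundary */
	unsigned long deadline = round_jiffies_up(jiffies + HZ / 2);

IEEE80211_AUTH_TIMEOUT_LONG, like IEEE80211_ASSOC_TIMEOUT_LONG in the matching association hunk below, is a constant defined earlier in mlme.c, outside this excerpt.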
@@ -3402,7 +3427,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data;
 	struct ieee80211_local *local = sdata->local;
 
-	lockdep_assert_held(&sdata->u.mgd.mtx);
+	sdata_assert_lock(sdata);
 
 	assoc_data->tries++;
 	if (assoc_data->tries > IEEE80211_ASSOC_MAX_TRIES) {
@@ -3426,9 +3451,13 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
 	if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) {
 		assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT;
 		assoc_data->timeout_started = true;
-		run_again(&sdata->u.mgd, assoc_data->timeout);
+		run_again(sdata, assoc_data->timeout);
 	} else {
-		assoc_data->timeout_started = false;
+		assoc_data->timeout =
+			round_jiffies_up(jiffies +
+					 IEEE80211_ASSOC_TIMEOUT_LONG);
+		assoc_data->timeout_started = true;
+		run_again(sdata, assoc_data->timeout);
 	}
 
 	return 0;
@@ -3451,7 +3480,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 
-	mutex_lock(&ifmgd->mtx);
+	sdata_lock(sdata);
 
 	if (ifmgd->status_received) {
 		__le16 fc = ifmgd->status_fc;
@@ -3463,7 +3492,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 			if (status_acked) {
 				ifmgd->auth_data->timeout =
 					jiffies + IEEE80211_AUTH_TIMEOUT_SHORT;
-				run_again(ifmgd, ifmgd->auth_data->timeout);
+				run_again(sdata, ifmgd->auth_data->timeout);
 			} else {
 				ifmgd->auth_data->timeout = jiffies - 1;
 			}
@@ -3474,7 +3503,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 			if (status_acked) {
 				ifmgd->assoc_data->timeout =
 					jiffies + IEEE80211_ASSOC_TIMEOUT_SHORT;
-				run_again(ifmgd, ifmgd->assoc_data->timeout);
+				run_again(sdata, ifmgd->assoc_data->timeout);
 			} else {
 				ifmgd->assoc_data->timeout = jiffies - 1;
 			}
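Both status_acked branches use the same timer idiom: an acked frame re-arms a short deadline, while a missing ack sets the deadline to jiffies - 1 so it is already expired and the next pass of the work function escalates immediately. A sketch:

	/* time_after(jiffies, jiffies - 1) is instantly true */
	unsigned long timeout = acked ? jiffies + IEEE80211_AUTH_TIMEOUT_SHORT
				      : jiffies - 1;

This avoids a separate "failed" flag; expiry and failure take the same path through the work function.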
@@ -3497,30 +3526,22 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 
 			ieee80211_destroy_auth_data(sdata, false);
 
-			mutex_unlock(&ifmgd->mtx);
-			cfg80211_send_auth_timeout(sdata->dev, bssid);
-			mutex_lock(&ifmgd->mtx);
+			cfg80211_auth_timeout(sdata->dev, bssid);
 		}
 	} else if (ifmgd->auth_data && ifmgd->auth_data->timeout_started)
-		run_again(ifmgd, ifmgd->auth_data->timeout);
+		run_again(sdata, ifmgd->auth_data->timeout);
 
 	if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started &&
 	    time_after(jiffies, ifmgd->assoc_data->timeout)) {
-		if ((ifmgd->assoc_data->need_beacon &&
-		     !ifmgd->assoc_data->have_beacon) ||
+		if ((ifmgd->assoc_data->need_beacon && !ifmgd->have_beacon) ||
 		    ieee80211_do_assoc(sdata)) {
-			u8 bssid[ETH_ALEN];
-
-			memcpy(bssid, ifmgd->assoc_data->bss->bssid, ETH_ALEN);
+			struct cfg80211_bss *bss = ifmgd->assoc_data->bss;
 
 			ieee80211_destroy_assoc_data(sdata, false);
-
-			mutex_unlock(&ifmgd->mtx);
-			cfg80211_send_assoc_timeout(sdata->dev, bssid);
-			mutex_lock(&ifmgd->mtx);
+			cfg80211_assoc_timeout(sdata->dev, bss);
 		}
 	} else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started)
-		run_again(ifmgd, ifmgd->assoc_data->timeout);
+		run_again(sdata, ifmgd->assoc_data->timeout);
 
 	if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL |
 			    IEEE80211_STA_CONNECTION_POLL) &&
@@ -3554,7 +3575,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 						    false);
 		}
 	} else if (time_is_after_jiffies(ifmgd->probe_timeout))
-		run_again(ifmgd, ifmgd->probe_timeout);
+		run_again(sdata, ifmgd->probe_timeout);
 	else if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) {
 		mlme_dbg(sdata,
 			 "Failed to send nullfunc to AP %pM after %dms, disconnecting\n",
@@ -3583,7 +3604,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 		}
 	}
 
-	mutex_unlock(&ifmgd->mtx);
+	sdata_unlock(sdata);
 }
 
 static void ieee80211_sta_bcn_mon_timer(unsigned long data)
@@ -3644,9 +3665,9 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 
-	mutex_lock(&ifmgd->mtx);
+	sdata_lock(sdata);
 	if (!ifmgd->associated) {
-		mutex_unlock(&ifmgd->mtx);
+		sdata_unlock(sdata);
 		return;
 	}
 
@@ -3657,10 +3678,10 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
 					      ifmgd->associated->bssid,
 					      WLAN_REASON_UNSPECIFIED,
 					      true);
-		mutex_unlock(&ifmgd->mtx);
+		sdata_unlock(sdata);
 		return;
 	}
-	mutex_unlock(&ifmgd->mtx);
+	sdata_unlock(sdata);
 }
 #endif
 
@@ -3692,8 +3713,6 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
 	ifmgd->uapsd_max_sp_len = sdata->local->hw.uapsd_max_sp_len;
 	ifmgd->p2p_noa_index = -1;
 
-	mutex_init(&ifmgd->mtx);
-
 	if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS)
 		ifmgd->req_smps = IEEE80211_SMPS_AUTOMATIC;
 	else
@@ -3833,7 +3852,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 	ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
 						     cbss->channel,
 						     ht_oper, vht_oper,
						     &chandef, false);
 
 	sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss),
 				      local->rx_chains);
@@ -3850,6 +3869,12 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 	 */
 	ret = ieee80211_vif_use_channel(sdata, &chandef,
 					IEEE80211_CHANCTX_SHARED);
+
+	/* don't downgrade for 5 and 10 MHz channels, though. */
+	if (chandef.width == NL80211_CHAN_WIDTH_5 ||
+	    chandef.width == NL80211_CHAN_WIDTH_10)
+		return ret;
+
 	while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) {
 		ifmgd->flags |= chandef_downgrade(&chandef);
 		ret = ieee80211_vif_use_channel(sdata, &chandef,
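chandef_downgrade() walks the HT/VHT ladder; 5 and 10 MHz sit outside that ladder, so retrying narrower widths is meaningless for them and the early return keeps whatever ieee80211_vif_use_channel() reported. A conceptual sketch of the ladder the guard skips (not the kernel function itself):

	static enum nl80211_chan_width downgrade_once(enum nl80211_chan_width w)
	{
		switch (w) {
		case NL80211_CHAN_WIDTH_160:
		case NL80211_CHAN_WIDTH_80P80:
			return NL80211_CHAN_WIDTH_80;
		case NL80211_CHAN_WIDTH_80:
			return NL80211_CHAN_WIDTH_40;
		case NL80211_CHAN_WIDTH_40:
			return NL80211_CHAN_WIDTH_20;
		case NL80211_CHAN_WIDTH_20:
			return NL80211_CHAN_WIDTH_20_NOHT;
		default:
			return w;	/* 5/10 MHz: nothing narrower to try */
		}
	}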
@@ -3882,27 +3907,40 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
3882 if (!new_sta) 3907 if (!new_sta)
3883 return -ENOMEM; 3908 return -ENOMEM;
3884 } 3909 }
3885
3886 if (new_sta) { 3910 if (new_sta) {
3887 u32 rates = 0, basic_rates = 0; 3911 u32 rates = 0, basic_rates = 0;
3888 bool have_higher_than_11mbit; 3912 bool have_higher_than_11mbit;
3889 int min_rate = INT_MAX, min_rate_index = -1; 3913 int min_rate = INT_MAX, min_rate_index = -1;
3914 struct ieee80211_chanctx_conf *chanctx_conf;
3890 struct ieee80211_supported_band *sband; 3915 struct ieee80211_supported_band *sband;
3891 const struct cfg80211_bss_ies *ies; 3916 const struct cfg80211_bss_ies *ies;
3917 int shift;
3918 u32 rate_flags;
3892 3919
3893 sband = local->hw.wiphy->bands[cbss->channel->band]; 3920 sband = local->hw.wiphy->bands[cbss->channel->band];
3894 3921
3895 err = ieee80211_prep_channel(sdata, cbss); 3922 err = ieee80211_prep_channel(sdata, cbss);
3896 if (err) { 3923 if (err) {
3897 sta_info_free(local, new_sta); 3924 sta_info_free(local, new_sta);
3898 return err; 3925 return -EINVAL;
3899 } 3926 }
3927 shift = ieee80211_vif_get_shift(&sdata->vif);
3928
3929 rcu_read_lock();
3930 chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
3931 if (WARN_ON(!chanctx_conf)) {
3932 rcu_read_unlock();
3933 return -EINVAL;
3934 }
3935 rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
3936 rcu_read_unlock();
3900 3937
3901 ieee80211_get_rates(sband, bss->supp_rates, 3938 ieee80211_get_rates(sband, bss->supp_rates,
3902 bss->supp_rates_len, 3939 bss->supp_rates_len,
3903 &rates, &basic_rates, 3940 &rates, &basic_rates,
3904 &have_higher_than_11mbit, 3941 &have_higher_than_11mbit,
3905 &min_rate, &min_rate_index); 3942 &min_rate, &min_rate_index,
3943 shift, rate_flags);
3906 3944
3907 /* 3945 /*
3908 * This used to be a workaround for basic rates missing 3946 * This used to be a workaround for basic rates missing
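The shift/rate_flags plumbing added to this call path scales legacy bitrates for half- and quarter-rate channels: shift is 1 on 10 MHz and 2 on 5 MHz, and bitrates (kept in units of 100 kbit/s) are divided by 1 << shift, rounding up. A worked example:

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	int main(void)
	{
		int bitrate = 60;	/* 6 Mbit/s in 100 kbit/s units */
		int shift;

		for (shift = 0; shift <= 2; shift++)
			printf("shift %d -> %d x 100 kbit/s\n",
			       shift, DIV_ROUND_UP(bitrate, 1 << shift));
		return 0;	/* prints 60, 30, 15 */
	}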
@@ -4049,8 +4087,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
4049 4087
4050 /* try to authenticate/probe */ 4088 /* try to authenticate/probe */
4051 4089
4052 mutex_lock(&ifmgd->mtx);
4053
4054 if ((ifmgd->auth_data && !ifmgd->auth_data->done) || 4090 if ((ifmgd->auth_data && !ifmgd->auth_data->done) ||
4055 ifmgd->assoc_data) { 4091 ifmgd->assoc_data) {
4056 err = -EBUSY; 4092 err = -EBUSY;
@@ -4070,8 +4106,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
4070 WLAN_REASON_UNSPECIFIED, 4106 WLAN_REASON_UNSPECIFIED,
4071 false, frame_buf); 4107 false, frame_buf);
4072 4108
4073 __cfg80211_send_deauth(sdata->dev, frame_buf, 4109 cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
4074 sizeof(frame_buf)); 4110 sizeof(frame_buf));
4075 } 4111 }
4076 4112
4077 sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); 4113 sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid);
@@ -4088,8 +4124,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
4088 4124
4089 /* hold our own reference */ 4125 /* hold our own reference */
4090 cfg80211_ref_bss(local->hw.wiphy, auth_data->bss); 4126 cfg80211_ref_bss(local->hw.wiphy, auth_data->bss);
4091 err = 0; 4127 return 0;
4092 goto out_unlock;
4093 4128
4094 err_clear: 4129 err_clear:
4095 memset(ifmgd->bssid, 0, ETH_ALEN); 4130 memset(ifmgd->bssid, 0, ETH_ALEN);
@@ -4097,9 +4132,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
4097 ifmgd->auth_data = NULL; 4132 ifmgd->auth_data = NULL;
4098 err_free: 4133 err_free:
4099 kfree(auth_data); 4134 kfree(auth_data);
4100 out_unlock:
4101 mutex_unlock(&ifmgd->mtx);
4102
4103 return err; 4135 return err;
4104} 4136}
4105 4137
@@ -4130,8 +4162,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
4130 assoc_data->ssid_len = ssidie[1]; 4162 assoc_data->ssid_len = ssidie[1];
4131 rcu_read_unlock(); 4163 rcu_read_unlock();
4132 4164
4133 mutex_lock(&ifmgd->mtx);
4134
4135 if (ifmgd->associated) { 4165 if (ifmgd->associated) {
4136 u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; 4166 u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
4137 4167
@@ -4139,8 +4169,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
4139 WLAN_REASON_UNSPECIFIED, 4169 WLAN_REASON_UNSPECIFIED,
4140 false, frame_buf); 4170 false, frame_buf);
4141 4171
4142 __cfg80211_send_deauth(sdata->dev, frame_buf, 4172 cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
4143 sizeof(frame_buf)); 4173 sizeof(frame_buf));
4144 } 4174 }
4145 4175
4146 if (ifmgd->auth_data && !ifmgd->auth_data->done) { 4176 if (ifmgd->auth_data && !ifmgd->auth_data->done) {
@@ -4287,6 +4317,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
4287 4317
4288 ifmgd->assoc_data = assoc_data; 4318 ifmgd->assoc_data = assoc_data;
4289 ifmgd->dtim_period = 0; 4319 ifmgd->dtim_period = 0;
4320 ifmgd->have_beacon = false;
4290 4321
4291 err = ieee80211_prep_connection(sdata, req->bss, true); 4322 err = ieee80211_prep_connection(sdata, req->bss, true);
4292 if (err) 4323 if (err)
@@ -4318,7 +4349,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
4318 ifmgd->dtim_period = tim->dtim_period; 4349 ifmgd->dtim_period = tim->dtim_period;
4319 dtim_count = tim->dtim_count; 4350 dtim_count = tim->dtim_count;
4320 } 4351 }
4321 assoc_data->have_beacon = true; 4352 ifmgd->have_beacon = true;
4322 assoc_data->timeout = jiffies; 4353 assoc_data->timeout = jiffies;
4323 assoc_data->timeout_started = true; 4354 assoc_data->timeout_started = true;
4324 4355
@@ -4334,7 +4365,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
4334 } 4365 }
4335 rcu_read_unlock(); 4366 rcu_read_unlock();
4336 4367
4337 run_again(ifmgd, assoc_data->timeout); 4368 run_again(sdata, assoc_data->timeout);
4338 4369
4339 if (bss->corrupt_data) { 4370 if (bss->corrupt_data) {
4340 char *corrupt_type = "data"; 4371 char *corrupt_type = "data";
@@ -4350,17 +4381,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
4350 corrupt_type); 4381 corrupt_type);
4351 } 4382 }
4352 4383
4353 err = 0; 4384 return 0;
4354 goto out;
4355 err_clear: 4385 err_clear:
4356 memset(ifmgd->bssid, 0, ETH_ALEN); 4386 memset(ifmgd->bssid, 0, ETH_ALEN);
4357 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); 4387 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
4358 ifmgd->assoc_data = NULL; 4388 ifmgd->assoc_data = NULL;
4359 err_free: 4389 err_free:
4360 kfree(assoc_data); 4390 kfree(assoc_data);
4361 out:
4362 mutex_unlock(&ifmgd->mtx);
4363
4364 return err; 4391 return err;
4365} 4392}
4366 4393
@@ -4372,8 +4399,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
4372 bool tx = !req->local_state_change; 4399 bool tx = !req->local_state_change;
4373 bool report_frame = false; 4400 bool report_frame = false;
4374 4401
4375 mutex_lock(&ifmgd->mtx);
4376
4377 sdata_info(sdata, 4402 sdata_info(sdata,
4378 "deauthenticating from %pM by local choice (reason=%d)\n", 4403 "deauthenticating from %pM by local choice (reason=%d)\n",
4379 req->bssid, req->reason_code); 4404 req->bssid, req->reason_code);
@@ -4385,7 +4410,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
4385 req->reason_code, tx, 4410 req->reason_code, tx,
4386 frame_buf); 4411 frame_buf);
4387 ieee80211_destroy_auth_data(sdata, false); 4412 ieee80211_destroy_auth_data(sdata, false);
4388 mutex_unlock(&ifmgd->mtx);
4389 4413
4390 report_frame = true; 4414 report_frame = true;
4391 goto out; 4415 goto out;
@@ -4397,12 +4421,11 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
4397 req->reason_code, tx, frame_buf); 4421 req->reason_code, tx, frame_buf);
4398 report_frame = true; 4422 report_frame = true;
4399 } 4423 }
4400 mutex_unlock(&ifmgd->mtx);
4401 4424
4402 out: 4425 out:
4403 if (report_frame) 4426 if (report_frame)
4404 __cfg80211_send_deauth(sdata->dev, frame_buf, 4427 cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
4405 IEEE80211_DEAUTH_FRAME_LEN); 4428 IEEE80211_DEAUTH_FRAME_LEN);
4406 4429
4407 return 0; 4430 return 0;
4408} 4431}
@@ -4414,18 +4437,14 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
4414 u8 bssid[ETH_ALEN]; 4437 u8 bssid[ETH_ALEN];
4415 u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; 4438 u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
4416 4439
4417 mutex_lock(&ifmgd->mtx);
4418
4419 /* 4440 /*
4420 * cfg80211 should catch this ... but it's racy since 4441 * cfg80211 should catch this ... but it's racy since
4421 * we can receive a disassoc frame, process it, hand it 4442 * we can receive a disassoc frame, process it, hand it
4422 * to cfg80211 while that's in a locked section already 4443 * to cfg80211 while that's in a locked section already
4423 * trying to tell us that the user wants to disconnect. 4444 * trying to tell us that the user wants to disconnect.
4424 */ 4445 */
4425 if (ifmgd->associated != req->bss) { 4446 if (ifmgd->associated != req->bss)
4426 mutex_unlock(&ifmgd->mtx);
4427 return -ENOLINK; 4447 return -ENOLINK;
4428 }
4429 4448
4430 sdata_info(sdata, 4449 sdata_info(sdata,
4431 "disassociating from %pM by local choice (reason=%d)\n", 4450 "disassociating from %pM by local choice (reason=%d)\n",
@@ -4435,10 +4454,9 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
4435 ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DISASSOC, 4454 ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DISASSOC,
4436 req->reason_code, !req->local_state_change, 4455 req->reason_code, !req->local_state_change,
4437 frame_buf); 4456 frame_buf);
4438 mutex_unlock(&ifmgd->mtx);
4439 4457
4440 __cfg80211_send_disassoc(sdata->dev, frame_buf, 4458 cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
4441 IEEE80211_DEAUTH_FRAME_LEN); 4459 IEEE80211_DEAUTH_FRAME_LEN);
4442 4460
4443 return 0; 4461 return 0;
4444} 4462}
@@ -4458,13 +4476,16 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
4458 cancel_work_sync(&ifmgd->csa_connection_drop_work); 4476 cancel_work_sync(&ifmgd->csa_connection_drop_work);
4459 cancel_work_sync(&ifmgd->chswitch_work); 4477 cancel_work_sync(&ifmgd->chswitch_work);
4460 4478
4461 mutex_lock(&ifmgd->mtx); 4479 sdata_lock(sdata);
4462 if (ifmgd->assoc_data) 4480 if (ifmgd->assoc_data) {
4481 struct cfg80211_bss *bss = ifmgd->assoc_data->bss;
4463 ieee80211_destroy_assoc_data(sdata, false); 4482 ieee80211_destroy_assoc_data(sdata, false);
4483 cfg80211_assoc_timeout(sdata->dev, bss);
4484 }
4464 if (ifmgd->auth_data) 4485 if (ifmgd->auth_data)
4465 ieee80211_destroy_auth_data(sdata, false); 4486 ieee80211_destroy_auth_data(sdata, false);
4466 del_timer_sync(&ifmgd->timer); 4487 del_timer_sync(&ifmgd->timer);
4467 mutex_unlock(&ifmgd->mtx); 4488 sdata_unlock(sdata);
4468} 4489}
4469 4490
4470void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, 4491void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif,
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 7fc5d0d8149a..340126204343 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -99,10 +99,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
99 } 99 }
100 mutex_unlock(&local->sta_mtx); 100 mutex_unlock(&local->sta_mtx);
101 101
102 /* remove all interfaces */ 102 /* remove all interfaces that were created in the driver */
103 list_for_each_entry(sdata, &local->interfaces, list) { 103 list_for_each_entry(sdata, &local->interfaces, list) {
104 if (!ieee80211_sdata_running(sdata)) 104 if (!ieee80211_sdata_running(sdata) ||
105 sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
106 sdata->vif.type == NL80211_IFTYPE_MONITOR)
105 continue; 107 continue;
108
106 drv_remove_interface(local, sdata); 109 drv_remove_interface(local, sdata);
107 } 110 }
108 111
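AP_VLAN and monitor interfaces are never handed to the driver through drv_add_interface(), so suspend must not ask the driver to remove them either. The filter above in predicate form (sketch using the mac80211 types from this diff; the helper name is illustrative):

	static bool sdata_visible_to_driver(struct ieee80211_sub_if_data *sdata)
	{
		return ieee80211_sdata_running(sdata) &&
		       sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
		       sdata->vif.type != NL80211_IFTYPE_MONITOR;
	}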
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index d3f414fe67e0..e126605cec66 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -210,7 +210,7 @@ static bool rc_no_data_or_no_ack_use_min(struct ieee80211_tx_rate_control *txrc)
210 !ieee80211_is_data(fc); 210 !ieee80211_is_data(fc);
211} 211}
212 212
213static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, 213static void rc_send_low_basicrate(s8 *idx, u32 basic_rates,
214 struct ieee80211_supported_band *sband) 214 struct ieee80211_supported_band *sband)
215{ 215{
216 u8 i; 216 u8 i;
@@ -232,37 +232,28 @@ static void rc_send_low_broadcast(s8 *idx, u32 basic_rates,
232 /* could not find a basic rate; use original selection */ 232 /* could not find a basic rate; use original selection */
233} 233}
234 234
235static inline s8 235static void __rate_control_send_low(struct ieee80211_hw *hw,
236rate_lowest_non_cck_index(struct ieee80211_supported_band *sband, 236 struct ieee80211_supported_band *sband,
237 struct ieee80211_sta *sta) 237 struct ieee80211_sta *sta,
238 struct ieee80211_tx_info *info)
238{ 239{
239 int i; 240 int i;
241 u32 rate_flags =
242 ieee80211_chandef_rate_flags(&hw->conf.chandef);
243
244 if ((sband->band == IEEE80211_BAND_2GHZ) &&
245 (info->flags & IEEE80211_TX_CTL_NO_CCK_RATE))
246 rate_flags |= IEEE80211_RATE_ERP_G;
240 247
248 info->control.rates[0].idx = 0;
241 for (i = 0; i < sband->n_bitrates; i++) { 249 for (i = 0; i < sband->n_bitrates; i++) {
242 struct ieee80211_rate *srate = &sband->bitrates[i]; 250 if (!rate_supported(sta, sband->band, i))
243 if ((srate->bitrate == 10) || (srate->bitrate == 20) ||
244 (srate->bitrate == 55) || (srate->bitrate == 110))
245 continue; 251 continue;
246 252
247 if (rate_supported(sta, sband->band, i)) 253 info->control.rates[0].idx = i;
248 return i; 254 break;
249 } 255 }
250 256 WARN_ON_ONCE(i == sband->n_bitrates);
251 /* No matching rate found */
252 return 0;
253}
254
255static void __rate_control_send_low(struct ieee80211_hw *hw,
256 struct ieee80211_supported_band *sband,
257 struct ieee80211_sta *sta,
258 struct ieee80211_tx_info *info)
259{
260 if ((sband->band != IEEE80211_BAND_2GHZ) ||
261 !(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE))
262 info->control.rates[0].idx = rate_lowest_index(sband, sta);
263 else
264 info->control.rates[0].idx =
265 rate_lowest_non_cck_index(sband, sta);
266 257
267 info->control.rates[0].count = 258 info->control.rates[0].count =
268 (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 259 (info->flags & IEEE80211_TX_CTL_NO_ACK) ?
@@ -272,28 +263,37 @@ static void __rate_control_send_low(struct ieee80211_hw *hw,
272} 263}
273 264
274 265
275bool rate_control_send_low(struct ieee80211_sta *sta, 266bool rate_control_send_low(struct ieee80211_sta *pubsta,
276 void *priv_sta, 267 void *priv_sta,
277 struct ieee80211_tx_rate_control *txrc) 268 struct ieee80211_tx_rate_control *txrc)
278{ 269{
279 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); 270 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);
280 struct ieee80211_supported_band *sband = txrc->sband; 271 struct ieee80211_supported_band *sband = txrc->sband;
272 struct sta_info *sta;
281 int mcast_rate; 273 int mcast_rate;
274 bool use_basicrate = false;
282 275
283 if (!sta || !priv_sta || rc_no_data_or_no_ack_use_min(txrc)) { 276 if (!pubsta || !priv_sta || rc_no_data_or_no_ack_use_min(txrc)) {
284 __rate_control_send_low(txrc->hw, sband, sta, info); 277 __rate_control_send_low(txrc->hw, sband, pubsta, info);
285 278
286 if (!sta && txrc->bss) { 279 if (!pubsta && txrc->bss) {
287 mcast_rate = txrc->bss_conf->mcast_rate[sband->band]; 280 mcast_rate = txrc->bss_conf->mcast_rate[sband->band];
288 if (mcast_rate > 0) { 281 if (mcast_rate > 0) {
289 info->control.rates[0].idx = mcast_rate - 1; 282 info->control.rates[0].idx = mcast_rate - 1;
290 return true; 283 return true;
291 } 284 }
285 use_basicrate = true;
286 } else if (pubsta) {
287 sta = container_of(pubsta, struct sta_info, sta);
288 if (ieee80211_vif_is_mesh(&sta->sdata->vif))
289 use_basicrate = true;
290 }
292 291
293 rc_send_low_broadcast(&info->control.rates[0].idx, 292 if (use_basicrate)
293 rc_send_low_basicrate(&info->control.rates[0].idx,
294 txrc->bss_conf->basic_rates, 294 txrc->bss_conf->basic_rates,
295 sband); 295 sband);
296 } 296
297 return true; 297 return true;
298 } 298 }
299 return false; 299 return false;
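rc_send_low_basicrate(), as the rename suggests, bumps the chosen index up to the next configured basic rate so that all intended receivers — broadcast frames, and now mesh peers too — can decode the frame. A standalone model of that upward scan (bitmap values are illustrative):

	#include <stdio.h>

	int main(void)
	{
		unsigned basic_rates = 0x150;	/* indices 4, 6, 8 are basic */
		int idx = 5, i;

		for (i = idx + 1; i <= 8; i++) {
			if (basic_rates & (1u << i)) {
				idx = i;	/* next basic rate above */
				break;
			}
		}
		/* if none was found, the original selection is kept */
		printf("send at index %d\n", idx);	/* 6 */
		return 0;
	}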
@@ -397,8 +397,14 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate,
397 return; 397 return;
398 398
399 /* if HT BSS, and we handle a data frame, also try HT rates */ 399 /* if HT BSS, and we handle a data frame, also try HT rates */
400 if (chan_width == NL80211_CHAN_WIDTH_20_NOHT) 400 switch (chan_width) {
401 case NL80211_CHAN_WIDTH_20_NOHT:
402 case NL80211_CHAN_WIDTH_5:
403 case NL80211_CHAN_WIDTH_10:
401 return; 404 return;
405 default:
406 break;
407 }
402 408
403 alt_rate.idx = 0; 409 alt_rate.idx = 0;
404 /* keep protection flags */ 410 /* keep protection flags */
@@ -579,6 +585,7 @@ static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata,
579 u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN]; 585 u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN];
580 bool has_mcs_mask; 586 bool has_mcs_mask;
581 u32 mask; 587 u32 mask;
588 u32 rate_flags;
582 int i; 589 int i;
583 590
584 /* 591 /*
@@ -588,6 +595,12 @@ static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata,
588 */ 595 */
589 mask = sdata->rc_rateidx_mask[info->band]; 596 mask = sdata->rc_rateidx_mask[info->band];
590 has_mcs_mask = sdata->rc_has_mcs_mask[info->band]; 597 has_mcs_mask = sdata->rc_has_mcs_mask[info->band];
598 rate_flags =
599 ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
600 for (i = 0; i < sband->n_bitrates; i++)
601 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
602 mask &= ~BIT(i);
603
591 if (mask == (1 << sband->n_bitrates) - 1 && !has_mcs_mask) 604 if (mask == (1 << sband->n_bitrates) - 1 && !has_mcs_mask)
592 return; 605 return;
593 606
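The new loop above strips rates that cannot be used in the current channel mode out of the user-supplied mask before the mask is applied: a rate survives only if its flags contain every bit of the channel's required rate_flags. In miniature:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t flags[4] = { 0x0, 0x4, 0x4, 0x4 }; /* 0x4 ~ required flag */
		uint32_t rate_flags = 0x4;
		uint32_t mask = 0xf;	/* user allows all four rates */
		int i;

		for (i = 0; i < 4; i++)
			if ((rate_flags & flags[i]) != rate_flags)
				mask &= ~(1u << i);

		printf("mask = 0x%x\n", mask);	/* 0xe: rate 0 filtered out */
		return 0;
	}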
@@ -615,7 +628,7 @@ static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata,
615 if (rates[i].idx < 0) 628 if (rates[i].idx < 0)
616 break; 629 break;
617 630
618 rate_idx_match_mask(&rates[i], sband, mask, chan_width, 631 rate_idx_match_mask(&rates[i], sband, chan_width, mask,
619 mcs_mask); 632 mcs_mask);
620 } 633 }
621} 634}
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index d35a5dd3fb13..5dedc56c94db 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -66,11 +66,12 @@ static inline void rate_control_rate_init(struct sta_info *sta)
66 } 66 }
67 67
68 sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band]; 68 sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band];
69 rcu_read_unlock();
70 69
71 ieee80211_sta_set_rx_nss(sta); 70 ieee80211_sta_set_rx_nss(sta);
72 71
73 ref->ops->rate_init(ref->priv, sband, ista, priv_sta); 72 ref->ops->rate_init(ref->priv, sband, &chanctx_conf->def, ista,
73 priv_sta);
74 rcu_read_unlock();
74 set_sta_flag(sta, WLAN_STA_RATE_CONTROL); 75 set_sta_flag(sta, WLAN_STA_RATE_CONTROL);
75} 76}
76 77
@@ -81,10 +82,21 @@ static inline void rate_control_rate_update(struct ieee80211_local *local,
81 struct rate_control_ref *ref = local->rate_ctrl; 82 struct rate_control_ref *ref = local->rate_ctrl;
82 struct ieee80211_sta *ista = &sta->sta; 83 struct ieee80211_sta *ista = &sta->sta;
83 void *priv_sta = sta->rate_ctrl_priv; 84 void *priv_sta = sta->rate_ctrl_priv;
85 struct ieee80211_chanctx_conf *chanctx_conf;
86
87 if (ref && ref->ops->rate_update) {
88 rcu_read_lock();
84 89
85 if (ref && ref->ops->rate_update) 90 chanctx_conf = rcu_dereference(sta->sdata->vif.chanctx_conf);
86 ref->ops->rate_update(ref->priv, sband, ista, 91 if (WARN_ON(!chanctx_conf)) {
87 priv_sta, changed); 92 rcu_read_unlock();
93 return;
94 }
95
96 ref->ops->rate_update(ref->priv, sband, &chanctx_conf->def,
97 ista, priv_sta, changed);
98 rcu_read_unlock();
99 }
88 drv_sta_rc_update(local, sta->sdata, &sta->sta, changed); 100 drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
89} 101}
90 102
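Moving rcu_read_unlock() below the rate_init()/rate_update() calls is required once &chanctx_conf->def is passed through: a pointer obtained with rcu_dereference() is only guaranteed valid inside the read-side critical section. The pattern both rate.h hunks now follow (kernel-style sketch):

	rcu_read_lock();
	chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
	if (WARN_ON(!chanctx_conf)) {
		rcu_read_unlock();
		return;
	}
	/* every use of chanctx_conf->def stays inside the RCU section */
	ref->ops->rate_init(ref->priv, sband, &chanctx_conf->def, ista, priv_sta);
	rcu_read_unlock();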
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index ac7ef5414bde..8b5f7ef7c0c9 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -290,7 +290,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
290 struct minstrel_rate *msr, *mr; 290 struct minstrel_rate *msr, *mr;
291 unsigned int ndx; 291 unsigned int ndx;
292 bool mrr_capable; 292 bool mrr_capable;
293 bool prev_sample = mi->prev_sample; 293 bool prev_sample;
294 int delta; 294 int delta;
295 int sampling_ratio; 295 int sampling_ratio;
296 296
@@ -314,6 +314,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
314 (mi->sample_count + mi->sample_deferred / 2); 314 (mi->sample_count + mi->sample_deferred / 2);
315 315
316 /* delta < 0: no sampling required */ 316 /* delta < 0: no sampling required */
317 prev_sample = mi->prev_sample;
317 mi->prev_sample = false; 318 mi->prev_sample = false;
318 if (delta < 0 || (!mrr_capable && prev_sample)) 319 if (delta < 0 || (!mrr_capable && prev_sample))
319 return; 320 return;
@@ -382,14 +383,18 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
382static void 383static void
383calc_rate_durations(enum ieee80211_band band, 384calc_rate_durations(enum ieee80211_band band,
384 struct minstrel_rate *d, 385 struct minstrel_rate *d,
385 struct ieee80211_rate *rate) 386 struct ieee80211_rate *rate,
387 struct cfg80211_chan_def *chandef)
386{ 388{
387 int erp = !!(rate->flags & IEEE80211_RATE_ERP_G); 389 int erp = !!(rate->flags & IEEE80211_RATE_ERP_G);
390 int shift = ieee80211_chandef_get_shift(chandef);
388 391
389 d->perfect_tx_time = ieee80211_frame_duration(band, 1200, 392 d->perfect_tx_time = ieee80211_frame_duration(band, 1200,
390 rate->bitrate, erp, 1); 393 DIV_ROUND_UP(rate->bitrate, 1 << shift), erp, 1,
394 shift);
391 d->ack_time = ieee80211_frame_duration(band, 10, 395 d->ack_time = ieee80211_frame_duration(band, 10,
392 rate->bitrate, erp, 1); 396 DIV_ROUND_UP(rate->bitrate, 1 << shift), erp, 1,
397 shift);
393} 398}
394 399
395static void 400static void
@@ -417,21 +422,25 @@ init_sample_table(struct minstrel_sta_info *mi)
417 422
418static void 423static void
419minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, 424minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
420 struct ieee80211_sta *sta, void *priv_sta) 425 struct cfg80211_chan_def *chandef,
426 struct ieee80211_sta *sta, void *priv_sta)
421{ 427{
422 struct minstrel_sta_info *mi = priv_sta; 428 struct minstrel_sta_info *mi = priv_sta;
423 struct minstrel_priv *mp = priv; 429 struct minstrel_priv *mp = priv;
424 struct ieee80211_rate *ctl_rate; 430 struct ieee80211_rate *ctl_rate;
425 unsigned int i, n = 0; 431 unsigned int i, n = 0;
426 unsigned int t_slot = 9; /* FIXME: get real slot time */ 432 unsigned int t_slot = 9; /* FIXME: get real slot time */
433 u32 rate_flags;
427 434
428 mi->sta = sta; 435 mi->sta = sta;
429 mi->lowest_rix = rate_lowest_index(sband, sta); 436 mi->lowest_rix = rate_lowest_index(sband, sta);
430 ctl_rate = &sband->bitrates[mi->lowest_rix]; 437 ctl_rate = &sband->bitrates[mi->lowest_rix];
431 mi->sp_ack_dur = ieee80211_frame_duration(sband->band, 10, 438 mi->sp_ack_dur = ieee80211_frame_duration(sband->band, 10,
432 ctl_rate->bitrate, 439 ctl_rate->bitrate,
433 !!(ctl_rate->flags & IEEE80211_RATE_ERP_G), 1); 440 !!(ctl_rate->flags & IEEE80211_RATE_ERP_G), 1,
441 ieee80211_chandef_get_shift(chandef));
434 442
443 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef);
435 memset(mi->max_tp_rate, 0, sizeof(mi->max_tp_rate)); 444 memset(mi->max_tp_rate, 0, sizeof(mi->max_tp_rate));
436 mi->max_prob_rate = 0; 445 mi->max_prob_rate = 0;
437 446
@@ -440,15 +449,22 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
440 unsigned int tx_time = 0, tx_time_cts = 0, tx_time_rtscts = 0; 449 unsigned int tx_time = 0, tx_time_cts = 0, tx_time_rtscts = 0;
441 unsigned int tx_time_single; 450 unsigned int tx_time_single;
442 unsigned int cw = mp->cw_min; 451 unsigned int cw = mp->cw_min;
452 int shift;
443 453
444 if (!rate_supported(sta, sband->band, i)) 454 if (!rate_supported(sta, sband->band, i))
445 continue; 455 continue;
456 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
457 continue;
458
446 n++; 459 n++;
447 memset(mr, 0, sizeof(*mr)); 460 memset(mr, 0, sizeof(*mr));
448 461
449 mr->rix = i; 462 mr->rix = i;
450 mr->bitrate = sband->bitrates[i].bitrate / 5; 463 shift = ieee80211_chandef_get_shift(chandef);
451 calc_rate_durations(sband->band, mr, &sband->bitrates[i]); 464 mr->bitrate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
465 (1 << shift) * 5);
466 calc_rate_durations(sband->band, mr, &sband->bitrates[i],
467 chandef);
452 468
453 /* calculate maximum number of retransmissions before 469 /* calculate maximum number of retransmissions before
454 * fallback (based on maximum segment size) */ 470 * fallback (based on maximum segment size) */
@@ -546,6 +562,7 @@ minstrel_init_cck_rates(struct minstrel_priv *mp)
546{ 562{
547 static const int bitrates[4] = { 10, 20, 55, 110 }; 563 static const int bitrates[4] = { 10, 20, 55, 110 };
548 struct ieee80211_supported_band *sband; 564 struct ieee80211_supported_band *sband;
565 u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef);
549 int i, j; 566 int i, j;
550 567
551 sband = mp->hw->wiphy->bands[IEEE80211_BAND_2GHZ]; 568 sband = mp->hw->wiphy->bands[IEEE80211_BAND_2GHZ];
@@ -558,6 +575,9 @@ minstrel_init_cck_rates(struct minstrel_priv *mp)
558 if (rate->flags & IEEE80211_RATE_ERP_G) 575 if (rate->flags & IEEE80211_RATE_ERP_G)
559 continue; 576 continue;
560 577
578 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
579 continue;
580
561 for (j = 0; j < ARRAY_SIZE(bitrates); j++) { 581 for (j = 0; j < ARRAY_SIZE(bitrates); j++) {
562 if (rate->bitrate != bitrates[j]) 582 if (rate->bitrate != bitrates[j])
563 continue; 583 continue;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 5b2d3012b983..7c323f27ba23 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -776,7 +776,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
776 776
777 /* Don't use EAPOL frames for sampling on non-mrr hw */ 777 /* Don't use EAPOL frames for sampling on non-mrr hw */
778 if (mp->hw->max_rates == 1 && 778 if (mp->hw->max_rates == 1 &&
779 txrc->skb->protocol == cpu_to_be16(ETH_P_PAE)) 779 (info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO))
780 sample_idx = -1; 780 sample_idx = -1;
781 else 781 else
782 sample_idx = minstrel_get_sample_rate(mp, mi); 782 sample_idx = minstrel_get_sample_rate(mp, mi);
@@ -804,10 +804,18 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
804 804
805 sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES]; 805 sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES];
806 info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; 806 info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
807 rate->count = 1;
808
809 if (sample_idx / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) {
810 int idx = sample_idx % ARRAY_SIZE(mp->cck_rates);
811 rate->idx = mp->cck_rates[idx];
812 rate->flags = 0;
813 return;
814 }
815
807 rate->idx = sample_idx % MCS_GROUP_RATES + 816 rate->idx = sample_idx % MCS_GROUP_RATES +
808 (sample_group->streams - 1) * MCS_GROUP_RATES; 817 (sample_group->streams - 1) * MCS_GROUP_RATES;
809 rate->flags = IEEE80211_TX_RC_MCS | sample_group->flags; 818 rate->flags = IEEE80211_TX_RC_MCS | sample_group->flags;
810 rate->count = 1;
811} 819}
812 820
813static void 821static void
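minstrel_ht lays sample indices out as group * MCS_GROUP_RATES + rate; the new branch above maps indices in the CCK group through mp->cck_rates[] and clears the MCS flags, since those rates use legacy signalling. A standalone model (the constants are illustrative, not the kernel's):

	#include <stdio.h>

	#define MCS_GROUP_RATES 8
	#define CCK_GROUP 3	/* pretend the CCK group is group 3 */

	static const int cck_rates[4] = { 0, 1, 2, 3 }; /* sband bitrate indices */

	int main(void)
	{
		int sample_idx = CCK_GROUP * MCS_GROUP_RATES + 2;

		if (sample_idx / MCS_GROUP_RATES == CCK_GROUP)
			printf("legacy idx %d, flags 0\n",
			       cck_rates[sample_idx % 4]);
		else
			printf("MCS idx %d\n", sample_idx % MCS_GROUP_RATES);
		return 0;
	}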
@@ -820,6 +828,9 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
820 if (sband->band != IEEE80211_BAND_2GHZ) 828 if (sband->band != IEEE80211_BAND_2GHZ)
821 return; 829 return;
822 830
831 if (!(mp->hw->flags & IEEE80211_HW_SUPPORTS_HT_CCK_RATES))
832 return;
833
823 mi->cck_supported = 0; 834 mi->cck_supported = 0;
824 mi->cck_supported_short = 0; 835 mi->cck_supported_short = 0;
825 for (i = 0; i < 4; i++) { 836 for (i = 0; i < 4; i++) {
@@ -836,6 +847,7 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
836 847
837static void 848static void
838minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, 849minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
850 struct cfg80211_chan_def *chandef,
839 struct ieee80211_sta *sta, void *priv_sta) 851 struct ieee80211_sta *sta, void *priv_sta)
840{ 852{
841 struct minstrel_priv *mp = priv; 853 struct minstrel_priv *mp = priv;
@@ -861,8 +873,9 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
861 mi->sta = sta; 873 mi->sta = sta;
862 mi->stats_update = jiffies; 874 mi->stats_update = jiffies;
863 875
864 ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1); 876 ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1, 0);
865 mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1) + ack_dur; 877 mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1, 0);
878 mi->overhead += ack_dur;
866 mi->overhead_rtscts = mi->overhead + 2 * ack_dur; 879 mi->overhead_rtscts = mi->overhead + 2 * ack_dur;
867 880
868 mi->avg_ampdu_len = MINSTREL_FRAC(1, 1); 881 mi->avg_ampdu_len = MINSTREL_FRAC(1, 1);
@@ -931,22 +944,25 @@ use_legacy:
931 memset(&msp->legacy, 0, sizeof(msp->legacy)); 944 memset(&msp->legacy, 0, sizeof(msp->legacy));
932 msp->legacy.r = msp->ratelist; 945 msp->legacy.r = msp->ratelist;
933 msp->legacy.sample_table = msp->sample_table; 946 msp->legacy.sample_table = msp->sample_table;
934 return mac80211_minstrel.rate_init(priv, sband, sta, &msp->legacy); 947 return mac80211_minstrel.rate_init(priv, sband, chandef, sta,
948 &msp->legacy);
935} 949}
936 950
937static void 951static void
938minstrel_ht_rate_init(void *priv, struct ieee80211_supported_band *sband, 952minstrel_ht_rate_init(void *priv, struct ieee80211_supported_band *sband,
953 struct cfg80211_chan_def *chandef,
939 struct ieee80211_sta *sta, void *priv_sta) 954 struct ieee80211_sta *sta, void *priv_sta)
940{ 955{
941 minstrel_ht_update_caps(priv, sband, sta, priv_sta); 956 minstrel_ht_update_caps(priv, sband, chandef, sta, priv_sta);
942} 957}
943 958
944static void 959static void
945minstrel_ht_rate_update(void *priv, struct ieee80211_supported_band *sband, 960minstrel_ht_rate_update(void *priv, struct ieee80211_supported_band *sband,
961 struct cfg80211_chan_def *chandef,
946 struct ieee80211_sta *sta, void *priv_sta, 962 struct ieee80211_sta *sta, void *priv_sta,
947 u32 changed) 963 u32 changed)
948{ 964{
949 minstrel_ht_update_caps(priv, sband, sta, priv_sta); 965 minstrel_ht_update_caps(priv, sband, chandef, sta, priv_sta);
950} 966}
951 967
952static void * 968static void *
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 502d3ecc4a79..958fad07b54c 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -293,6 +293,7 @@ rate_control_pid_get_rate(void *priv, struct ieee80211_sta *sta,
293 293
294static void 294static void
295rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband, 295rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband,
296 struct cfg80211_chan_def *chandef,
296 struct ieee80211_sta *sta, void *priv_sta) 297 struct ieee80211_sta *sta, void *priv_sta)
297{ 298{
298 struct rc_pid_sta_info *spinfo = priv_sta; 299 struct rc_pid_sta_info *spinfo = priv_sta;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 8e2952620256..54395d7583ba 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -87,11 +87,13 @@ ieee80211_rx_radiotap_space(struct ieee80211_local *local,
87 int len; 87 int len;
88 88
89 /* always present fields */ 89 /* always present fields */
90 len = sizeof(struct ieee80211_radiotap_header) + 9; 90 len = sizeof(struct ieee80211_radiotap_header) + 8;
91 91
92 /* allocate extra bitmap */ 92 /* allocate extra bitmaps */
93 if (status->vendor_radiotap_len) 93 if (status->vendor_radiotap_len)
94 len += 4; 94 len += 4;
95 if (status->chains)
96 len += 4 * hweight8(status->chains);
95 97
96 if (ieee80211_have_rx_timestamp(status)) { 98 if (ieee80211_have_rx_timestamp(status)) {
97 len = ALIGN(len, 8); 99 len = ALIGN(len, 8);
@@ -100,6 +102,10 @@ ieee80211_rx_radiotap_space(struct ieee80211_local *local,
100 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) 102 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
101 len += 1; 103 len += 1;
102 104
105 /* antenna field, if we don't have per-chain info */
106 if (!status->chains)
107 len += 1;
108
103 /* padding for RX_FLAGS if necessary */ 109 /* padding for RX_FLAGS if necessary */
104 len = ALIGN(len, 2); 110 len = ALIGN(len, 2);
105 111
@@ -116,6 +122,11 @@ ieee80211_rx_radiotap_space(struct ieee80211_local *local,
116 len += 12; 122 len += 12;
117 } 123 }
118 124
125 if (status->chains) {
126 /* antenna and antenna signal fields */
127 len += 2 * hweight8(status->chains);
128 }
129
119 if (status->vendor_radiotap_len) { 130 if (status->vendor_radiotap_len) {
120 if (WARN_ON_ONCE(status->vendor_radiotap_align == 0)) 131 if (WARN_ON_ONCE(status->vendor_radiotap_align == 0))
121 status->vendor_radiotap_align = 1; 132 status->vendor_radiotap_align = 1;
@@ -145,8 +156,12 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
145 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 156 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
146 struct ieee80211_radiotap_header *rthdr; 157 struct ieee80211_radiotap_header *rthdr;
147 unsigned char *pos; 158 unsigned char *pos;
159 __le32 *it_present;
160 u32 it_present_val;
148 u16 rx_flags = 0; 161 u16 rx_flags = 0;
149 int mpdulen; 162 u16 channel_flags = 0;
163 int mpdulen, chain;
164 unsigned long chains = status->chains;
150 165
151 mpdulen = skb->len; 166 mpdulen = skb->len;
152 if (!(has_fcs && (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS))) 167 if (!(has_fcs && (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)))
@@ -154,25 +169,39 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
154 169
155 rthdr = (struct ieee80211_radiotap_header *)skb_push(skb, rtap_len); 170 rthdr = (struct ieee80211_radiotap_header *)skb_push(skb, rtap_len);
156 memset(rthdr, 0, rtap_len); 171 memset(rthdr, 0, rtap_len);
172 it_present = &rthdr->it_present;
157 173
158 /* radiotap header, set always present flags */ 174 /* radiotap header, set always present flags */
159 rthdr->it_present =
160 cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) |
161 (1 << IEEE80211_RADIOTAP_CHANNEL) |
162 (1 << IEEE80211_RADIOTAP_ANTENNA) |
163 (1 << IEEE80211_RADIOTAP_RX_FLAGS));
164 rthdr->it_len = cpu_to_le16(rtap_len + status->vendor_radiotap_len); 175 rthdr->it_len = cpu_to_le16(rtap_len + status->vendor_radiotap_len);
176 it_present_val = BIT(IEEE80211_RADIOTAP_FLAGS) |
177 BIT(IEEE80211_RADIOTAP_CHANNEL) |
178 BIT(IEEE80211_RADIOTAP_RX_FLAGS);
165 179
166 pos = (unsigned char *)(rthdr + 1); 180 if (!status->chains)
181 it_present_val |= BIT(IEEE80211_RADIOTAP_ANTENNA);
182
183 for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) {
184 it_present_val |=
185 BIT(IEEE80211_RADIOTAP_EXT) |
186 BIT(IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE);
187 put_unaligned_le32(it_present_val, it_present);
188 it_present++;
189 it_present_val = BIT(IEEE80211_RADIOTAP_ANTENNA) |
190 BIT(IEEE80211_RADIOTAP_DBM_ANTSIGNAL);
191 }
167 192
168 if (status->vendor_radiotap_len) { 193 if (status->vendor_radiotap_len) {
169 rthdr->it_present |= 194 it_present_val |= BIT(IEEE80211_RADIOTAP_VENDOR_NAMESPACE) |
170 cpu_to_le32(BIT(IEEE80211_RADIOTAP_VENDOR_NAMESPACE)) | 195 BIT(IEEE80211_RADIOTAP_EXT);
171 cpu_to_le32(BIT(IEEE80211_RADIOTAP_EXT)); 196 put_unaligned_le32(it_present_val, it_present);
172 put_unaligned_le32(status->vendor_radiotap_bitmap, pos); 197 it_present++;
173 pos += 4; 198 it_present_val = status->vendor_radiotap_bitmap;
174 } 199 }
175 200
201 put_unaligned_le32(it_present_val, it_present);
202
203 pos = (void *)(it_present + 1);
204
176 /* the order of the following fields is important */ 205 /* the order of the following fields is important */
177 206
178 /* IEEE80211_RADIOTAP_TSFT */ 207 /* IEEE80211_RADIOTAP_TSFT */
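Radiotap lets the 32-bit it_present word be chained: setting bit 31 (IEEE80211_RADIOTAP_EXT) announces that another presence bitmap follows, which is how the header above now carries one antenna/antenna-signal pair per reported chain plus an optional vendor namespace. Counting the words in miniature:

	#include <stdio.h>
	#include <stdint.h>

	static int popcount8(uint8_t v)
	{
		int n = 0;

		for (; v; v &= v - 1)
			n++;
		return n;
	}

	int main(void)
	{
		uint8_t chains = 0x5;	/* chains 0 and 2 reported */
		int vendor = 1;		/* one vendor namespace present */
		int words = 1 + popcount8(chains) + vendor;

		/* every word except the last carries the EXT bit */
		printf("%d presence words, %d chained via EXT\n",
		       words, words - 1);
		return 0;
	}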
@@ -207,28 +236,35 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
207 */ 236 */
208 *pos = 0; 237 *pos = 0;
209 } else { 238 } else {
239 int shift = 0;
210 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE); 240 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
211 *pos = rate->bitrate / 5; 241 if (status->flag & RX_FLAG_10MHZ)
242 shift = 1;
243 else if (status->flag & RX_FLAG_5MHZ)
244 shift = 2;
245 *pos = DIV_ROUND_UP(rate->bitrate, 5 * (1 << shift));
212 } 246 }
213 pos++; 247 pos++;
214 248
215 /* IEEE80211_RADIOTAP_CHANNEL */ 249 /* IEEE80211_RADIOTAP_CHANNEL */
216 put_unaligned_le16(status->freq, pos); 250 put_unaligned_le16(status->freq, pos);
217 pos += 2; 251 pos += 2;
252 if (status->flag & RX_FLAG_10MHZ)
253 channel_flags |= IEEE80211_CHAN_HALF;
254 else if (status->flag & RX_FLAG_5MHZ)
255 channel_flags |= IEEE80211_CHAN_QUARTER;
256
218 if (status->band == IEEE80211_BAND_5GHZ) 257 if (status->band == IEEE80211_BAND_5GHZ)
219 put_unaligned_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ, 258 channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ;
220 pos);
221 else if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT)) 259 else if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT))
222 put_unaligned_le16(IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ, 260 channel_flags |= IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ;
223 pos);
224 else if (rate && rate->flags & IEEE80211_RATE_ERP_G) 261 else if (rate && rate->flags & IEEE80211_RATE_ERP_G)
225 put_unaligned_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ, 262 channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ;
226 pos);
227 else if (rate) 263 else if (rate)
228 put_unaligned_le16(IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ, 264 channel_flags |= IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ;
229 pos);
230 else 265 else
231 put_unaligned_le16(IEEE80211_CHAN_2GHZ, pos); 266 channel_flags |= IEEE80211_CHAN_2GHZ;
267 put_unaligned_le16(channel_flags, pos);
232 pos += 2; 268 pos += 2;
233 269
234 /* IEEE80211_RADIOTAP_DBM_ANTSIGNAL */ 270 /* IEEE80211_RADIOTAP_DBM_ANTSIGNAL */
@@ -242,9 +278,11 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
242 278
243 /* IEEE80211_RADIOTAP_LOCK_QUALITY is missing */ 279 /* IEEE80211_RADIOTAP_LOCK_QUALITY is missing */
244 280
245 /* IEEE80211_RADIOTAP_ANTENNA */ 281 if (!status->chains) {
246 *pos = status->antenna; 282 /* IEEE80211_RADIOTAP_ANTENNA */
247 pos++; 283 *pos = status->antenna;
284 pos++;
285 }
248 286
249 /* IEEE80211_RADIOTAP_DB_ANTNOISE is not used */ 287 /* IEEE80211_RADIOTAP_DB_ANTNOISE is not used */
250 288
@@ -258,6 +296,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
258 pos += 2; 296 pos += 2;
259 297
260 if (status->flag & RX_FLAG_HT) { 298 if (status->flag & RX_FLAG_HT) {
299 unsigned int stbc;
300
261 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); 301 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
262 *pos++ = local->hw.radiotap_mcs_details; 302 *pos++ = local->hw.radiotap_mcs_details;
263 *pos = 0; 303 *pos = 0;
@@ -267,6 +307,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
267 *pos |= IEEE80211_RADIOTAP_MCS_BW_40; 307 *pos |= IEEE80211_RADIOTAP_MCS_BW_40;
268 if (status->flag & RX_FLAG_HT_GF) 308 if (status->flag & RX_FLAG_HT_GF)
269 *pos |= IEEE80211_RADIOTAP_MCS_FMT_GF; 309 *pos |= IEEE80211_RADIOTAP_MCS_FMT_GF;
310 stbc = (status->flag & RX_FLAG_STBC_MASK) >> RX_FLAG_STBC_SHIFT;
311 *pos |= stbc << IEEE80211_RADIOTAP_MCS_STBC_SHIFT;
270 pos++; 312 pos++;
271 *pos++ = status->rate_idx; 313 *pos++ = status->rate_idx;
272 } 314 }
@@ -337,6 +379,11 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
337 pos += 2; 379 pos += 2;
338 } 380 }
339 381
382 for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) {
383 *pos++ = status->chain_signal[chain];
384 *pos++ = chain;
385 }
386
340 if (status->vendor_radiotap_len) { 387 if (status->vendor_radiotap_len) {
341 /* ensure 2 byte alignment for the vendor field as required */ 388 /* ensure 2 byte alignment for the vendor field as required */
342 if ((pos - (u8 *)rthdr) & 1) 389 if ((pos - (u8 *)rthdr) & 1)
@@ -932,8 +979,14 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
932 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; 979 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
933 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); 980 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
934 981
935 /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ 982 /*
936 if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { 983 * Drop duplicate 802.11 retransmissions
984 * (IEEE 802.11-2012: 9.3.2.10 "Duplicate detection and recovery")
985 */
986 if (rx->skb->len >= 24 && rx->sta &&
987 !ieee80211_is_ctl(hdr->frame_control) &&
988 !ieee80211_is_qos_nullfunc(hdr->frame_control) &&
989 !is_multicast_ether_addr(hdr->addr1)) {
937 if (unlikely(ieee80211_has_retry(hdr->frame_control) && 990 if (unlikely(ieee80211_has_retry(hdr->frame_control) &&
938 rx->sta->last_seq_ctrl[rx->seqno_idx] == 991 rx->sta->last_seq_ctrl[rx->seqno_idx] ==
939 hdr->seq_ctrl)) { 992 hdr->seq_ctrl)) {
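Worked example for the duplicate check above: the 16-bit sequence control field packs a 4-bit fragment number and a 12-bit sequence number, and a retransmitted MPDU carries the retry flag plus the exact same seq_ctrl as the previous frame from that transmitter:

	#include <stdio.h>
	#include <stdint.h>

	static uint16_t seq_ctrl(unsigned seq, unsigned frag)
	{
		return (uint16_t)((seq << 4) | (frag & 0xf));
	}

	int main(void)
	{
		uint16_t last = seq_ctrl(2049, 0);	/* previously seen */
		uint16_t cur = seq_ctrl(2049, 0);	/* just received */
		int retry = 1;				/* retry bit set */

		if (retry && cur == last)
			printf("duplicate, drop\n");
		return 0;
	}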
@@ -1002,207 +1055,6 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
1002 1055
1003 1056
1004static ieee80211_rx_result debug_noinline 1057static ieee80211_rx_result debug_noinline
1005ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
1006{
1007 struct sk_buff *skb = rx->skb;
1008 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
1009 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
1010 int keyidx;
1011 int hdrlen;
1012 ieee80211_rx_result result = RX_DROP_UNUSABLE;
1013 struct ieee80211_key *sta_ptk = NULL;
1014 int mmie_keyidx = -1;
1015 __le16 fc;
1016
1017 /*
1018 * Key selection 101
1019 *
1020 * There are four types of keys:
1021 * - GTK (group keys)
1022 * - IGTK (group keys for management frames)
1023 * - PTK (pairwise keys)
1024 * - STK (station-to-station pairwise keys)
1025 *
1026 * When selecting a key, we have to distinguish between multicast
1027 * (including broadcast) and unicast frames, the latter can only
1028 * use PTKs and STKs while the former always use GTKs and IGTKs.
1029 * Unless, of course, actual WEP keys ("pre-RSNA") are used, then
1030 * unicast frames can also use key indices like GTKs. Hence, if we
1031 * don't have a PTK/STK we check the key index for a WEP key.
1032 *
1033 * Note that in a regular BSS, multicast frames are sent by the
1034 * AP only, associated stations unicast the frame to the AP first
1035 * which then multicasts it on their behalf.
1036 *
1037 * There is also a slight problem in IBSS mode: GTKs are negotiated
1038 * with each station, that is something we don't currently handle.
1039 * The spec seems to expect that one negotiates the same key with
1040 * every station but there's no such requirement; VLANs could be
1041 * possible.
1042 */
1043
1044 /*
1045 * No point in finding a key and decrypting if the frame is neither
1046 * addressed to us nor a multicast frame.
1047 */
1048 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
1049 return RX_CONTINUE;
1050
1051 /* start without a key */
1052 rx->key = NULL;
1053
1054 if (rx->sta)
1055 sta_ptk = rcu_dereference(rx->sta->ptk);
1056
1057 fc = hdr->frame_control;
1058
1059 if (!ieee80211_has_protected(fc))
1060 mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb);
1061
1062 if (!is_multicast_ether_addr(hdr->addr1) && sta_ptk) {
1063 rx->key = sta_ptk;
1064 if ((status->flag & RX_FLAG_DECRYPTED) &&
1065 (status->flag & RX_FLAG_IV_STRIPPED))
1066 return RX_CONTINUE;
1067 /* Skip decryption if the frame is not protected. */
1068 if (!ieee80211_has_protected(fc))
1069 return RX_CONTINUE;
1070 } else if (mmie_keyidx >= 0) {
1071 /* Broadcast/multicast robust management frame / BIP */
1072 if ((status->flag & RX_FLAG_DECRYPTED) &&
1073 (status->flag & RX_FLAG_IV_STRIPPED))
1074 return RX_CONTINUE;
1075
1076 if (mmie_keyidx < NUM_DEFAULT_KEYS ||
1077 mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
1078 return RX_DROP_MONITOR; /* unexpected BIP keyidx */
1079 if (rx->sta)
1080 rx->key = rcu_dereference(rx->sta->gtk[mmie_keyidx]);
1081 if (!rx->key)
1082 rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]);
1083 } else if (!ieee80211_has_protected(fc)) {
1084 /*
1085 * The frame was not protected, so skip decryption. However, we
1086 * need to set rx->key if there is a key that could have been
1087 * used so that the frame may be dropped if encryption would
1088 * have been expected.
1089 */
1090 struct ieee80211_key *key = NULL;
1091 struct ieee80211_sub_if_data *sdata = rx->sdata;
1092 int i;
1093
1094 if (ieee80211_is_mgmt(fc) &&
1095 is_multicast_ether_addr(hdr->addr1) &&
1096 (key = rcu_dereference(rx->sdata->default_mgmt_key)))
1097 rx->key = key;
1098 else {
1099 if (rx->sta) {
1100 for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
1101 key = rcu_dereference(rx->sta->gtk[i]);
1102 if (key)
1103 break;
1104 }
1105 }
1106 if (!key) {
1107 for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
1108 key = rcu_dereference(sdata->keys[i]);
1109 if (key)
1110 break;
1111 }
1112 }
1113 if (key)
1114 rx->key = key;
1115 }
1116 return RX_CONTINUE;
1117 } else {
1118 u8 keyid;
1119 /*
1120 * The device doesn't give us the IV so we won't be
1121 * able to look up the key. That's ok though, we
1122 * don't need to decrypt the frame, we just won't
1123 * be able to keep statistics accurate.
1124 * Except for key threshold notifications, should
1125 * we somehow allow the driver to tell us which key
1126 * the hardware used if this flag is set?
1127 */
1128 if ((status->flag & RX_FLAG_DECRYPTED) &&
1129 (status->flag & RX_FLAG_IV_STRIPPED))
1130 return RX_CONTINUE;
1131
1132 hdrlen = ieee80211_hdrlen(fc);
1133
1134 if (rx->skb->len < 8 + hdrlen)
1135 return RX_DROP_UNUSABLE; /* TODO: count this? */
1136
1137 /*
1138 * no need to call ieee80211_wep_get_keyidx,
1139 * it verifies a bunch of things we've done already
1140 */
1141 skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1);
1142 keyidx = keyid >> 6;
1143
1144 /* check per-station GTK first, if multicast packet */
1145 if (is_multicast_ether_addr(hdr->addr1) && rx->sta)
1146 rx->key = rcu_dereference(rx->sta->gtk[keyidx]);
1147
1148 /* if not found, try default key */
1149 if (!rx->key) {
1150 rx->key = rcu_dereference(rx->sdata->keys[keyidx]);
1151
1152 /*
1153 * RSNA-protected unicast frames should always be
1154 * sent with pairwise or station-to-station keys,
1155 * but for WEP we allow using a key index as well.
1156 */
1157 if (rx->key &&
1158 rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 &&
1159 rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 &&
1160 !is_multicast_ether_addr(hdr->addr1))
1161 rx->key = NULL;
1162 }
1163 }
1164
1165 if (rx->key) {
1166 if (unlikely(rx->key->flags & KEY_FLAG_TAINTED))
1167 return RX_DROP_MONITOR;
1168
1169 rx->key->tx_rx_count++;
1170 /* TODO: add threshold stuff again */
1171 } else {
1172 return RX_DROP_MONITOR;
1173 }
1174
1175 switch (rx->key->conf.cipher) {
1176 case WLAN_CIPHER_SUITE_WEP40:
1177 case WLAN_CIPHER_SUITE_WEP104:
1178 result = ieee80211_crypto_wep_decrypt(rx);
1179 break;
1180 case WLAN_CIPHER_SUITE_TKIP:
1181 result = ieee80211_crypto_tkip_decrypt(rx);
1182 break;
1183 case WLAN_CIPHER_SUITE_CCMP:
1184 result = ieee80211_crypto_ccmp_decrypt(rx);
1185 break;
1186 case WLAN_CIPHER_SUITE_AES_CMAC:
1187 result = ieee80211_crypto_aes_cmac_decrypt(rx);
1188 break;
1189 default:
1190 /*
1191 * We can reach here only with HW-only algorithms
1192 * but why didn't it decrypt the frame?!
1193 */
1194 return RX_DROP_UNUSABLE;
1195 }
1196
1197 /* the hdr variable is invalid after the decrypt handlers */
1198
1199 /* either the frame has been decrypted or will be dropped */
1200 status->flag |= RX_FLAG_DECRYPTED;
1201
1202 return result;
1203}
1204
1205static ieee80211_rx_result debug_noinline
1206ieee80211_rx_h_check_more_data(struct ieee80211_rx_data *rx) 1058ieee80211_rx_h_check_more_data(struct ieee80211_rx_data *rx)
1207{ 1059{
1208 struct ieee80211_local *local; 1060 struct ieee80211_local *local;
@@ -1372,6 +1224,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1372 struct sk_buff *skb = rx->skb; 1224 struct sk_buff *skb = rx->skb;
1373 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 1225 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
1374 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; 1226 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
1227 int i;
1375 1228
1376 if (!sta) 1229 if (!sta)
1377 return RX_CONTINUE; 1230 return RX_CONTINUE;
@@ -1422,6 +1275,19 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1422 ewma_add(&sta->avg_signal, -status->signal); 1275 ewma_add(&sta->avg_signal, -status->signal);
1423 } 1276 }
1424 1277
1278 if (status->chains) {
1279 sta->chains = status->chains;
1280 for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) {
1281 int signal = status->chain_signal[i];
1282
1283 if (!(status->chains & BIT(i)))
1284 continue;
1285
1286 sta->chain_signal_last[i] = signal;
1287 ewma_add(&sta->chain_signal_avg[i], -signal);
1288 }
1289 }
1290
1425 /* 1291 /*
1426 * Change STA power saving mode only at the end of a frame 1292 * Change STA power saving mode only at the end of a frame
1427 * exchange sequence. 1293 * exchange sequence.
@@ -1489,6 +1355,207 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1489 return RX_CONTINUE; 1355 return RX_CONTINUE;
1490} /* ieee80211_rx_h_sta_process */ 1356} /* ieee80211_rx_h_sta_process */
1491 1357
1358static ieee80211_rx_result debug_noinline
1359ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
1360{
1361 struct sk_buff *skb = rx->skb;
1362 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
1363 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
1364 int keyidx;
1365 int hdrlen;
1366 ieee80211_rx_result result = RX_DROP_UNUSABLE;
1367 struct ieee80211_key *sta_ptk = NULL;
1368 int mmie_keyidx = -1;
1369 __le16 fc;
1370
1371 /*
1372 * Key selection 101
1373 *
1374 * There are four types of keys:
1375 * - GTK (group keys)
1376 * - IGTK (group keys for management frames)
1377 * - PTK (pairwise keys)
1378 * - STK (station-to-station pairwise keys)
1379 *
1380 * When selecting a key, we have to distinguish between multicast
1381 * (including broadcast) and unicast frames, the latter can only
1382 * use PTKs and STKs while the former always use GTKs and IGTKs.
1383 * Unless, of course, actual WEP keys ("pre-RSNA") are used, then
1384 * unicast frames can also use key indices like GTKs. Hence, if we
1385 * don't have a PTK/STK we check the key index for a WEP key.
1386 *
1387 * Note that in a regular BSS, multicast frames are sent by the
1388 * AP only, associated stations unicast the frame to the AP first
1389 * which then multicasts it on their behalf.
1390 *
1391 * There is also a slight problem in IBSS mode: GTKs are negotiated
1392 * with each station, that is something we don't currently handle.
1393 * The spec seems to expect that one negotiates the same key with
1394 * every station but there's no such requirement; VLANs could be
1395 * possible.
1396 */
1397
1398 /*
1399 * No point in finding a key and decrypting if the frame is neither
1400 * addressed to us nor a multicast frame.
1401 */
1402 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
1403 return RX_CONTINUE;
1404
1405 /* start without a key */
1406 rx->key = NULL;
1407
1408 if (rx->sta)
1409 sta_ptk = rcu_dereference(rx->sta->ptk);
1410
1411 fc = hdr->frame_control;
1412
1413 if (!ieee80211_has_protected(fc))
1414 mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb);
1415
1416 if (!is_multicast_ether_addr(hdr->addr1) && sta_ptk) {
1417 rx->key = sta_ptk;
1418 if ((status->flag & RX_FLAG_DECRYPTED) &&
1419 (status->flag & RX_FLAG_IV_STRIPPED))
1420 return RX_CONTINUE;
1421 /* Skip decryption if the frame is not protected. */
1422 if (!ieee80211_has_protected(fc))
1423 return RX_CONTINUE;
1424 } else if (mmie_keyidx >= 0) {
1425 /* Broadcast/multicast robust management frame / BIP */
1426 if ((status->flag & RX_FLAG_DECRYPTED) &&
1427 (status->flag & RX_FLAG_IV_STRIPPED))
1428 return RX_CONTINUE;
1429
1430 if (mmie_keyidx < NUM_DEFAULT_KEYS ||
1431 mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
1432 return RX_DROP_MONITOR; /* unexpected BIP keyidx */
1433 if (rx->sta)
1434 rx->key = rcu_dereference(rx->sta->gtk[mmie_keyidx]);
1435 if (!rx->key)
1436 rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]);
1437 } else if (!ieee80211_has_protected(fc)) {
1438 /*
1439 * The frame was not protected, so skip decryption. However, we
1440 * need to set rx->key if there is a key that could have been
1441 * used so that the frame may be dropped if encryption would
1442 * have been expected.
1443 */
1444 struct ieee80211_key *key = NULL;
1445 struct ieee80211_sub_if_data *sdata = rx->sdata;
1446 int i;
1447
1448 if (ieee80211_is_mgmt(fc) &&
1449 is_multicast_ether_addr(hdr->addr1) &&
1450 (key = rcu_dereference(rx->sdata->default_mgmt_key)))
1451 rx->key = key;
1452 else {
1453 if (rx->sta) {
1454 for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
1455 key = rcu_dereference(rx->sta->gtk[i]);
1456 if (key)
1457 break;
1458 }
1459 }
1460 if (!key) {
1461 for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
1462 key = rcu_dereference(sdata->keys[i]);
1463 if (key)
1464 break;
1465 }
1466 }
1467 if (key)
1468 rx->key = key;
1469 }
1470 return RX_CONTINUE;
1471 } else {
1472 u8 keyid;
1473 /*
1474 * The device doesn't give us the IV so we won't be
1475 * able to look up the key. That's ok though, we
1476 * don't need to decrypt the frame, we just won't
1477 * be able to keep statistics accurate.
1478 * Except for key threshold notifications, should
1479 * we somehow allow the driver to tell us which key
1480 * the hardware used if this flag is set?
1481 */
1482 if ((status->flag & RX_FLAG_DECRYPTED) &&
1483 (status->flag & RX_FLAG_IV_STRIPPED))
1484 return RX_CONTINUE;
1485
1486 hdrlen = ieee80211_hdrlen(fc);
1487
1488 if (rx->skb->len < 8 + hdrlen)
1489 return RX_DROP_UNUSABLE; /* TODO: count this? */
1490
1491 /*
1492 * no need to call ieee80211_wep_get_keyidx,
1493 * it verifies a bunch of things we've done already
1494 */
1495 skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1);
1496 keyidx = keyid >> 6;
1497
1498 /* check per-station GTK first, if multicast packet */
1499 if (is_multicast_ether_addr(hdr->addr1) && rx->sta)
1500 rx->key = rcu_dereference(rx->sta->gtk[keyidx]);
1501
1502 /* if not found, try default key */
1503 if (!rx->key) {
1504 rx->key = rcu_dereference(rx->sdata->keys[keyidx]);
1505
1506 /*
1507 * RSNA-protected unicast frames should always be
1508 * sent with pairwise or station-to-station keys,
1509 * but for WEP we allow using a key index as well.
1510 */
1511 if (rx->key &&
1512 rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 &&
1513 rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 &&
1514 !is_multicast_ether_addr(hdr->addr1))
1515 rx->key = NULL;
1516 }
1517 }
1518
1519 if (rx->key) {
1520 if (unlikely(rx->key->flags & KEY_FLAG_TAINTED))
1521 return RX_DROP_MONITOR;
1522
1523 rx->key->tx_rx_count++;
1524 /* TODO: add threshold stuff again */
1525 } else {
1526 return RX_DROP_MONITOR;
1527 }
1528
1529 switch (rx->key->conf.cipher) {
1530 case WLAN_CIPHER_SUITE_WEP40:
1531 case WLAN_CIPHER_SUITE_WEP104:
1532 result = ieee80211_crypto_wep_decrypt(rx);
1533 break;
1534 case WLAN_CIPHER_SUITE_TKIP:
1535 result = ieee80211_crypto_tkip_decrypt(rx);
1536 break;
1537 case WLAN_CIPHER_SUITE_CCMP:
1538 result = ieee80211_crypto_ccmp_decrypt(rx);
1539 break;
1540 case WLAN_CIPHER_SUITE_AES_CMAC:
1541 result = ieee80211_crypto_aes_cmac_decrypt(rx);
1542 break;
1543 default:
1544 /*
1545 * We can reach here only with HW-only algorithms
1546 * but why didn't it decrypt the frame?!
1547 */
1548 return RX_DROP_UNUSABLE;
1549 }
1550
1551 /* the hdr variable is invalid after the decrypt handlers */
1552
1553 /* either the frame has been decrypted or will be dropped */
1554 status->flag |= RX_FLAG_DECRYPTED;
1555
1556 return result;
1557}
1558
1492static inline struct ieee80211_fragment_entry * 1559static inline struct ieee80211_fragment_entry *
1493ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, 1560ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
1494 unsigned int frag, unsigned int seq, int rx_queue, 1561 unsigned int frag, unsigned int seq, int rx_queue,
@@ -1608,7 +1675,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1608 entry->ccmp = 1; 1675 entry->ccmp = 1;
1609 memcpy(entry->last_pn, 1676 memcpy(entry->last_pn,
1610 rx->key->u.ccmp.rx_pn[queue], 1677 rx->key->u.ccmp.rx_pn[queue],
1611 CCMP_PN_LEN); 1678 IEEE80211_CCMP_PN_LEN);
1612 } 1679 }
1613 return RX_QUEUED; 1680 return RX_QUEUED;
1614 } 1681 }
@@ -1627,21 +1694,21 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1627 * (IEEE 802.11i, 8.3.3.4.5) */ 1694 * (IEEE 802.11i, 8.3.3.4.5) */
1628 if (entry->ccmp) { 1695 if (entry->ccmp) {
1629 int i; 1696 int i;
1630 u8 pn[CCMP_PN_LEN], *rpn; 1697 u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
1631 int queue; 1698 int queue;
1632 if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP) 1699 if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP)
1633 return RX_DROP_UNUSABLE; 1700 return RX_DROP_UNUSABLE;
1634 memcpy(pn, entry->last_pn, CCMP_PN_LEN); 1701 memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN);
1635 for (i = CCMP_PN_LEN - 1; i >= 0; i--) { 1702 for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) {
1636 pn[i]++; 1703 pn[i]++;
1637 if (pn[i]) 1704 if (pn[i])
1638 break; 1705 break;
1639 } 1706 }
1640 queue = rx->security_idx; 1707 queue = rx->security_idx;
1641 rpn = rx->key->u.ccmp.rx_pn[queue]; 1708 rpn = rx->key->u.ccmp.rx_pn[queue];
1642 if (memcmp(pn, rpn, CCMP_PN_LEN)) 1709 if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN))
1643 return RX_DROP_UNUSABLE; 1710 return RX_DROP_UNUSABLE;
1644 memcpy(entry->last_pn, pn, CCMP_PN_LEN); 1711 memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
1645 } 1712 }
1646 1713
1647 skb_pull(rx->skb, ieee80211_hdrlen(fc)); 1714 skb_pull(rx->skb, ieee80211_hdrlen(fc));
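The replay rule enforced above: a later fragment is acceptable only if its CCMP packet number is exactly the previous fragment's PN plus one, and the loop increments the big-endian 48-bit PN from its least-significant byte with carry. The same logic as a standalone sketch (hypothetical names):

	#include <stdbool.h>
	#include <stdint.h>
	#include <string.h>

	#define PN_LEN 6	/* IEEE80211_CCMP_PN_LEN */

	/* Increment a big-endian 48-bit packet number in place;
	 * the carry loop stops at the first byte that did not wrap.
	 */
	static void pn_increment(uint8_t pn[PN_LEN])
	{
		int i;

		for (i = PN_LEN - 1; i >= 0; i--) {
			pn[i]++;
			if (pn[i])
				break;
		}
	}

	/* A fragment may only continue the MSDU if its PN is last_pn + 1. */
	static bool pn_is_next(const uint8_t *last_pn, const uint8_t *rx_pn)
	{
		uint8_t expect[PN_LEN];

		memcpy(expect, last_pn, PN_LEN);
		pn_increment(expect);
		return memcmp(expect, rx_pn, PN_LEN) == 0;
	}
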
@@ -1729,27 +1796,21 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
1729 if (unlikely(!ieee80211_has_protected(fc) && 1796 if (unlikely(!ieee80211_has_protected(fc) &&
1730 ieee80211_is_unicast_robust_mgmt_frame(rx->skb) && 1797 ieee80211_is_unicast_robust_mgmt_frame(rx->skb) &&
1731 rx->key)) { 1798 rx->key)) {
1732 if (ieee80211_is_deauth(fc)) 1799 if (ieee80211_is_deauth(fc) ||
1733 cfg80211_send_unprot_deauth(rx->sdata->dev, 1800 ieee80211_is_disassoc(fc))
1734 rx->skb->data, 1801 cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
1735 rx->skb->len); 1802 rx->skb->data,
1736 else if (ieee80211_is_disassoc(fc)) 1803 rx->skb->len);
1737 cfg80211_send_unprot_disassoc(rx->sdata->dev,
1738 rx->skb->data,
1739 rx->skb->len);
1740 return -EACCES; 1804 return -EACCES;
1741 } 1805 }
1742 /* BIP does not use Protected field, so need to check MMIE */ 1806 /* BIP does not use Protected field, so need to check MMIE */
1743 if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) && 1807 if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) &&
1744 ieee80211_get_mmie_keyidx(rx->skb) < 0)) { 1808 ieee80211_get_mmie_keyidx(rx->skb) < 0)) {
1745 if (ieee80211_is_deauth(fc)) 1809 if (ieee80211_is_deauth(fc) ||
1746 cfg80211_send_unprot_deauth(rx->sdata->dev, 1810 ieee80211_is_disassoc(fc))
1747 rx->skb->data, 1811 cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
1748 rx->skb->len); 1812 rx->skb->data,
1749 else if (ieee80211_is_disassoc(fc)) 1813 rx->skb->len);
1750 cfg80211_send_unprot_disassoc(rx->sdata->dev,
1751 rx->skb->data,
1752 rx->skb->len);
1753 return -EACCES; 1814 return -EACCES;
1754 } 1815 }
1755 /* 1816 /*
@@ -2623,8 +2684,7 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
2623 sig = status->signal; 2684 sig = status->signal;
2624 2685
2625 if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig, 2686 if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig,
2626 rx->skb->data, rx->skb->len, 2687 rx->skb->data, rx->skb->len, 0, GFP_ATOMIC)) {
2627 GFP_ATOMIC)) {
2628 if (rx->sta) 2688 if (rx->sta)
2629 rx->sta->rx_packets++; 2689 rx->sta->rx_packets++;
2630 dev_kfree_skb(rx->skb); 2690 dev_kfree_skb(rx->skb);
@@ -2878,10 +2938,10 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
2878 */ 2938 */
2879 rx->skb = skb; 2939 rx->skb = skb;
2880 2940
2881 CALL_RXH(ieee80211_rx_h_decrypt)
2882 CALL_RXH(ieee80211_rx_h_check_more_data) 2941 CALL_RXH(ieee80211_rx_h_check_more_data)
2883 CALL_RXH(ieee80211_rx_h_uapsd_and_pspoll) 2942 CALL_RXH(ieee80211_rx_h_uapsd_and_pspoll)
2884 CALL_RXH(ieee80211_rx_h_sta_process) 2943 CALL_RXH(ieee80211_rx_h_sta_process)
2944 CALL_RXH(ieee80211_rx_h_decrypt)
2885 CALL_RXH(ieee80211_rx_h_defragment) 2945 CALL_RXH(ieee80211_rx_h_defragment)
2886 CALL_RXH(ieee80211_rx_h_michael_mic_verify) 2946 CALL_RXH(ieee80211_rx_h_michael_mic_verify)
2887 /* must be after MMIC verify so header is counted in MPDU mic */ 2947 /* must be after MMIC verify so header is counted in MPDU mic */
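On the reordering above: each CALL_RXH() runs one handler and aborts the pipeline as soon as that handler returns anything other than RX_CONTINUE, so moving ieee80211_rx_h_decrypt below the station/PS handlers means those still run for frames the decrypt step would drop. The macro is roughly (a sketch; the real definition sits earlier in rx.c):

	#define CALL_RXH(rxh)			\
		do {				\
			res = rxh(rx);		\
			if (res != RX_CONTINUE)	\
				goto rxh_next;	\
		} while (0)
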
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 99b103921a4b..08afe74b98f4 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -66,6 +66,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
66 struct cfg80211_bss *cbss; 66 struct cfg80211_bss *cbss;
67 struct ieee80211_bss *bss; 67 struct ieee80211_bss *bss;
68 int clen, srlen; 68 int clen, srlen;
69 enum nl80211_bss_scan_width scan_width;
69 s32 signal = 0; 70 s32 signal = 0;
70 71
71 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) 72 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
@@ -73,8 +74,15 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
73 else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) 74 else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
74 signal = (rx_status->signal * 100) / local->hw.max_signal; 75 signal = (rx_status->signal * 100) / local->hw.max_signal;
75 76
76 cbss = cfg80211_inform_bss_frame(local->hw.wiphy, channel, 77 scan_width = NL80211_BSS_CHAN_WIDTH_20;
77 mgmt, len, signal, GFP_ATOMIC); 78 if (rx_status->flag & RX_FLAG_5MHZ)
79 scan_width = NL80211_BSS_CHAN_WIDTH_5;
80 if (rx_status->flag & RX_FLAG_10MHZ)
81 scan_width = NL80211_BSS_CHAN_WIDTH_10;
82
83 cbss = cfg80211_inform_bss_width_frame(local->hw.wiphy, channel,
84 scan_width, mgmt, len, signal,
85 GFP_ATOMIC);
78 if (!cbss) 86 if (!cbss)
79 return NULL; 87 return NULL;
80 88
@@ -140,6 +148,15 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
140 bss->valid_data |= IEEE80211_BSS_VALID_WMM; 148 bss->valid_data |= IEEE80211_BSS_VALID_WMM;
141 } 149 }
142 150
151 if (beacon) {
152 struct ieee80211_supported_band *sband =
153 local->hw.wiphy->bands[rx_status->band];
154 if (!(rx_status->flag & RX_FLAG_HT) &&
155 !(rx_status->flag & RX_FLAG_VHT))
156 bss->beacon_rate =
157 &sband->bitrates[rx_status->rate_idx];
158 }
159
143 return bss; 160 return bss;
144} 161}
145 162
@@ -195,10 +212,29 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
195 ieee80211_rx_bss_put(local, bss); 212 ieee80211_rx_bss_put(local, bss);
196} 213}
197 214
215static void
216ieee80211_prepare_scan_chandef(struct cfg80211_chan_def *chandef,
217 enum nl80211_bss_scan_width scan_width)
218{
219 memset(chandef, 0, sizeof(*chandef));
220 switch (scan_width) {
221 case NL80211_BSS_CHAN_WIDTH_5:
222 chandef->width = NL80211_CHAN_WIDTH_5;
223 break;
224 case NL80211_BSS_CHAN_WIDTH_10:
225 chandef->width = NL80211_CHAN_WIDTH_10;
226 break;
227 default:
228 chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
229 break;
230 }
231}
232
198/* return false if no more work */ 233/* return false if no more work */
199static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) 234static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
200{ 235{
201 struct cfg80211_scan_request *req = local->scan_req; 236 struct cfg80211_scan_request *req = local->scan_req;
237 struct cfg80211_chan_def chandef;
202 enum ieee80211_band band; 238 enum ieee80211_band band;
203 int i, ielen, n_chans; 239 int i, ielen, n_chans;
204 240
@@ -220,11 +256,12 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
220 } while (!n_chans); 256 } while (!n_chans);
221 257
222 local->hw_scan_req->n_channels = n_chans; 258 local->hw_scan_req->n_channels = n_chans;
259 ieee80211_prepare_scan_chandef(&chandef, req->scan_width);
223 260
224 ielen = ieee80211_build_preq_ies(local, (u8 *)local->hw_scan_req->ie, 261 ielen = ieee80211_build_preq_ies(local, (u8 *)local->hw_scan_req->ie,
225 local->hw_scan_ies_bufsize, 262 local->hw_scan_ies_bufsize,
226 req->ie, req->ie_len, band, 263 req->ie, req->ie_len, band,
227 req->rates[band], 0); 264 req->rates[band], &chandef);
228 local->hw_scan_req->ie_len = ielen; 265 local->hw_scan_req->ie_len = ielen;
229 local->hw_scan_req->no_cck = req->no_cck; 266 local->hw_scan_req->no_cck = req->no_cck;
230 267
@@ -271,7 +308,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted,
271 rcu_assign_pointer(local->scan_sdata, NULL); 308 rcu_assign_pointer(local->scan_sdata, NULL);
272 309
273 local->scanning = 0; 310 local->scanning = 0;
274 local->scan_channel = NULL; 311 local->scan_chandef.chan = NULL;
275 312
276 /* Set power back to normal operating levels. */ 313 /* Set power back to normal operating levels. */
277 ieee80211_hw_config(local, 0); 314 ieee80211_hw_config(local, 0);
@@ -606,11 +643,34 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
606{ 643{
607 int skip; 644 int skip;
608 struct ieee80211_channel *chan; 645 struct ieee80211_channel *chan;
646 enum nl80211_bss_scan_width oper_scan_width;
609 647
610 skip = 0; 648 skip = 0;
611 chan = local->scan_req->channels[local->scan_channel_idx]; 649 chan = local->scan_req->channels[local->scan_channel_idx];
612 650
613 local->scan_channel = chan; 651 local->scan_chandef.chan = chan;
652 local->scan_chandef.center_freq1 = chan->center_freq;
653 local->scan_chandef.center_freq2 = 0;
654 switch (local->scan_req->scan_width) {
655 case NL80211_BSS_CHAN_WIDTH_5:
656 local->scan_chandef.width = NL80211_CHAN_WIDTH_5;
657 break;
658 case NL80211_BSS_CHAN_WIDTH_10:
659 local->scan_chandef.width = NL80211_CHAN_WIDTH_10;
660 break;
661 case NL80211_BSS_CHAN_WIDTH_20:
662 /* If scanning on oper channel, use whatever channel-type
663 * is currently in use.
664 */
665 oper_scan_width = cfg80211_chandef_to_scan_width(
666 &local->_oper_chandef);
667 if (chan == local->_oper_chandef.chan &&
668 oper_scan_width == local->scan_req->scan_width)
669 local->scan_chandef = local->_oper_chandef;
670 else
671 local->scan_chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
672 break;
673 }
614 674
615 if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) 675 if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL))
616 skip = 1; 676 skip = 1;
@@ -650,7 +710,7 @@ static void ieee80211_scan_state_suspend(struct ieee80211_local *local,
650 unsigned long *next_delay) 710 unsigned long *next_delay)
651{ 711{
652 /* switch back to the operating channel */ 712 /* switch back to the operating channel */
653 local->scan_channel = NULL; 713 local->scan_chandef.chan = NULL;
654 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); 714 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
655 715
656 /* disable PS */ 716 /* disable PS */
@@ -792,7 +852,8 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
792 852
793int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, 853int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
794 const u8 *ssid, u8 ssid_len, 854 const u8 *ssid, u8 ssid_len,
795 struct ieee80211_channel *chan) 855 struct ieee80211_channel *chan,
856 enum nl80211_bss_scan_width scan_width)
796{ 857{
797 struct ieee80211_local *local = sdata->local; 858 struct ieee80211_local *local = sdata->local;
798 int ret = -EBUSY; 859 int ret = -EBUSY;
@@ -842,6 +903,7 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
842 903
843 local->int_scan_req->ssids = &local->scan_ssid; 904 local->int_scan_req->ssids = &local->scan_ssid;
844 local->int_scan_req->n_ssids = 1; 905 local->int_scan_req->n_ssids = 1;
906 local->int_scan_req->scan_width = scan_width;
845 memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN); 907 memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN);
846 local->int_scan_req->ssids[0].ssid_len = ssid_len; 908 local->int_scan_req->ssids[0].ssid_len = ssid_len;
847 909
@@ -903,6 +965,7 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
903{ 965{
904 struct ieee80211_local *local = sdata->local; 966 struct ieee80211_local *local = sdata->local;
905 struct ieee80211_sched_scan_ies sched_scan_ies = {}; 967 struct ieee80211_sched_scan_ies sched_scan_ies = {};
968 struct cfg80211_chan_def chandef;
906 int ret, i, iebufsz; 969 int ret, i, iebufsz;
907 970
908 iebufsz = 2 + IEEE80211_MAX_SSID_LEN + 971 iebufsz = 2 + IEEE80211_MAX_SSID_LEN +
@@ -930,10 +993,12 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
930 goto out_free; 993 goto out_free;
931 } 994 }
932 995
996 ieee80211_prepare_scan_chandef(&chandef, req->scan_width);
997
933 sched_scan_ies.len[i] = 998 sched_scan_ies.len[i] =
934 ieee80211_build_preq_ies(local, sched_scan_ies.ie[i], 999 ieee80211_build_preq_ies(local, sched_scan_ies.ie[i],
935 iebufsz, req->ie, req->ie_len, 1000 iebufsz, req->ie, req->ie_len,
936 i, (u32) -1, 0); 1001 i, (u32) -1, &chandef);
937 } 1002 }
938 1003
939 ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies); 1004 ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 11216bc13b27..aeb967a0aeed 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -149,6 +149,7 @@ static void cleanup_single_sta(struct sta_info *sta)
149 * directly by station destruction. 149 * directly by station destruction.
150 */ 150 */
151 for (i = 0; i < IEEE80211_NUM_TIDS; i++) { 151 for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
152 kfree(sta->ampdu_mlme.tid_start_tx[i]);
152 tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]); 153 tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]);
153 if (!tid_tx) 154 if (!tid_tx)
154 continue; 155 continue;
@@ -346,6 +347,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
346 if (ieee80211_vif_is_mesh(&sdata->vif) && 347 if (ieee80211_vif_is_mesh(&sdata->vif) &&
347 !sdata->u.mesh.user_mpm) 348 !sdata->u.mesh.user_mpm)
348 init_timer(&sta->plink_timer); 349 init_timer(&sta->plink_timer);
350 sta->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
349#endif 351#endif
350 352
351 memcpy(sta->sta.addr, addr, ETH_ALEN); 353 memcpy(sta->sta.addr, addr, ETH_ALEN);
@@ -358,6 +360,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
358 do_posix_clock_monotonic_gettime(&uptime); 360 do_posix_clock_monotonic_gettime(&uptime);
359 sta->last_connected = uptime.tv_sec; 361 sta->last_connected = uptime.tv_sec;
360 ewma_init(&sta->avg_signal, 1024, 8); 362 ewma_init(&sta->avg_signal, 1024, 8);
363 for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++)
364 ewma_init(&sta->chain_signal_avg[i], 1024, 8);
361 365
362 if (sta_prepare_rate_control(local, sta, gfp)) { 366 if (sta_prepare_rate_control(local, sta, gfp)) {
363 kfree(sta); 367 kfree(sta);
@@ -1130,6 +1134,7 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata,
1130 * ends the poll/service period. 1134 * ends the poll/service period.
1131 */ 1135 */
1132 info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | 1136 info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER |
1137 IEEE80211_TX_CTL_PS_RESPONSE |
1133 IEEE80211_TX_STATUS_EOSP | 1138 IEEE80211_TX_STATUS_EOSP |
1134 IEEE80211_TX_CTL_REQ_TX_STATUS; 1139 IEEE80211_TX_CTL_REQ_TX_STATUS;
1135 1140
@@ -1267,7 +1272,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
1267 * STA may still remain in PS mode after this frame 1272 * STA may still remain in PS mode after this frame
1268 * exchange. 1273 * exchange.
1269 */ 1274 */
1270 info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; 1275 info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER |
1276 IEEE80211_TX_CTL_PS_RESPONSE;
1271 1277
1272 /* 1278 /*
1273 * Use MoreData flag to indicate whether there are 1279 * Use MoreData flag to indicate whether there are
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index adc30045f99e..4208dbd5861f 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -203,6 +203,7 @@ struct tid_ampdu_rx {
203 * driver requested to close until the work for it runs 203 * driver requested to close until the work for it runs
204 * @mtx: mutex to protect all TX data (except non-NULL assignments 204 * @mtx: mutex to protect all TX data (except non-NULL assignments
205 * to tid_tx[idx], which are protected by the sta spinlock) 205 * to tid_tx[idx], which are protected by the sta spinlock)
206 * tid_start_tx is also protected by sta->lock.
206 */ 207 */
207struct sta_ampdu_mlme { 208struct sta_ampdu_mlme {
208 struct mutex mtx; 209 struct mutex mtx;
@@ -297,6 +298,9 @@ struct sta_ampdu_mlme {
297 * @rcu_head: RCU head used for freeing this station struct 298 * @rcu_head: RCU head used for freeing this station struct
298 * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, 299 * @cur_max_bandwidth: maximum bandwidth to use for TX to the station,
299 * taken from HT/VHT capabilities or VHT operating mode notification 300 * taken from HT/VHT capabilities or VHT operating mode notification
301 * @chains: chains ever used for RX from this station
302 * @chain_signal_last: last signal (per chain)
303 * @chain_signal_avg: signal average (per chain)
300 */ 304 */
301struct sta_info { 305struct sta_info {
302 /* General information, mostly static */ 306 /* General information, mostly static */
@@ -344,6 +348,11 @@ struct sta_info {
344 int last_signal; 348 int last_signal;
345 struct ewma avg_signal; 349 struct ewma avg_signal;
346 int last_ack_signal; 350 int last_ack_signal;
351
352 u8 chains;
353 s8 chain_signal_last[IEEE80211_MAX_CHAINS];
354 struct ewma chain_signal_avg[IEEE80211_MAX_CHAINS];
355
347 /* Plus 1 for non-QoS frames */ 356 /* Plus 1 for non-QoS frames */
348 __le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1]; 357 __le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1];
349 358
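On the new per-chain fields: each chain gets the same EWMA treatment as avg_signal, so the RX path is expected to fold every chain the hardware reported into its running average, roughly as below (a sketch assuming the lib/average.h ewma_add() API of this era; the dBm value is stored negated because struct ewma holds unsigned values, mirroring avg_signal):

	static void sta_update_chain_signals(struct sta_info *sta, u8 chains,
					     const s8 *chain_signal)
	{
		int i;

		sta->chains |= chains;	/* remember chains ever used */
		for (i = 0; i < IEEE80211_MAX_CHAINS; i++) {
			if (!(chains & BIT(i)))
				continue;
			sta->chain_signal_last[i] = chain_signal[i];
			/* store negated dBm, as done for avg_signal */
			ewma_add(&sta->chain_signal_avg[i],
				 -chain_signal[i]);
		}
	}
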
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 43439203f4e4..368837fe3b80 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -235,7 +235,8 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info)
235 235
236 /* IEEE80211_RADIOTAP_RATE rate */ 236 /* IEEE80211_RADIOTAP_RATE rate */
237 if (info->status.rates[0].idx >= 0 && 237 if (info->status.rates[0].idx >= 0 &&
238 !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) 238 !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS |
239 IEEE80211_TX_RC_VHT_MCS)))
239 len += 2; 240 len += 2;
240 241
241 /* IEEE80211_RADIOTAP_TX_FLAGS */ 242 /* IEEE80211_RADIOTAP_TX_FLAGS */
@@ -244,17 +245,23 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info)
244 /* IEEE80211_RADIOTAP_DATA_RETRIES */ 245 /* IEEE80211_RADIOTAP_DATA_RETRIES */
245 len += 1; 246 len += 1;
246 247
247 /* IEEE80211_TX_RC_MCS */ 248 /* IEEE80211_RADIOTAP_MCS
248 if (info->status.rates[0].idx >= 0 && 249 * IEEE80211_RADIOTAP_VHT */
249 info->status.rates[0].flags & IEEE80211_TX_RC_MCS) 250 if (info->status.rates[0].idx >= 0) {
250 len += 3; 251 if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS)
252 len += 3;
253 else if (info->status.rates[0].flags & IEEE80211_TX_RC_VHT_MCS)
254 len = ALIGN(len, 2) + 12;
255 }
251 256
252 return len; 257 return len;
253} 258}
254 259
255static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band 260static void
256 *sband, struct sk_buff *skb, 261ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
257 int retry_count, int rtap_len) 262 struct ieee80211_supported_band *sband,
263 struct sk_buff *skb, int retry_count,
264 int rtap_len, int shift)
258{ 265{
259 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 266 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
260 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; 267 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
@@ -279,9 +286,13 @@ static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band
279 286
280 /* IEEE80211_RADIOTAP_RATE */ 287 /* IEEE80211_RADIOTAP_RATE */
281 if (info->status.rates[0].idx >= 0 && 288 if (info->status.rates[0].idx >= 0 &&
282 !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) { 289 !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS |
290 IEEE80211_TX_RC_VHT_MCS))) {
291 u16 rate;
292
283 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE); 293 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
284 *pos = sband->bitrates[info->status.rates[0].idx].bitrate / 5; 294 rate = sband->bitrates[info->status.rates[0].idx].bitrate;
295 *pos = DIV_ROUND_UP(rate, 5 * (1 << shift));
285 /* padding for tx flags */ 296 /* padding for tx flags */
286 pos += 2; 297 pos += 2;
287 } 298 }
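The shift parameter introduced here encodes the 5/10 MHz rate scaling used throughout this series: bitrates in the band tables keep their nominal 20 MHz values, and the over-the-air rate on a narrow channel is the nominal rate divided by 1 << shift. The helper behind ieee80211_vif_get_shift() is roughly (a sketch; the real chandef helpers live in the mac80211 headers):

	static inline int chandef_get_shift(struct cfg80211_chan_def *chandef)
	{
		switch (chandef->width) {
		case NL80211_CHAN_WIDTH_5:
			return 2;	/* quarter rate */
		case NL80211_CHAN_WIDTH_10:
			return 1;	/* half rate */
		default:
			return 0;	/* full rate */
		}
	}

With that, DIV_ROUND_UP(rate, 5 * (1 << shift)) above converts the 100-kbps table unit into radiotap's 500-kbps unit while scaling the rate down for narrow channels.
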
@@ -306,9 +317,12 @@ static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band
306 *pos = retry_count; 317 *pos = retry_count;
307 pos++; 318 pos++;
308 319
309 /* IEEE80211_TX_RC_MCS */ 320 if (info->status.rates[0].idx < 0)
310 if (info->status.rates[0].idx >= 0 && 321 return;
311 info->status.rates[0].flags & IEEE80211_TX_RC_MCS) { 322
323 /* IEEE80211_RADIOTAP_MCS
324 * IEEE80211_RADIOTAP_VHT */
325 if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS) {
312 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); 326 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
313 pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS | 327 pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS |
314 IEEE80211_RADIOTAP_MCS_HAVE_GI | 328 IEEE80211_RADIOTAP_MCS_HAVE_GI |
@@ -321,8 +335,48 @@ static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band
321 pos[1] |= IEEE80211_RADIOTAP_MCS_FMT_GF; 335 pos[1] |= IEEE80211_RADIOTAP_MCS_FMT_GF;
322 pos[2] = info->status.rates[0].idx; 336 pos[2] = info->status.rates[0].idx;
323 pos += 3; 337 pos += 3;
324 } 338 } else if (info->status.rates[0].flags & IEEE80211_TX_RC_VHT_MCS) {
339 u16 known = local->hw.radiotap_vht_details &
340 (IEEE80211_RADIOTAP_VHT_KNOWN_GI |
341 IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH);
342
343 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT);
344
345 /* required alignment from rthdr */
346 pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2);
325 347
348 /* u16 known - IEEE80211_RADIOTAP_VHT_KNOWN_* */
349 put_unaligned_le16(known, pos);
350 pos += 2;
351
352 /* u8 flags - IEEE80211_RADIOTAP_VHT_FLAG_* */
353 if (info->status.rates[0].flags & IEEE80211_TX_RC_SHORT_GI)
354 *pos |= IEEE80211_RADIOTAP_VHT_FLAG_SGI;
355 pos++;
356
357 /* u8 bandwidth */
358 if (info->status.rates[0].flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
359 *pos = 1;
360 else if (info->status.rates[0].flags & IEEE80211_TX_RC_80_MHZ_WIDTH)
361 *pos = 4;
362 else if (info->status.rates[0].flags & IEEE80211_TX_RC_160_MHZ_WIDTH)
363 *pos = 11;
364 else /* IEEE80211_TX_RC_{20_MHZ_WIDTH,FIXME:DUP_DATA} */
365 *pos = 0;
366 pos++;
367
368 /* u8 mcs_nss[4] */
369 *pos = (ieee80211_rate_get_vht_mcs(&info->status.rates[0]) << 4) |
370 ieee80211_rate_get_vht_nss(&info->status.rates[0]);
371 pos += 4;
372
373 /* u8 coding */
374 pos++;
375 /* u8 group_id */
376 pos++;
377 /* u16 partial_aid */
378 pos += 2;
379 }
326} 380}
327 381
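The 12 bytes written above line up with the radiotap VHT field, which also explains the ALIGN(len, 2) in ieee80211_tx_radiotap_len(): the field starts with a u16 and must be 2-byte aligned. For reference, the layout as walked by the pos arithmetic:

	/* radiotap VHT field, 12 bytes, 2-byte aligned:
	 *   u16 known;        - IEEE80211_RADIOTAP_VHT_KNOWN_*
	 *   u8  flags;        - e.g. IEEE80211_RADIOTAP_VHT_FLAG_SGI
	 *   u8  bandwidth;    - 0 = 20 MHz, 1 = 40, 4 = 80, 11 = 160
	 *   u8  mcs_nss[4];   - per user: MCS in the high nibble, NSS low
	 *   u8  coding;
	 *   u8  group_id;
	 *   u16 partial_aid;
	 * 2 + 1 + 1 + 4 + 1 + 1 + 2 = 12 bytes
	 */
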
328static void ieee80211_report_used_skb(struct ieee80211_local *local, 382static void ieee80211_report_used_skb(struct ieee80211_local *local,
@@ -424,6 +478,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
424 bool acked; 478 bool acked;
425 struct ieee80211_bar *bar; 479 struct ieee80211_bar *bar;
426 int rtap_len; 480 int rtap_len;
481 int shift = 0;
427 482
428 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { 483 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
429 if ((info->flags & IEEE80211_TX_CTL_AMPDU) && 484 if ((info->flags & IEEE80211_TX_CTL_AMPDU) &&
@@ -458,6 +513,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
458 if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr)) 513 if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr))
459 continue; 514 continue;
460 515
516 shift = ieee80211_vif_get_shift(&sta->sdata->vif);
517
461 if (info->flags & IEEE80211_TX_STATUS_EOSP) 518 if (info->flags & IEEE80211_TX_STATUS_EOSP)
462 clear_sta_flag(sta, WLAN_STA_SP); 519 clear_sta_flag(sta, WLAN_STA_SP);
463 520
@@ -557,7 +614,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
557 614
558 rcu_read_unlock(); 615 rcu_read_unlock();
559 616
560 ieee80211_led_tx(local, 0); 617 ieee80211_led_tx(local);
561 618
562 /* SNMP counters 619 /* SNMP counters
563 * Fragments are passed to low-level drivers as separate skbs, so these 620 * Fragments are passed to low-level drivers as separate skbs, so these
@@ -624,7 +681,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
624 dev_kfree_skb(skb); 681 dev_kfree_skb(skb);
625 return; 682 return;
626 } 683 }
627 ieee80211_add_tx_radiotap_header(sband, skb, retry_count, rtap_len); 684 ieee80211_add_tx_radiotap_header(local, sband, skb, retry_count,
685 rtap_len, shift);
628 686
629 /* XXX: is this sufficient for BPF? */ 687 /* XXX: is this sufficient for BPF? */
630 skb_set_mac_header(skb, 0); 688 skb_set_mac_header(skb, 0);
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index c215fafd7a2f..1aba645882bd 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1906,6 +1906,32 @@ TRACE_EVENT(api_radar_detected,
1906 ) 1906 )
1907); 1907);
1908 1908
1909TRACE_EVENT(drv_channel_switch_beacon,
1910 TP_PROTO(struct ieee80211_local *local,
1911 struct ieee80211_sub_if_data *sdata,
1912 struct cfg80211_chan_def *chandef),
1913
1914 TP_ARGS(local, sdata, chandef),
1915
1916 TP_STRUCT__entry(
1917 LOCAL_ENTRY
1918 VIF_ENTRY
1919 CHANDEF_ENTRY
1920 ),
1921
1922 TP_fast_assign(
1923 LOCAL_ASSIGN;
1924 VIF_ASSIGN;
1925 CHANDEF_ASSIGN(chandef);
1926 ),
1927
1928 TP_printk(
1929 LOCAL_PR_FMT VIF_PR_FMT " channel switch to " CHANDEF_PR_FMT,
1930 LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG
1931 )
1932);
1933
1934
1909#ifdef CONFIG_MAC80211_MESSAGE_TRACING 1935#ifdef CONFIG_MAC80211_MESSAGE_TRACING
1910#undef TRACE_SYSTEM 1936#undef TRACE_SYSTEM
1911#define TRACE_SYSTEM mac80211_msg 1937#define TRACE_SYSTEM mac80211_msg
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 9972e07a2f96..3456c0486b48 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -40,12 +40,22 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
40 struct sk_buff *skb, int group_addr, 40 struct sk_buff *skb, int group_addr,
41 int next_frag_len) 41 int next_frag_len)
42{ 42{
43 int rate, mrate, erp, dur, i; 43 int rate, mrate, erp, dur, i, shift = 0;
44 struct ieee80211_rate *txrate; 44 struct ieee80211_rate *txrate;
45 struct ieee80211_local *local = tx->local; 45 struct ieee80211_local *local = tx->local;
46 struct ieee80211_supported_band *sband; 46 struct ieee80211_supported_band *sband;
47 struct ieee80211_hdr *hdr; 47 struct ieee80211_hdr *hdr;
48 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 48 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
49 struct ieee80211_chanctx_conf *chanctx_conf;
50 u32 rate_flags = 0;
51
52 rcu_read_lock();
53 chanctx_conf = rcu_dereference(tx->sdata->vif.chanctx_conf);
54 if (chanctx_conf) {
55 shift = ieee80211_chandef_get_shift(&chanctx_conf->def);
56 rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
57 }
58 rcu_read_unlock();
49 59
50 /* assume HW handles this */ 60 /* assume HW handles this */
51 if (tx->rate.flags & IEEE80211_TX_RC_MCS) 61 if (tx->rate.flags & IEEE80211_TX_RC_MCS)
@@ -122,8 +132,11 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
122 if (r->bitrate > txrate->bitrate) 132 if (r->bitrate > txrate->bitrate)
123 break; 133 break;
124 134
135 if ((rate_flags & r->flags) != rate_flags)
136 continue;
137
125 if (tx->sdata->vif.bss_conf.basic_rates & BIT(i)) 138 if (tx->sdata->vif.bss_conf.basic_rates & BIT(i))
126 rate = r->bitrate; 139 rate = DIV_ROUND_UP(r->bitrate, 1 << shift);
127 140
128 switch (sband->band) { 141 switch (sband->band) {
129 case IEEE80211_BAND_2GHZ: { 142 case IEEE80211_BAND_2GHZ: {
@@ -150,7 +163,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
150 if (rate == -1) { 163 if (rate == -1) {
151 /* No matching basic rate found; use highest suitable mandatory 164 /* No matching basic rate found; use highest suitable mandatory
152 * PHY rate */ 165 * PHY rate */
153 rate = mrate; 166 rate = DIV_ROUND_UP(mrate, 1 << shift);
154 } 167 }
155 168
156 /* Don't calculate ACKs for QoS Frames with NoAck Policy set */ 169 /* Don't calculate ACKs for QoS Frames with NoAck Policy set */
@@ -162,7 +175,8 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
162 * (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up 175 * (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up
163 * to closest integer */ 176 * to closest integer */
164 dur = ieee80211_frame_duration(sband->band, 10, rate, erp, 177 dur = ieee80211_frame_duration(sband->band, 10, rate, erp,
165 tx->sdata->vif.bss_conf.use_short_preamble); 178 tx->sdata->vif.bss_conf.use_short_preamble,
179 shift);
166 180
167 if (next_frag_len) { 181 if (next_frag_len) {
168 /* Frame is fragmented: duration increases with time needed to 182 /* Frame is fragmented: duration increases with time needed to
@@ -171,7 +185,8 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
171 /* next fragment */ 185 /* next fragment */
172 dur += ieee80211_frame_duration(sband->band, next_frag_len, 186 dur += ieee80211_frame_duration(sband->band, next_frag_len,
173 txrate->bitrate, erp, 187 txrate->bitrate, erp,
174 tx->sdata->vif.bss_conf.use_short_preamble); 188 tx->sdata->vif.bss_conf.use_short_preamble,
189 shift);
175 } 190 }
176 191
177 return cpu_to_le16(dur); 192 return cpu_to_le16(dur);
@@ -398,13 +413,14 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
398 if (ieee80211_has_order(hdr->frame_control)) 413 if (ieee80211_has_order(hdr->frame_control))
399 return TX_CONTINUE; 414 return TX_CONTINUE;
400 415
416 if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)
417 info->hw_queue = tx->sdata->vif.cab_queue;
418
401 /* no stations in PS mode */ 419 /* no stations in PS mode */
402 if (!atomic_read(&ps->num_sta_ps)) 420 if (!atomic_read(&ps->num_sta_ps))
403 return TX_CONTINUE; 421 return TX_CONTINUE;
404 422
405 info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM; 423 info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM;
406 if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)
407 info->hw_queue = tx->sdata->vif.cab_queue;
408 424
409 /* device releases frame after DTIM beacon */ 425 /* device releases frame after DTIM beacon */
410 if (!(tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING)) 426 if (!(tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING))
@@ -523,9 +539,11 @@ ieee80211_tx_h_check_control_port_protocol(struct ieee80211_tx_data *tx)
523{ 539{
524 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); 540 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
525 541
526 if (unlikely(tx->sdata->control_port_protocol == tx->skb->protocol && 542 if (unlikely(tx->sdata->control_port_protocol == tx->skb->protocol)) {
527 tx->sdata->control_port_no_encrypt)) 543 if (tx->sdata->control_port_no_encrypt)
528 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; 544 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
545 info->control.flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO;
546 }
529 547
530 return TX_CONTINUE; 548 return TX_CONTINUE;
531} 549}
@@ -763,9 +781,11 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
763 /* 781 /*
764 * Anything but QoS data that has a sequence number field 782 * Anything but QoS data that has a sequence number field
765 * (is long enough) gets a sequence number from the global 783 * (is long enough) gets a sequence number from the global
766 * counter. 784 * counter. QoS data frames with a multicast destination
785 * also use the global counter (802.11-2012 9.3.2.10).
767 */ 786 */
768 if (!ieee80211_is_data_qos(hdr->frame_control)) { 787 if (!ieee80211_is_data_qos(hdr->frame_control) ||
788 is_multicast_ether_addr(hdr->addr1)) {
769 /* driver should assign sequence number */ 789 /* driver should assign sequence number */
770 info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; 790 info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
771 /* for pure STA mode without beacons, we can do it */ 791 /* for pure STA mode without beacons, we can do it */
@@ -1256,6 +1276,10 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
1256 1276
1257 switch (sdata->vif.type) { 1277 switch (sdata->vif.type) {
1258 case NL80211_IFTYPE_MONITOR: 1278 case NL80211_IFTYPE_MONITOR:
1279 if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) {
1280 vif = &sdata->vif;
1281 break;
1282 }
1259 sdata = rcu_dereference(local->monitor_sdata); 1283 sdata = rcu_dereference(local->monitor_sdata);
1260 if (sdata) { 1284 if (sdata) {
1261 vif = &sdata->vif; 1285 vif = &sdata->vif;
@@ -1280,7 +1304,6 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
1280 txpending); 1304 txpending);
1281 1305
1282 ieee80211_tpt_led_trig_tx(local, fc, led_len); 1306 ieee80211_tpt_led_trig_tx(local, fc, led_len);
1283 ieee80211_led_tx(local, 1);
1284 1307
1285 WARN_ON_ONCE(!skb_queue_empty(skbs)); 1308 WARN_ON_ONCE(!skb_queue_empty(skbs));
1286 1309
@@ -1789,12 +1812,6 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1789 break; 1812 break;
1790#ifdef CONFIG_MAC80211_MESH 1813#ifdef CONFIG_MAC80211_MESH
1791 case NL80211_IFTYPE_MESH_POINT: 1814 case NL80211_IFTYPE_MESH_POINT:
1792 if (!sdata->u.mesh.mshcfg.dot11MeshTTL) {
1793 /* Do not send frames with mesh_ttl == 0 */
1794 sdata->u.mesh.mshstats.dropped_frames_ttl++;
1795 goto fail_rcu;
1796 }
1797
1798 if (!is_multicast_ether_addr(skb->data)) { 1815 if (!is_multicast_ether_addr(skb->data)) {
1799 struct sta_info *next_hop; 1816 struct sta_info *next_hop;
1800 bool mpp_lookup = true; 1817 bool mpp_lookup = true;
@@ -2325,6 +2342,81 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
2325 return 0; 2342 return 0;
2326} 2343}
2327 2344
2345void ieee80211_csa_finish(struct ieee80211_vif *vif)
2346{
2347 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
2348
2349 ieee80211_queue_work(&sdata->local->hw,
2350 &sdata->csa_finalize_work);
2351}
2352EXPORT_SYMBOL(ieee80211_csa_finish);
2353
2354static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata,
2355 struct beacon_data *beacon)
2356{
2357 struct probe_resp *resp;
2358 int counter_offset_beacon = sdata->csa_counter_offset_beacon;
2359 int counter_offset_presp = sdata->csa_counter_offset_presp;
2360
2361 /* warn if the driver did not check for/react to csa completeness */
2362 if (WARN_ON(((u8 *)beacon->tail)[counter_offset_beacon] == 0))
2363 return;
2364
2365 ((u8 *)beacon->tail)[counter_offset_beacon]--;
2366
2367 if (sdata->vif.type == NL80211_IFTYPE_AP &&
2368 counter_offset_presp) {
2369 rcu_read_lock();
2370 resp = rcu_dereference(sdata->u.ap.probe_resp);
2371
2372 /* if nl80211 accepted the offset, this should not happen. */
2373 if (WARN_ON(!resp)) {
2374 rcu_read_unlock();
2375 return;
2376 }
2377 resp->data[counter_offset_presp]--;
2378 rcu_read_unlock();
2379 }
2380}
2381
2382bool ieee80211_csa_is_complete(struct ieee80211_vif *vif)
2383{
2384 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
2385 struct beacon_data *beacon = NULL;
2386 u8 *beacon_data;
2387 size_t beacon_data_len;
2388 int counter_beacon = sdata->csa_counter_offset_beacon;
2389 int ret = false;
2390
2391 if (!ieee80211_sdata_running(sdata))
2392 return false;
2393
2394 rcu_read_lock();
2395 if (vif->type == NL80211_IFTYPE_AP) {
2396 struct ieee80211_if_ap *ap = &sdata->u.ap;
2397
2398 beacon = rcu_dereference(ap->beacon);
2399 if (WARN_ON(!beacon || !beacon->tail))
2400 goto out;
2401 beacon_data = beacon->tail;
2402 beacon_data_len = beacon->tail_len;
2403 } else {
2404 WARN_ON(1);
2405 goto out;
2406 }
2407
2408 if (WARN_ON(counter_beacon > beacon_data_len))
2409 goto out;
2410
2411 if (beacon_data[counter_beacon] == 0)
2412 ret = true;
2413 out:
2414 rcu_read_unlock();
2415
2416 return ret;
2417}
2418EXPORT_SYMBOL(ieee80211_csa_is_complete);
2419
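The intended consumer of this pair is a driver that offloads beaconing: keep fetching beacons (each fetch decrements the countdown via ieee80211_update_csa()), and once the counter reaches zero, kick the finalize work. A sketch of that pattern (hypothetical driver callback, not taken from any in-tree driver):

	static void drv_refresh_beacon(struct ieee80211_hw *hw,
				       struct ieee80211_vif *vif)
	{
		struct sk_buff *beacon;

		beacon = ieee80211_beacon_get(hw, vif); /* counter-- */
		if (!beacon)
			return;

		/* ... program 'beacon' into the device ... */
		dev_kfree_skb(beacon);

		if (vif->csa_active && ieee80211_csa_is_complete(vif))
			ieee80211_csa_finish(vif); /* queue csa_finalize_work */
	}
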
2328struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, 2420struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
2329 struct ieee80211_vif *vif, 2421 struct ieee80211_vif *vif,
2330 u16 *tim_offset, u16 *tim_length) 2422 u16 *tim_offset, u16 *tim_length)
@@ -2355,6 +2447,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
2355 struct beacon_data *beacon = rcu_dereference(ap->beacon); 2447 struct beacon_data *beacon = rcu_dereference(ap->beacon);
2356 2448
2357 if (beacon) { 2449 if (beacon) {
2450 if (sdata->vif.csa_active)
2451 ieee80211_update_csa(sdata, beacon);
2452
2358 /* 2453 /*
2359 * headroom, head length, 2454 * headroom, head length,
2360 * tail length and maximum TIM length 2455 * tail length and maximum TIM length
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 27e07150eb46..e1b34a18b243 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -107,7 +107,8 @@ void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx)
107} 107}
108 108
109int ieee80211_frame_duration(enum ieee80211_band band, size_t len, 109int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
110 int rate, int erp, int short_preamble) 110 int rate, int erp, int short_preamble,
111 int shift)
111{ 112{
112 int dur; 113 int dur;
113 114
@@ -118,6 +119,9 @@ int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
118 * 119 *
119 * rate is in 100 kbps, so dividend is multiplied by 10 in the 120 * rate is in 100 kbps, so dividend is multiplied by 10 in the
120 * DIV_ROUND_UP() operations. 121 * DIV_ROUND_UP() operations.
120 * DIV_ROUND_UP() operations. 121 * DIV_ROUND_UP() operations.
122 *
123 * shift may be 2 for 5 MHz channels or 1 for 10 MHz channels, and
124 * is assumed to be 0 otherwise.
121 */ 125 */
122 126
123 if (band == IEEE80211_BAND_5GHZ || erp) { 127 if (band == IEEE80211_BAND_5GHZ || erp) {
@@ -130,13 +134,23 @@ int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
130 * TXTIME = T_PREAMBLE + T_SIGNAL + T_SYM x N_SYM + Signal Ext 134 * TXTIME = T_PREAMBLE + T_SIGNAL + T_SYM x N_SYM + Signal Ext
131 * 135 *
132 * T_SYM = 4 usec 136 * T_SYM = 4 usec
133 * 802.11a - 17.5.2: aSIFSTime = 16 usec 137 * 802.11a - 18.5.2: aSIFSTime = 16 usec
134 * 802.11g - 19.8.4: aSIFSTime = 10 usec + 138 * 802.11g - 19.8.4: aSIFSTime = 10 usec +
135 * signal ext = 6 usec 139 * signal ext = 6 usec
136 */ 140 */
137 dur = 16; /* SIFS + signal ext */ 141 dur = 16; /* SIFS + signal ext */
138 dur += 16; /* 17.3.2.3: T_PREAMBLE = 16 usec */ 142 dur += 16; /* IEEE 802.11-2012 18.3.2.4: T_PREAMBLE = 16 usec */
139 dur += 4; /* 17.3.2.3: T_SIGNAL = 4 usec */ 143 dur += 4; /* IEEE 802.11-2012 18.3.2.4: T_SIGNAL = 4 usec */
144
145 /* IEEE 802.11-2012 18.3.2.4: all values above are:
146 * * times 4 for 5 MHz
147 * * times 2 for 10 MHz
148 */
149 dur *= 1 << shift;
150
151 /* rates should already consider the channel bandwidth,
152 * don't apply divisor again.
153 */
140 dur += 4 * DIV_ROUND_UP((16 + 8 * (len + 4) + 6) * 10, 154 dur += 4 * DIV_ROUND_UP((16 + 8 * (len + 4) + 6) * 10,
141 4 * rate); /* T_SYM x N_SYM */ 155 4 * rate); /* T_SYM x N_SYM */
142 } else { 156 } else {
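A worked example of the scaling (assuming OFDM/ERP and an ACK-sized frame, len = 10, at nominal 6 Mbps, i.e. rate = 60 in 100-kbps units):

	/* 20 MHz, shift = 0, rate = 60:
	 *   dur = (16 + 16 + 4)
	 *       + 4 * DIV_ROUND_UP((16 + 8 * (10 + 4) + 6) * 10, 4 * 60)
	 *       = 36 + 4 * 6 = 60 usec
	 *
	 * 10 MHz, shift = 1, caller passes the halved rate 30:
	 *   dur = 36 * 2 + 4 * DIV_ROUND_UP(1340, 4 * 30)
	 *       = 72 + 4 * 12 = 120 usec
	 * i.e. the whole duration doubles, matching the comment above.
	 */
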
@@ -168,7 +182,7 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
168{ 182{
169 struct ieee80211_sub_if_data *sdata; 183 struct ieee80211_sub_if_data *sdata;
170 u16 dur; 184 u16 dur;
171 int erp; 185 int erp, shift = 0;
172 bool short_preamble = false; 186 bool short_preamble = false;
173 187
174 erp = 0; 188 erp = 0;
@@ -177,10 +191,11 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
177 short_preamble = sdata->vif.bss_conf.use_short_preamble; 191 short_preamble = sdata->vif.bss_conf.use_short_preamble;
178 if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) 192 if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
179 erp = rate->flags & IEEE80211_RATE_ERP_G; 193 erp = rate->flags & IEEE80211_RATE_ERP_G;
194 shift = ieee80211_vif_get_shift(vif);
180 } 195 }
181 196
182 dur = ieee80211_frame_duration(band, frame_len, rate->bitrate, erp, 197 dur = ieee80211_frame_duration(band, frame_len, rate->bitrate, erp,
183 short_preamble); 198 short_preamble, shift);
184 199
185 return cpu_to_le16(dur); 200 return cpu_to_le16(dur);
186} 201}
@@ -194,7 +209,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
194 struct ieee80211_rate *rate; 209 struct ieee80211_rate *rate;
195 struct ieee80211_sub_if_data *sdata; 210 struct ieee80211_sub_if_data *sdata;
196 bool short_preamble; 211 bool short_preamble;
197 int erp; 212 int erp, shift = 0, bitrate;
198 u16 dur; 213 u16 dur;
199 struct ieee80211_supported_band *sband; 214 struct ieee80211_supported_band *sband;
200 215
@@ -210,17 +225,20 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
210 short_preamble = sdata->vif.bss_conf.use_short_preamble; 225 short_preamble = sdata->vif.bss_conf.use_short_preamble;
211 if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) 226 if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
212 erp = rate->flags & IEEE80211_RATE_ERP_G; 227 erp = rate->flags & IEEE80211_RATE_ERP_G;
228 shift = ieee80211_vif_get_shift(vif);
213 } 229 }
214 230
231 bitrate = DIV_ROUND_UP(rate->bitrate, 1 << shift);
232
215 /* CTS duration */ 233 /* CTS duration */
216 dur = ieee80211_frame_duration(sband->band, 10, rate->bitrate, 234 dur = ieee80211_frame_duration(sband->band, 10, bitrate,
217 erp, short_preamble); 235 erp, short_preamble, shift);
218 /* Data frame duration */ 236 /* Data frame duration */
219 dur += ieee80211_frame_duration(sband->band, frame_len, rate->bitrate, 237 dur += ieee80211_frame_duration(sband->band, frame_len, bitrate,
220 erp, short_preamble); 238 erp, short_preamble, shift);
221 /* ACK duration */ 239 /* ACK duration */
222 dur += ieee80211_frame_duration(sband->band, 10, rate->bitrate, 240 dur += ieee80211_frame_duration(sband->band, 10, bitrate,
223 erp, short_preamble); 241 erp, short_preamble, shift);
224 242
225 return cpu_to_le16(dur); 243 return cpu_to_le16(dur);
226} 244}
@@ -235,7 +253,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
235 struct ieee80211_rate *rate; 253 struct ieee80211_rate *rate;
236 struct ieee80211_sub_if_data *sdata; 254 struct ieee80211_sub_if_data *sdata;
237 bool short_preamble; 255 bool short_preamble;
238 int erp; 256 int erp, shift = 0, bitrate;
239 u16 dur; 257 u16 dur;
240 struct ieee80211_supported_band *sband; 258 struct ieee80211_supported_band *sband;
241 259
@@ -250,15 +268,18 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
250 short_preamble = sdata->vif.bss_conf.use_short_preamble; 268 short_preamble = sdata->vif.bss_conf.use_short_preamble;
251 if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) 269 if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
252 erp = rate->flags & IEEE80211_RATE_ERP_G; 270 erp = rate->flags & IEEE80211_RATE_ERP_G;
271 shift = ieee80211_vif_get_shift(vif);
253 } 272 }
254 273
274 bitrate = DIV_ROUND_UP(rate->bitrate, 1 << shift);
275
255 /* Data frame duration */ 276 /* Data frame duration */
256 dur = ieee80211_frame_duration(sband->band, frame_len, rate->bitrate, 277 dur = ieee80211_frame_duration(sband->band, frame_len, bitrate,
257 erp, short_preamble); 278 erp, short_preamble, shift);
258 if (!(frame_txctl->flags & IEEE80211_TX_CTL_NO_ACK)) { 279 if (!(frame_txctl->flags & IEEE80211_TX_CTL_NO_ACK)) {
259 /* ACK duration */ 280 /* ACK duration */
260 dur += ieee80211_frame_duration(sband->band, 10, rate->bitrate, 281 dur += ieee80211_frame_duration(sband->band, 10, bitrate,
261 erp, short_preamble); 282 erp, short_preamble, shift);
262 } 283 }
263 284
264 return cpu_to_le16(dur); 285 return cpu_to_le16(dur);
@@ -560,6 +581,9 @@ void ieee80211_iterate_active_interfaces(
560 list_for_each_entry(sdata, &local->interfaces, list) { 581 list_for_each_entry(sdata, &local->interfaces, list) {
561 switch (sdata->vif.type) { 582 switch (sdata->vif.type) {
562 case NL80211_IFTYPE_MONITOR: 583 case NL80211_IFTYPE_MONITOR:
584 if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))
585 continue;
586 break;
563 case NL80211_IFTYPE_AP_VLAN: 587 case NL80211_IFTYPE_AP_VLAN:
564 continue; 588 continue;
565 default: 589 default:
@@ -598,6 +622,9 @@ void ieee80211_iterate_active_interfaces_atomic(
598 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 622 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
599 switch (sdata->vif.type) { 623 switch (sdata->vif.type) {
600 case NL80211_IFTYPE_MONITOR: 624 case NL80211_IFTYPE_MONITOR:
625 if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))
626 continue;
627 break;
601 case NL80211_IFTYPE_AP_VLAN: 628 case NL80211_IFTYPE_AP_VLAN:
602 continue; 629 continue;
603 default: 630 default:
@@ -661,12 +688,12 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw,
661} 688}
662EXPORT_SYMBOL(ieee80211_queue_delayed_work); 689EXPORT_SYMBOL(ieee80211_queue_delayed_work);
663 690
664u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, bool action, 691u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
665 struct ieee802_11_elems *elems, 692 struct ieee802_11_elems *elems,
666 u64 filter, u32 crc) 693 u64 filter, u32 crc)
667{ 694{
668 size_t left = len; 695 size_t left = len;
669 u8 *pos = start; 696 const u8 *pos = start;
670 bool calc_crc = filter != 0; 697 bool calc_crc = filter != 0;
671 DECLARE_BITMAP(seen_elems, 256); 698 DECLARE_BITMAP(seen_elems, 256);
672 const u8 *ie; 699 const u8 *ie;
@@ -1046,58 +1073,6 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
1046 } 1073 }
1047} 1074}
1048 1075
1049void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
1050 const size_t supp_rates_len,
1051 const u8 *supp_rates)
1052{
1053 struct ieee80211_chanctx_conf *chanctx_conf;
1054 int i, have_higher_than_11mbit = 0;
1055
1056 /* cf. IEEE 802.11 9.2.12 */
1057 for (i = 0; i < supp_rates_len; i++)
1058 if ((supp_rates[i] & 0x7f) * 5 > 110)
1059 have_higher_than_11mbit = 1;
1060
1061 rcu_read_lock();
1062 chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
1063
1064 if (chanctx_conf &&
1065 chanctx_conf->def.chan->band == IEEE80211_BAND_2GHZ &&
1066 have_higher_than_11mbit)
1067 sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
1068 else
1069 sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
1070 rcu_read_unlock();
1071
1072 ieee80211_set_wmm_default(sdata, true);
1073}
1074
1075u32 ieee80211_mandatory_rates(struct ieee80211_local *local,
1076 enum ieee80211_band band)
1077{
1078 struct ieee80211_supported_band *sband;
1079 struct ieee80211_rate *bitrates;
1080 u32 mandatory_rates;
1081 enum ieee80211_rate_flags mandatory_flag;
1082 int i;
1083
1084 sband = local->hw.wiphy->bands[band];
1085 if (WARN_ON(!sband))
1086 return 1;
1087
1088 if (band == IEEE80211_BAND_2GHZ)
1089 mandatory_flag = IEEE80211_RATE_MANDATORY_B;
1090 else
1091 mandatory_flag = IEEE80211_RATE_MANDATORY_A;
1092
1093 bitrates = sband->bitrates;
1094 mandatory_rates = 0;
1095 for (i = 0; i < sband->n_bitrates; i++)
1096 if (bitrates[i].flags & mandatory_flag)
1097 mandatory_rates |= BIT(i);
1098 return mandatory_rates;
1099}
1100
1101void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, 1076void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
1102 u16 transaction, u16 auth_alg, u16 status, 1077 u16 transaction, u16 auth_alg, u16 status,
1103 const u8 *extra, size_t extra_len, const u8 *da, 1078 const u8 *extra, size_t extra_len, const u8 *da,
@@ -1182,7 +1157,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
1182int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, 1157int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
1183 size_t buffer_len, const u8 *ie, size_t ie_len, 1158 size_t buffer_len, const u8 *ie, size_t ie_len,
1184 enum ieee80211_band band, u32 rate_mask, 1159 enum ieee80211_band band, u32 rate_mask,
1185 u8 channel) 1160 struct cfg80211_chan_def *chandef)
1186{ 1161{
1187 struct ieee80211_supported_band *sband; 1162 struct ieee80211_supported_band *sband;
1188 u8 *pos = buffer, *end = buffer + buffer_len; 1163 u8 *pos = buffer, *end = buffer + buffer_len;
@@ -1191,16 +1166,26 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
1191 u8 rates[32]; 1166 u8 rates[32];
1192 int num_rates; 1167 int num_rates;
1193 int ext_rates_len; 1168 int ext_rates_len;
1169 int shift;
1170 u32 rate_flags;
1194 1171
1195 sband = local->hw.wiphy->bands[band]; 1172 sband = local->hw.wiphy->bands[band];
1196 if (WARN_ON_ONCE(!sband)) 1173 if (WARN_ON_ONCE(!sband))
1197 return 0; 1174 return 0;
1198 1175
1176 rate_flags = ieee80211_chandef_rate_flags(chandef);
1177 shift = ieee80211_chandef_get_shift(chandef);
1178
1199 num_rates = 0; 1179 num_rates = 0;
1200 for (i = 0; i < sband->n_bitrates; i++) { 1180 for (i = 0; i < sband->n_bitrates; i++) {
1201 if ((BIT(i) & rate_mask) == 0) 1181 if ((BIT(i) & rate_mask) == 0)
1202 continue; /* skip rate */ 1182 continue; /* skip rate */
1203 rates[num_rates++] = (u8) (sband->bitrates[i].bitrate / 5); 1183 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
1184 continue;
1185
1186 rates[num_rates++] =
1187 (u8) DIV_ROUND_UP(sband->bitrates[i].bitrate,
1188 (1 << shift) * 5);
1204 } 1189 }
1205 1190
1206 supp_rates_len = min_t(int, num_rates, 8); 1191 supp_rates_len = min_t(int, num_rates, 8);
@@ -1240,12 +1225,13 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
1240 pos += ext_rates_len; 1225 pos += ext_rates_len;
1241 } 1226 }
1242 1227
1243 if (channel && sband->band == IEEE80211_BAND_2GHZ) { 1228 if (chandef->chan && sband->band == IEEE80211_BAND_2GHZ) {
1244 if (end - pos < 3) 1229 if (end - pos < 3)
1245 goto out_err; 1230 goto out_err;
1246 *pos++ = WLAN_EID_DS_PARAMS; 1231 *pos++ = WLAN_EID_DS_PARAMS;
1247 *pos++ = 1; 1232 *pos++ = 1;
1248 *pos++ = channel; 1233 *pos++ = ieee80211_frequency_to_channel(
1234 chandef->chan->center_freq);
1249 } 1235 }
1250 1236
1251 /* insert custom IEs that go before HT */ 1237 /* insert custom IEs that go before HT */
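For the DS Parameter Set rewrite above, ieee80211_frequency_to_channel() applies the usual mapping (cfg80211 behaviour of this era, noted here for reference):

	/*   2484 MHz       -> channel 14 (special case)
	 *   2412-2472 MHz  -> (freq - 2407) / 5, e.g. 2437 -> 6
	 *   5000+ MHz      -> (freq - 5000) / 5, e.g. 5180 -> 36
	 */
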
@@ -1310,9 +1296,9 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
1310 bool directed) 1296 bool directed)
1311{ 1297{
1312 struct ieee80211_local *local = sdata->local; 1298 struct ieee80211_local *local = sdata->local;
1299 struct cfg80211_chan_def chandef;
1313 struct sk_buff *skb; 1300 struct sk_buff *skb;
1314 struct ieee80211_mgmt *mgmt; 1301 struct ieee80211_mgmt *mgmt;
1315 u8 chan_no;
1316 int ies_len; 1302 int ies_len;
1317 1303
1318 /* 1304 /*
@@ -1320,10 +1306,11 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
1320 * in order to maximize the chance that we get a response. Some 1306 * in order to maximize the chance that we get a response. Some
1321 * badly-behaved APs don't respond when this parameter is included. 1307 * badly-behaved APs don't respond when this parameter is included.
1322 */ 1308 */
1309 chandef.width = sdata->vif.bss_conf.chandef.width;
1323 if (directed) 1310 if (directed)
1324 chan_no = 0; 1311 chandef.chan = NULL;
1325 else 1312 else
1326 chan_no = ieee80211_frequency_to_channel(chan->center_freq); 1313 chandef.chan = chan;
1327 1314
1328 skb = ieee80211_probereq_get(&local->hw, &sdata->vif, 1315 skb = ieee80211_probereq_get(&local->hw, &sdata->vif,
1329 ssid, ssid_len, 100 + ie_len); 1316 ssid, ssid_len, 100 + ie_len);
@@ -1333,7 +1320,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
1333 ies_len = ieee80211_build_preq_ies(local, skb_tail_pointer(skb), 1320 ies_len = ieee80211_build_preq_ies(local, skb_tail_pointer(skb),
1334 skb_tailroom(skb), 1321 skb_tailroom(skb),
1335 ie, ie_len, chan->band, 1322 ie, ie_len, chan->band,
1336 ratemask, chan_no); 1323 ratemask, &chandef);
1337 skb_put(skb, ies_len); 1324 skb_put(skb, ies_len);
1338 1325
1339 if (dst) { 1326 if (dst) {
@@ -1367,16 +1354,19 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
1367 } 1354 }
1368} 1355}
1369 1356
1370u32 ieee80211_sta_get_rates(struct ieee80211_local *local, 1357u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
1371 struct ieee802_11_elems *elems, 1358 struct ieee802_11_elems *elems,
1372 enum ieee80211_band band, u32 *basic_rates) 1359 enum ieee80211_band band, u32 *basic_rates)
1373{ 1360{
1374 struct ieee80211_supported_band *sband; 1361 struct ieee80211_supported_band *sband;
1375 struct ieee80211_rate *bitrates; 1362 struct ieee80211_rate *bitrates;
1376 size_t num_rates; 1363 size_t num_rates;
1377 u32 supp_rates; 1364 u32 supp_rates, rate_flags;
1378 int i, j; 1365 int i, j, shift;
1379 sband = local->hw.wiphy->bands[band]; 1366 sband = sdata->local->hw.wiphy->bands[band];
1367
1368 rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
1369 shift = ieee80211_vif_get_shift(&sdata->vif);
1380 1370
1381 if (WARN_ON(!sband)) 1371 if (WARN_ON(!sband))
1382 return 1; 1372 return 1;
@@ -1401,7 +1391,15 @@ u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
1401 continue; 1391 continue;
1402 1392
1403 for (j = 0; j < num_rates; j++) { 1393 for (j = 0; j < num_rates; j++) {
1404 if (bitrates[j].bitrate == own_rate) { 1394 int brate;
1395 if ((rate_flags & sband->bitrates[j].flags)
1396 != rate_flags)
1397 continue;
1398
1399 brate = DIV_ROUND_UP(sband->bitrates[j].bitrate,
1400 1 << shift);
1401
1402 if (brate == own_rate) {
1405 supp_rates |= BIT(j); 1403 supp_rates |= BIT(j);
1406 if (basic_rates && is_basic) 1404 if (basic_rates && is_basic)
1407 *basic_rates |= BIT(j); 1405 *basic_rates |= BIT(j);
@@ -1455,8 +1453,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1455 local->resuming = true; 1453 local->resuming = true;
1456 1454
1457 if (local->wowlan) { 1455 if (local->wowlan) {
1458 local->wowlan = false;
1459 res = drv_resume(local); 1456 res = drv_resume(local);
1457 local->wowlan = false;
1460 if (res < 0) { 1458 if (res < 0) {
1461 local->resuming = false; 1459 local->resuming = false;
1462 return res; 1460 return res;
@@ -1604,12 +1602,13 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1604 BSS_CHANGED_ARP_FILTER | 1602 BSS_CHANGED_ARP_FILTER |
1605 BSS_CHANGED_PS; 1603 BSS_CHANGED_PS;
1606 1604
1607 if (sdata->u.mgd.dtim_period) 1605 /* Re-send beacon info report to the driver */
1608 changed |= BSS_CHANGED_DTIM_PERIOD; 1606 if (sdata->u.mgd.have_beacon)
1607 changed |= BSS_CHANGED_BEACON_INFO;
1609 1608
1610 mutex_lock(&sdata->u.mgd.mtx); 1609 sdata_lock(sdata);
1611 ieee80211_bss_info_change_notify(sdata, changed); 1610 ieee80211_bss_info_change_notify(sdata, changed);
1612 mutex_unlock(&sdata->u.mgd.mtx); 1611 sdata_unlock(sdata);
1613 break; 1612 break;
1614 case NL80211_IFTYPE_ADHOC: 1613 case NL80211_IFTYPE_ADHOC:
1615 changed |= BSS_CHANGED_IBSS; 1614 changed |= BSS_CHANGED_IBSS;
@@ -2023,18 +2022,56 @@ void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan,
2023 cfg80211_chandef_create(chandef, control_chan, channel_type); 2022 cfg80211_chandef_create(chandef, control_chan, channel_type);
2024} 2023}
2025 2024
2025int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
2026 const struct ieee80211_supported_band *sband,
2027 const u8 *srates, int srates_len, u32 *rates)
2028{
2029 u32 rate_flags = ieee80211_chandef_rate_flags(chandef);
2030 int shift = ieee80211_chandef_get_shift(chandef);
2031 struct ieee80211_rate *br;
2032 int brate, rate, i, j, count = 0;
2033
2034 *rates = 0;
2035
2036 for (i = 0; i < srates_len; i++) {
2037 rate = srates[i] & 0x7f;
2038
2039 for (j = 0; j < sband->n_bitrates; j++) {
2040 br = &sband->bitrates[j];
2041 if ((rate_flags & br->flags) != rate_flags)
2042 continue;
2043
2044 brate = DIV_ROUND_UP(br->bitrate, (1 << shift) * 5);
2045 if (brate == rate) {
2046 *rates |= BIT(j);
2047 count++;
2048 break;
2049 }
2050 }
2051 }
2052 return count;
2053}
2054
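A usage sketch for the new helper (assuming a 20 MHz chandef, so shift = 0 and no extra rate flags; 'sband' and 'chandef' are whatever the caller already holds). The IE stores rates in 500-kbps units with 0x80 marking basic rates, which the & 0x7f above strips before matching:

	u32 rates;
	/* Supported Rates IE body: 1(B) 2(B) 5.5 11 Mbps */
	static const u8 srates[] = { 0x82, 0x84, 0x0b, 0x16 };
	int matched;

	matched = ieee80211_parse_bitrates(chandef, sband, srates,
					   ARRAY_SIZE(srates), &rates);
	/* matched == 4 if sband carries all four CCK rates;
	 * 'rates' then has BIT(j) set for each matching sband index.
	 */
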
2026int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, 2055int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
2027 struct sk_buff *skb, bool need_basic, 2056 struct sk_buff *skb, bool need_basic,
2028 enum ieee80211_band band) 2057 enum ieee80211_band band)
2029{ 2058{
2030 struct ieee80211_local *local = sdata->local; 2059 struct ieee80211_local *local = sdata->local;
2031 struct ieee80211_supported_band *sband; 2060 struct ieee80211_supported_band *sband;
2032 int rate; 2061 int rate, shift;
2033 u8 i, rates, *pos; 2062 u8 i, rates, *pos;
2034 u32 basic_rates = sdata->vif.bss_conf.basic_rates; 2063 u32 basic_rates = sdata->vif.bss_conf.basic_rates;
2064 u32 rate_flags;
2035 2065
2066 shift = ieee80211_vif_get_shift(&sdata->vif);
2067 rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
2036 sband = local->hw.wiphy->bands[band]; 2068 sband = local->hw.wiphy->bands[band];
2037 rates = sband->n_bitrates; 2069 rates = 0;
2070 for (i = 0; i < sband->n_bitrates; i++) {
2071 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
2072 continue;
2073 rates++;
2074 }
2038 if (rates > 8) 2075 if (rates > 8)
2039 rates = 8; 2076 rates = 8;
2040 2077
@@ -2046,10 +2083,15 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
2046 *pos++ = rates; 2083 *pos++ = rates;
2047 for (i = 0; i < rates; i++) { 2084 for (i = 0; i < rates; i++) {
2048 u8 basic = 0; 2085 u8 basic = 0;
2086 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
2087 continue;
2088
2049 if (need_basic && basic_rates & BIT(i)) 2089 if (need_basic && basic_rates & BIT(i))
2050 basic = 0x80; 2090 basic = 0x80;
2051 rate = sband->bitrates[i].bitrate; 2091 rate = sband->bitrates[i].bitrate;
2052 *pos++ = basic | (u8) (rate / 5); 2092 rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
2093 5 * (1 << shift));
2094 *pos++ = basic | (u8) rate;
2053 } 2095 }
2054 2096
2055 return 0; 2097 return 0;
@@ -2061,12 +2103,22 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
2061{ 2103{
2062 struct ieee80211_local *local = sdata->local; 2104 struct ieee80211_local *local = sdata->local;
2063 struct ieee80211_supported_band *sband; 2105 struct ieee80211_supported_band *sband;
2064 int rate; 2106 int rate, skip, shift;
2065 u8 i, exrates, *pos; 2107 u8 i, exrates, *pos;
2066 u32 basic_rates = sdata->vif.bss_conf.basic_rates; 2108 u32 basic_rates = sdata->vif.bss_conf.basic_rates;
2109 u32 rate_flags;
2110
2111 rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
2112 shift = ieee80211_vif_get_shift(&sdata->vif);
2067 2113
2068 sband = local->hw.wiphy->bands[band]; 2114 sband = local->hw.wiphy->bands[band];
2069 exrates = sband->n_bitrates; 2115 exrates = 0;
2116 for (i = 0; i < sband->n_bitrates; i++) {
2117 if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
2118 continue;
2119 exrates++;
2120 }
2121
2070 if (exrates > 8) 2122 if (exrates > 8)
2071 exrates -= 8; 2123 exrates -= 8;
2072 else 2124 else
@@ -2079,12 +2131,19 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
2079 pos = skb_put(skb, exrates + 2); 2131 pos = skb_put(skb, exrates + 2);
2080 *pos++ = WLAN_EID_EXT_SUPP_RATES; 2132 *pos++ = WLAN_EID_EXT_SUPP_RATES;
2081 *pos++ = exrates; 2133 *pos++ = exrates;
2134 skip = 0;
2082 for (i = 8; i < sband->n_bitrates; i++) { 2135 for (i = 8; i < sband->n_bitrates; i++) {
2083 u8 basic = 0; 2136 u8 basic = 0;
2137 if ((rate_flags & sband->bitrates[i].flags)
2138 != rate_flags)
2139 continue;
2140 if (skip++ < 8)
2141 continue;
2084 if (need_basic && basic_rates & BIT(i)) 2142 if (need_basic && basic_rates & BIT(i))
2085 basic = 0x80; 2143 basic = 0x80;
2086 rate = sband->bitrates[i].bitrate; 2144 rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
2087 *pos++ = basic | (u8) (rate / 5); 2145 5 * (1 << shift));
2146 *pos++ = basic | (u8) rate;
2088 } 2147 }
2089 } 2148 }
2090 return 0; 2149 return 0;
@@ -2168,9 +2227,17 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
2168 ri.flags |= RATE_INFO_FLAGS_SHORT_GI; 2227 ri.flags |= RATE_INFO_FLAGS_SHORT_GI;
2169 } else { 2228 } else {
2170 struct ieee80211_supported_band *sband; 2229 struct ieee80211_supported_band *sband;
2230 int shift = 0;
2231 int bitrate;
2232
2233 if (status->flag & RX_FLAG_10MHZ)
2234 shift = 1;
2235 if (status->flag & RX_FLAG_5MHZ)
2236 shift = 2;
2171 2237
2172 sband = local->hw.wiphy->bands[status->band]; 2238 sband = local->hw.wiphy->bands[status->band];
2173 ri.legacy = sband->bitrates[status->rate_idx].bitrate; 2239 bitrate = sband->bitrates[status->rate_idx].bitrate;
2240 ri.legacy = DIV_ROUND_UP(bitrate, (1 << shift));
2174 } 2241 }
2175 2242
2176 rate = cfg80211_calculate_bitrate(&ri); 2243 rate = cfg80211_calculate_bitrate(&ri);
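
The DIV_ROUND_UP(bitrate, 5 * (1 << shift)) idiom recurring in these util.c hunks converts between two unit systems: struct ieee80211_rate stores bitrates in units of 100 kbit/s, while the Supported Rates element encodes them in units of 500 kbit/s, and on 10 MHz and 5 MHz channels the effective rate is halved or quartered (shift 1 or 2, as the RX_FLAG_10MHZ/RX_FLAG_5MHZ handling above shows). A minimal standalone sketch of the conversion, with the macro redefined locally so it compiles outside the kernel:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    /* bitrate is in 100 kbit/s units, as in struct ieee80211_rate;
     * shift is 0 for 20 MHz, 1 for 10 MHz, 2 for 5 MHz channels.
     * The result is the 500 kbit/s unit used by the Supported
     * Rates IE. */
    static unsigned int ie_rate(unsigned int bitrate, int shift)
    {
        return DIV_ROUND_UP(bitrate, 5 * (1 << shift));
    }

    int main(void)
    {
        /* 54 Mb/s is stored as 540; on a 10 MHz channel it runs
         * at 27 Mb/s and is encoded as 54 (54 * 500 kbit/s). */
        printf("%u\n", ie_rate(540, 0));  /* 108 -> 54 Mb/s */
        printf("%u\n", ie_rate(540, 1));  /* 54  -> 27 Mb/s */
        printf("%u\n", ie_rate(540, 2));  /* 27  -> 13.5 Mb/s */
        return 0;
    }

Rounding up rather than truncating keeps rates that do not divide evenly (e.g. 11 Mb/s on a 5 MHz channel) representable in the 500 kbit/s grid.
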
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 171344d4eb7c..97c289414e32 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -396,7 +396,7 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
396 new_bw = ieee80211_sta_cur_vht_bw(sta); 396 new_bw = ieee80211_sta_cur_vht_bw(sta);
397 if (new_bw != sta->sta.bandwidth) { 397 if (new_bw != sta->sta.bandwidth) {
398 sta->sta.bandwidth = new_bw; 398 sta->sta.bandwidth = new_bw;
399 changed |= IEEE80211_RC_NSS_CHANGED; 399 changed |= IEEE80211_RC_BW_CHANGED;
400 } 400 }
401 401
402 change: 402 change:
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index c04d401dae92..6ee2b5863572 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -28,7 +28,7 @@
28int ieee80211_wep_init(struct ieee80211_local *local) 28int ieee80211_wep_init(struct ieee80211_local *local)
29{ 29{
30 /* start WEP IV from a random value */ 30 /* start WEP IV from a random value */
31 get_random_bytes(&local->wep_iv, WEP_IV_LEN); 31 get_random_bytes(&local->wep_iv, IEEE80211_WEP_IV_LEN);
32 32
33 local->wep_tx_tfm = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC); 33 local->wep_tx_tfm = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC);
34 if (IS_ERR(local->wep_tx_tfm)) { 34 if (IS_ERR(local->wep_tx_tfm)) {
@@ -98,20 +98,21 @@ static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local,
98 98
99 hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); 99 hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
100 100
101 if (WARN_ON(skb_tailroom(skb) < WEP_ICV_LEN || 101 if (WARN_ON(skb_tailroom(skb) < IEEE80211_WEP_ICV_LEN ||
102 skb_headroom(skb) < WEP_IV_LEN)) 102 skb_headroom(skb) < IEEE80211_WEP_IV_LEN))
103 return NULL; 103 return NULL;
104 104
105 hdrlen = ieee80211_hdrlen(hdr->frame_control); 105 hdrlen = ieee80211_hdrlen(hdr->frame_control);
106 newhdr = skb_push(skb, WEP_IV_LEN); 106 newhdr = skb_push(skb, IEEE80211_WEP_IV_LEN);
107 memmove(newhdr, newhdr + WEP_IV_LEN, hdrlen); 107 memmove(newhdr, newhdr + IEEE80211_WEP_IV_LEN, hdrlen);
108 108
109 /* the HW only needs room for the IV, but not the actual IV */ 109 /* the HW only needs room for the IV, but not the actual IV */
110 if (info->control.hw_key && 110 if (info->control.hw_key &&
111 (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) 111 (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE))
112 return newhdr + hdrlen; 112 return newhdr + hdrlen;
113 113
114 skb_set_network_header(skb, skb_network_offset(skb) + WEP_IV_LEN); 114 skb_set_network_header(skb, skb_network_offset(skb) +
115 IEEE80211_WEP_IV_LEN);
115 ieee80211_wep_get_iv(local, keylen, keyidx, newhdr + hdrlen); 116 ieee80211_wep_get_iv(local, keylen, keyidx, newhdr + hdrlen);
116 return newhdr + hdrlen; 117 return newhdr + hdrlen;
117} 118}
@@ -125,8 +126,8 @@ static void ieee80211_wep_remove_iv(struct ieee80211_local *local,
125 unsigned int hdrlen; 126 unsigned int hdrlen;
126 127
127 hdrlen = ieee80211_hdrlen(hdr->frame_control); 128 hdrlen = ieee80211_hdrlen(hdr->frame_control);
128 memmove(skb->data + WEP_IV_LEN, skb->data, hdrlen); 129 memmove(skb->data + IEEE80211_WEP_IV_LEN, skb->data, hdrlen);
129 skb_pull(skb, WEP_IV_LEN); 130 skb_pull(skb, IEEE80211_WEP_IV_LEN);
130} 131}
131 132
132 133
@@ -146,7 +147,7 @@ int ieee80211_wep_encrypt_data(struct crypto_cipher *tfm, u8 *rc4key,
146 put_unaligned(icv, (__le32 *)(data + data_len)); 147 put_unaligned(icv, (__le32 *)(data + data_len));
147 148
148 crypto_cipher_setkey(tfm, rc4key, klen); 149 crypto_cipher_setkey(tfm, rc4key, klen);
149 for (i = 0; i < data_len + WEP_ICV_LEN; i++) 150 for (i = 0; i < data_len + IEEE80211_WEP_ICV_LEN; i++)
150 crypto_cipher_encrypt_one(tfm, data + i, data + i); 151 crypto_cipher_encrypt_one(tfm, data + i, data + i);
151 152
152 return 0; 153 return 0;
@@ -172,7 +173,7 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local,
172 if (!iv) 173 if (!iv)
173 return -1; 174 return -1;
174 175
175 len = skb->len - (iv + WEP_IV_LEN - skb->data); 176 len = skb->len - (iv + IEEE80211_WEP_IV_LEN - skb->data);
176 177
177 /* Prepend 24-bit IV to RC4 key */ 178 /* Prepend 24-bit IV to RC4 key */
178 memcpy(rc4key, iv, 3); 179 memcpy(rc4key, iv, 3);
@@ -181,10 +182,10 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local,
181 memcpy(rc4key + 3, key, keylen); 182 memcpy(rc4key + 3, key, keylen);
182 183
183 /* Add room for ICV */ 184 /* Add room for ICV */
184 skb_put(skb, WEP_ICV_LEN); 185 skb_put(skb, IEEE80211_WEP_ICV_LEN);
185 186
186 return ieee80211_wep_encrypt_data(local->wep_tx_tfm, rc4key, keylen + 3, 187 return ieee80211_wep_encrypt_data(local->wep_tx_tfm, rc4key, keylen + 3,
187 iv + WEP_IV_LEN, len); 188 iv + IEEE80211_WEP_IV_LEN, len);
188} 189}
189 190
190 191
@@ -201,11 +202,11 @@ int ieee80211_wep_decrypt_data(struct crypto_cipher *tfm, u8 *rc4key,
201 return -1; 202 return -1;
202 203
203 crypto_cipher_setkey(tfm, rc4key, klen); 204 crypto_cipher_setkey(tfm, rc4key, klen);
204 for (i = 0; i < data_len + WEP_ICV_LEN; i++) 205 for (i = 0; i < data_len + IEEE80211_WEP_ICV_LEN; i++)
205 crypto_cipher_decrypt_one(tfm, data + i, data + i); 206 crypto_cipher_decrypt_one(tfm, data + i, data + i);
206 207
207 crc = cpu_to_le32(~crc32_le(~0, data, data_len)); 208 crc = cpu_to_le32(~crc32_le(~0, data, data_len));
208 if (memcmp(&crc, data + data_len, WEP_ICV_LEN) != 0) 209 if (memcmp(&crc, data + data_len, IEEE80211_WEP_ICV_LEN) != 0)
209 /* ICV mismatch */ 210 /* ICV mismatch */
210 return -1; 211 return -1;
211 212
@@ -237,10 +238,10 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local,
237 return -1; 238 return -1;
238 239
239 hdrlen = ieee80211_hdrlen(hdr->frame_control); 240 hdrlen = ieee80211_hdrlen(hdr->frame_control);
240 if (skb->len < hdrlen + WEP_IV_LEN + WEP_ICV_LEN) 241 if (skb->len < hdrlen + IEEE80211_WEP_IV_LEN + IEEE80211_WEP_ICV_LEN)
241 return -1; 242 return -1;
242 243
243 len = skb->len - hdrlen - WEP_IV_LEN - WEP_ICV_LEN; 244 len = skb->len - hdrlen - IEEE80211_WEP_IV_LEN - IEEE80211_WEP_ICV_LEN;
244 245
245 keyidx = skb->data[hdrlen + 3] >> 6; 246 keyidx = skb->data[hdrlen + 3] >> 6;
246 247
@@ -256,16 +257,16 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local,
256 memcpy(rc4key + 3, key->conf.key, key->conf.keylen); 257 memcpy(rc4key + 3, key->conf.key, key->conf.keylen);
257 258
258 if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen, 259 if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen,
259 skb->data + hdrlen + WEP_IV_LEN, 260 skb->data + hdrlen +
260 len)) 261 IEEE80211_WEP_IV_LEN, len))
261 ret = -1; 262 ret = -1;
262 263
263 /* Trim ICV */ 264 /* Trim ICV */
264 skb_trim(skb, skb->len - WEP_ICV_LEN); 265 skb_trim(skb, skb->len - IEEE80211_WEP_ICV_LEN);
265 266
266 /* Remove IV */ 267 /* Remove IV */
267 memmove(skb->data + WEP_IV_LEN, skb->data, hdrlen); 268 memmove(skb->data + IEEE80211_WEP_IV_LEN, skb->data, hdrlen);
268 skb_pull(skb, WEP_IV_LEN); 269 skb_pull(skb, IEEE80211_WEP_IV_LEN);
269 270
270 return ret; 271 return ret;
271} 272}
@@ -305,13 +306,14 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx)
305 if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) 306 if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key))
306 return RX_DROP_UNUSABLE; 307 return RX_DROP_UNUSABLE;
307 } else if (!(status->flag & RX_FLAG_IV_STRIPPED)) { 308 } else if (!(status->flag & RX_FLAG_IV_STRIPPED)) {
308 if (!pskb_may_pull(rx->skb, ieee80211_hdrlen(fc) + WEP_IV_LEN)) 309 if (!pskb_may_pull(rx->skb, ieee80211_hdrlen(fc) +
310 IEEE80211_WEP_IV_LEN))
309 return RX_DROP_UNUSABLE; 311 return RX_DROP_UNUSABLE;
310 if (rx->sta && ieee80211_wep_is_weak_iv(rx->skb, rx->key)) 312 if (rx->sta && ieee80211_wep_is_weak_iv(rx->skb, rx->key))
311 rx->sta->wep_weak_iv_count++; 313 rx->sta->wep_weak_iv_count++;
312 ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); 314 ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key);
313 /* remove ICV */ 315 /* remove ICV */
314 if (pskb_trim(rx->skb, rx->skb->len - WEP_ICV_LEN)) 316 if (pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN))
315 return RX_DROP_UNUSABLE; 317 return RX_DROP_UNUSABLE;
316 } 318 }
317 319
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index c7c6d644486f..c9edfcb7a13b 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -62,10 +62,10 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
62 62
63 tail = MICHAEL_MIC_LEN; 63 tail = MICHAEL_MIC_LEN;
64 if (!info->control.hw_key) 64 if (!info->control.hw_key)
65 tail += TKIP_ICV_LEN; 65 tail += IEEE80211_TKIP_ICV_LEN;
66 66
67 if (WARN_ON(skb_tailroom(skb) < tail || 67 if (WARN_ON(skb_tailroom(skb) < tail ||
68 skb_headroom(skb) < TKIP_IV_LEN)) 68 skb_headroom(skb) < IEEE80211_TKIP_IV_LEN))
69 return TX_DROP; 69 return TX_DROP;
70 70
71 key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY]; 71 key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY];
@@ -198,15 +198,16 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
198 if (info->control.hw_key) 198 if (info->control.hw_key)
199 tail = 0; 199 tail = 0;
200 else 200 else
201 tail = TKIP_ICV_LEN; 201 tail = IEEE80211_TKIP_ICV_LEN;
202 202
203 if (WARN_ON(skb_tailroom(skb) < tail || 203 if (WARN_ON(skb_tailroom(skb) < tail ||
204 skb_headroom(skb) < TKIP_IV_LEN)) 204 skb_headroom(skb) < IEEE80211_TKIP_IV_LEN))
205 return -1; 205 return -1;
206 206
207 pos = skb_push(skb, TKIP_IV_LEN); 207 pos = skb_push(skb, IEEE80211_TKIP_IV_LEN);
208 memmove(pos, pos + TKIP_IV_LEN, hdrlen); 208 memmove(pos, pos + IEEE80211_TKIP_IV_LEN, hdrlen);
209 skb_set_network_header(skb, skb_network_offset(skb) + TKIP_IV_LEN); 209 skb_set_network_header(skb, skb_network_offset(skb) +
210 IEEE80211_TKIP_IV_LEN);
210 pos += hdrlen; 211 pos += hdrlen;
211 212
212 /* the HW only needs room for the IV, but not the actual IV */ 213 /* the HW only needs room for the IV, but not the actual IV */
@@ -227,7 +228,7 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
227 return 0; 228 return 0;
228 229
229 /* Add room for ICV */ 230 /* Add room for ICV */
230 skb_put(skb, TKIP_ICV_LEN); 231 skb_put(skb, IEEE80211_TKIP_ICV_LEN);
231 232
232 return ieee80211_tkip_encrypt_data(tx->local->wep_tx_tfm, 233 return ieee80211_tkip_encrypt_data(tx->local->wep_tx_tfm,
233 key, skb, pos, len); 234 key, skb, pos, len);
@@ -290,11 +291,11 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
290 return RX_DROP_UNUSABLE; 291 return RX_DROP_UNUSABLE;
291 292
292 /* Trim ICV */ 293 /* Trim ICV */
293 skb_trim(skb, skb->len - TKIP_ICV_LEN); 294 skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN);
294 295
295 /* Remove IV */ 296 /* Remove IV */
296 memmove(skb->data + TKIP_IV_LEN, skb->data, hdrlen); 297 memmove(skb->data + IEEE80211_TKIP_IV_LEN, skb->data, hdrlen);
297 skb_pull(skb, TKIP_IV_LEN); 298 skb_pull(skb, IEEE80211_TKIP_IV_LEN);
298 299
299 return RX_CONTINUE; 300 return RX_CONTINUE;
300} 301}
@@ -337,9 +338,9 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch,
337 else 338 else
338 qos_tid = 0; 339 qos_tid = 0;
339 340
340 data_len = skb->len - hdrlen - CCMP_HDR_LEN; 341 data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN;
341 if (encrypted) 342 if (encrypted)
342 data_len -= CCMP_MIC_LEN; 343 data_len -= IEEE80211_CCMP_MIC_LEN;
343 344
344 /* First block, b_0 */ 345 /* First block, b_0 */
345 b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */ 346 b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */
@@ -348,7 +349,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch,
348 */ 349 */
349 b_0[1] = qos_tid | (mgmt << 4); 350 b_0[1] = qos_tid | (mgmt << 4);
350 memcpy(&b_0[2], hdr->addr2, ETH_ALEN); 351 memcpy(&b_0[2], hdr->addr2, ETH_ALEN);
351 memcpy(&b_0[8], pn, CCMP_PN_LEN); 352 memcpy(&b_0[8], pn, IEEE80211_CCMP_PN_LEN);
352 /* l(m) */ 353 /* l(m) */
353 put_unaligned_be16(data_len, &b_0[14]); 354 put_unaligned_be16(data_len, &b_0[14]);
354 355
@@ -424,15 +425,16 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
424 if (info->control.hw_key) 425 if (info->control.hw_key)
425 tail = 0; 426 tail = 0;
426 else 427 else
427 tail = CCMP_MIC_LEN; 428 tail = IEEE80211_CCMP_MIC_LEN;
428 429
429 if (WARN_ON(skb_tailroom(skb) < tail || 430 if (WARN_ON(skb_tailroom(skb) < tail ||
430 skb_headroom(skb) < CCMP_HDR_LEN)) 431 skb_headroom(skb) < IEEE80211_CCMP_HDR_LEN))
431 return -1; 432 return -1;
432 433
433 pos = skb_push(skb, CCMP_HDR_LEN); 434 pos = skb_push(skb, IEEE80211_CCMP_HDR_LEN);
434 memmove(pos, pos + CCMP_HDR_LEN, hdrlen); 435 memmove(pos, pos + IEEE80211_CCMP_HDR_LEN, hdrlen);
435 skb_set_network_header(skb, skb_network_offset(skb) + CCMP_HDR_LEN); 436 skb_set_network_header(skb, skb_network_offset(skb) +
437 IEEE80211_CCMP_HDR_LEN);
436 438
437 /* the HW only needs room for the IV, but not the actual IV */ 439 /* the HW only needs room for the IV, but not the actual IV */
438 if (info->control.hw_key && 440 if (info->control.hw_key &&
@@ -457,10 +459,10 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
457 if (info->control.hw_key) 459 if (info->control.hw_key)
458 return 0; 460 return 0;
459 461
460 pos += CCMP_HDR_LEN; 462 pos += IEEE80211_CCMP_HDR_LEN;
461 ccmp_special_blocks(skb, pn, scratch, 0); 463 ccmp_special_blocks(skb, pn, scratch, 0);
462 ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, scratch, pos, len, 464 ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, scratch, pos, len,
463 pos, skb_put(skb, CCMP_MIC_LEN)); 465 pos, skb_put(skb, IEEE80211_CCMP_MIC_LEN));
464 466
465 return 0; 467 return 0;
466} 468}
@@ -490,7 +492,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
490 struct ieee80211_key *key = rx->key; 492 struct ieee80211_key *key = rx->key;
491 struct sk_buff *skb = rx->skb; 493 struct sk_buff *skb = rx->skb;
492 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 494 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
493 u8 pn[CCMP_PN_LEN]; 495 u8 pn[IEEE80211_CCMP_PN_LEN];
494 int data_len; 496 int data_len;
495 int queue; 497 int queue;
496 498
@@ -500,12 +502,13 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
500 !ieee80211_is_robust_mgmt_frame(hdr)) 502 !ieee80211_is_robust_mgmt_frame(hdr))
501 return RX_CONTINUE; 503 return RX_CONTINUE;
502 504
503 data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN; 505 data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN -
506 IEEE80211_CCMP_MIC_LEN;
504 if (!rx->sta || data_len < 0) 507 if (!rx->sta || data_len < 0)
505 return RX_DROP_UNUSABLE; 508 return RX_DROP_UNUSABLE;
506 509
507 if (status->flag & RX_FLAG_DECRYPTED) { 510 if (status->flag & RX_FLAG_DECRYPTED) {
508 if (!pskb_may_pull(rx->skb, hdrlen + CCMP_HDR_LEN)) 511 if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_CCMP_HDR_LEN))
509 return RX_DROP_UNUSABLE; 512 return RX_DROP_UNUSABLE;
510 } else { 513 } else {
511 if (skb_linearize(rx->skb)) 514 if (skb_linearize(rx->skb))
@@ -516,7 +519,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
516 519
517 queue = rx->security_idx; 520 queue = rx->security_idx;
518 521
519 if (memcmp(pn, key->u.ccmp.rx_pn[queue], CCMP_PN_LEN) <= 0) { 522 if (memcmp(pn, key->u.ccmp.rx_pn[queue], IEEE80211_CCMP_PN_LEN) <= 0) {
520 key->u.ccmp.replays++; 523 key->u.ccmp.replays++;
521 return RX_DROP_UNUSABLE; 524 return RX_DROP_UNUSABLE;
522 } 525 }
@@ -528,19 +531,20 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
528 531
529 if (ieee80211_aes_ccm_decrypt( 532 if (ieee80211_aes_ccm_decrypt(
530 key->u.ccmp.tfm, scratch, 533 key->u.ccmp.tfm, scratch,
531 skb->data + hdrlen + CCMP_HDR_LEN, data_len, 534 skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN,
532 skb->data + skb->len - CCMP_MIC_LEN, 535 data_len,
533 skb->data + hdrlen + CCMP_HDR_LEN)) 536 skb->data + skb->len - IEEE80211_CCMP_MIC_LEN,
537 skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN))
534 return RX_DROP_UNUSABLE; 538 return RX_DROP_UNUSABLE;
535 } 539 }
536 540
537 memcpy(key->u.ccmp.rx_pn[queue], pn, CCMP_PN_LEN); 541 memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN);
538 542
539 /* Remove CCMP header and MIC */ 543 /* Remove CCMP header and MIC */
540 if (pskb_trim(skb, skb->len - CCMP_MIC_LEN)) 544 if (pskb_trim(skb, skb->len - IEEE80211_CCMP_MIC_LEN))
541 return RX_DROP_UNUSABLE; 545 return RX_DROP_UNUSABLE;
542 memmove(skb->data + CCMP_HDR_LEN, skb->data, hdrlen); 546 memmove(skb->data + IEEE80211_CCMP_HDR_LEN, skb->data, hdrlen);
543 skb_pull(skb, CCMP_HDR_LEN); 547 skb_pull(skb, IEEE80211_CCMP_HDR_LEN);
544 548
545 return RX_CONTINUE; 549 return RX_CONTINUE;
546} 550}
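
One subtlety worth spelling out in the CCMP hunks: the replay check gets away with a plain memcmp() because mac80211 keeps the 48-bit packet number as six bytes in most-significant-first order, so lexicographic comparison equals numeric comparison. A standalone sketch of just that check, with the key/queue plumbing omitted:

    #include <stdio.h>
    #include <string.h>

    #define CCMP_PN_LEN 6

    /* PNs are stored big-endian, so memcmp() gives numeric
     * ordering. A frame is a replay unless its PN is strictly
     * greater than the last PN accepted on this RX queue. */
    static int is_replay(const unsigned char *pn,
                         const unsigned char *last_pn)
    {
        return memcmp(pn, last_pn, CCMP_PN_LEN) <= 0;
    }

    int main(void)
    {
        unsigned char last[CCMP_PN_LEN] = { 0, 0, 0, 0, 1, 0xff };
        unsigned char next[CCMP_PN_LEN] = { 0, 0, 0, 0, 2, 0x00 };
        printf("%d %d\n", is_replay(last, last),
                          is_replay(next, last)); /* 1 0 */
        return 0;
    }
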
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
new file mode 100644
index 000000000000..37421db88965
--- /dev/null
+++ b/net/mpls/Kconfig
@@ -0,0 +1,9 @@
1#
2# MPLS configuration
3#
4config NET_MPLS_GSO
5 tristate "MPLS: GSO support"
6 help
 7	  This is a helper module to allow segmentation of non-MPLS GSO packets
8 that have had MPLS stack entries pushed onto them and thus
9 become MPLS GSO packets.
diff --git a/net/mpls/Makefile b/net/mpls/Makefile
new file mode 100644
index 000000000000..0a3c171be537
--- /dev/null
+++ b/net/mpls/Makefile
@@ -0,0 +1,4 @@
1#
2# Makefile for MPLS.
3#
4obj-y += mpls_gso.o
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
new file mode 100644
index 000000000000..1bec1219ab81
--- /dev/null
+++ b/net/mpls/mpls_gso.c
@@ -0,0 +1,108 @@
1/*
2 * MPLS GSO Support
3 *
4 * Authors: Simon Horman (horms@verge.net.au)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * Based on: GSO portions of net/ipv4/gre.c
12 */
13
14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
16#include <linux/err.h>
17#include <linux/module.h>
18#include <linux/netdev_features.h>
19#include <linux/netdevice.h>
20#include <linux/skbuff.h>
21
22static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
23 netdev_features_t features)
24{
25 struct sk_buff *segs = ERR_PTR(-EINVAL);
26 netdev_features_t mpls_features;
27 __be16 mpls_protocol;
28
29 if (unlikely(skb_shinfo(skb)->gso_type &
30 ~(SKB_GSO_TCPV4 |
31 SKB_GSO_TCPV6 |
32 SKB_GSO_UDP |
33 SKB_GSO_DODGY |
34 SKB_GSO_TCP_ECN |
35 SKB_GSO_GRE |
36 SKB_GSO_MPLS)))
37 goto out;
38
39 /* Setup inner SKB. */
40 mpls_protocol = skb->protocol;
41 skb->protocol = skb->inner_protocol;
42
43 /* Push back the mac header that skb_mac_gso_segment() has pulled.
44 * It will be re-pulled by the call to skb_mac_gso_segment() below
45 */
46 __skb_push(skb, skb->mac_len);
47
48 /* Segment inner packet. */
49 mpls_features = skb->dev->mpls_features & netif_skb_features(skb);
50 segs = skb_mac_gso_segment(skb, mpls_features);
51
52
53 /* Restore outer protocol. */
54 skb->protocol = mpls_protocol;
55
56 /* Re-pull the mac header that the call to skb_mac_gso_segment()
 57	 * above pulled. It will be re-pushed after returning from
 58	 * skb_mac_gso_segment(), an indirect caller of this function.
59 */
60 __skb_push(skb, skb->data - skb_mac_header(skb));
61
62out:
63 return segs;
64}
65
66static int mpls_gso_send_check(struct sk_buff *skb)
67{
68 return 0;
69}
70
71static struct packet_offload mpls_mc_offload = {
72 .type = cpu_to_be16(ETH_P_MPLS_MC),
73 .callbacks = {
74 .gso_send_check = mpls_gso_send_check,
75 .gso_segment = mpls_gso_segment,
76 },
77};
78
79static struct packet_offload mpls_uc_offload = {
80 .type = cpu_to_be16(ETH_P_MPLS_UC),
81 .callbacks = {
82 .gso_send_check = mpls_gso_send_check,
83 .gso_segment = mpls_gso_segment,
84 },
85};
86
87static int __init mpls_gso_init(void)
88{
89 pr_info("MPLS GSO support\n");
90
91 dev_add_offload(&mpls_uc_offload);
92 dev_add_offload(&mpls_mc_offload);
93
94 return 0;
95}
96
97static void __exit mpls_gso_exit(void)
98{
99 dev_remove_offload(&mpls_uc_offload);
100 dev_remove_offload(&mpls_mc_offload);
101}
102
103module_init(mpls_gso_init);
104module_exit(mpls_gso_exit);
105
106MODULE_DESCRIPTION("MPLS GSO support");
107MODULE_AUTHOR("Simon Horman (horms@verge.net.au)");
108MODULE_LICENSE("GPL");
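
The guard at the top of mpls_gso_segment() is a whitelist test: if gso_type carries any bit outside the set the MPLS offload knows how to handle, the function bails out with -EINVAL. A small sketch of the bitmask idiom, with invented flag values standing in for the real SKB_GSO_* bits:

    #include <stdio.h>

    /* Hypothetical stand-ins for the SKB_GSO_* flag bits. */
    #define GSO_TCPV4 (1u << 0)
    #define GSO_TCPV6 (1u << 1)
    #define GSO_UDP   (1u << 2)
    #define GSO_MPLS  (1u << 3)
    #define GSO_ESP   (1u << 4)   /* not handled by this offload */

    #define GSO_OK (GSO_TCPV4 | GSO_TCPV6 | GSO_UDP | GSO_MPLS)

    /* Refuse if any bit outside the supported set is present,
     * exactly like the gso_type & ~(...) test in
     * mpls_gso_segment(). */
    static int supported(unsigned int gso_type)
    {
        return (gso_type & ~GSO_OK) == 0;
    }

    int main(void)
    {
        printf("%d\n", supported(GSO_TCPV4 | GSO_MPLS)); /* 1 */
        printf("%d\n", supported(GSO_TCPV4 | GSO_ESP));  /* 0 */
        return 0;
    }

Past that guard, the function swaps skb->protocol for the inner protocol, lets skb_mac_gso_segment() do the real work, then restores the outer protocol so each segment can have its MPLS stack re-pushed by the caller.
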
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 56d22cae5906..6e839b6dff2b 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -408,21 +408,10 @@ config NF_NAT_TFTP
408 depends on NF_CONNTRACK && NF_NAT 408 depends on NF_CONNTRACK && NF_NAT
409 default NF_NAT && NF_CONNTRACK_TFTP 409 default NF_NAT && NF_CONNTRACK_TFTP
410 410
411endif # NF_CONNTRACK 411config NETFILTER_SYNPROXY
412 412 tristate
413# transparent proxy support
414config NETFILTER_TPROXY
415 tristate "Transparent proxying support"
416 depends on IP_NF_MANGLE
417 depends on NETFILTER_ADVANCED
418 help
419 This option enables transparent proxying support, that is,
420 support for handling non-locally bound IPv4 TCP and UDP sockets.
421 For it to work you will have to configure certain iptables rules
422 and use policy routing. For more information on how to set it up
423 see Documentation/networking/tproxy.txt.
424 413
425 To compile it as a module, choose M here. If unsure, say N. 414endif # NF_CONNTRACK
426 415
427config NETFILTER_XTABLES 416config NETFILTER_XTABLES
428 tristate "Netfilter Xtables support (required for ip_tables)" 417 tristate "Netfilter Xtables support (required for ip_tables)"
@@ -720,10 +709,10 @@ config NETFILTER_XT_TARGET_TEE
720 this clone be rerouted to another nexthop. 709 this clone be rerouted to another nexthop.
721 710
722config NETFILTER_XT_TARGET_TPROXY 711config NETFILTER_XT_TARGET_TPROXY
723 tristate '"TPROXY" target support' 712 tristate '"TPROXY" target transparent proxying support'
724 depends on NETFILTER_TPROXY
725 depends on NETFILTER_XTABLES 713 depends on NETFILTER_XTABLES
726 depends on NETFILTER_ADVANCED 714 depends on NETFILTER_ADVANCED
715 depends on IP_NF_MANGLE
727 select NF_DEFRAG_IPV4 716 select NF_DEFRAG_IPV4
728 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES 717 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
729 help 718 help
@@ -731,6 +720,9 @@ config NETFILTER_XT_TARGET_TPROXY
731 REDIRECT. It can only be used in the mangle table and is useful 720 REDIRECT. It can only be used in the mangle table and is useful
732 to redirect traffic to a transparent proxy. It does _not_ depend 721 to redirect traffic to a transparent proxy. It does _not_ depend
733 on Netfilter connection tracking and NAT, unlike REDIRECT. 722 on Netfilter connection tracking and NAT, unlike REDIRECT.
723 For it to work you will have to configure certain iptables rules
724 and use policy routing. For more information on how to set it up
725 see Documentation/networking/tproxy.txt.
734 726
735 To compile it as a module, choose M here. If unsure, say N. 727 To compile it as a module, choose M here. If unsure, say N.
736 728
@@ -1180,10 +1172,10 @@ config NETFILTER_XT_MATCH_SCTP
1180 1172
1181config NETFILTER_XT_MATCH_SOCKET 1173config NETFILTER_XT_MATCH_SOCKET
1182 tristate '"socket" match support' 1174 tristate '"socket" match support'
1183 depends on NETFILTER_TPROXY
1184 depends on NETFILTER_XTABLES 1175 depends on NETFILTER_XTABLES
1185 depends on NETFILTER_ADVANCED 1176 depends on NETFILTER_ADVANCED
1186 depends on !NF_CONNTRACK || NF_CONNTRACK 1177 depends on !NF_CONNTRACK || NF_CONNTRACK
1178 depends on (IPV6 || IPV6=n)
1187 select NF_DEFRAG_IPV4 1179 select NF_DEFRAG_IPV4
1188 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES 1180 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
1189 help 1181 help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index a1abf87d43bf..c3a0a12907f6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,6 +1,6 @@
1netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o 1netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
2 2
3nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o 3nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
4nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o 4nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
5nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o 5nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
6nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o 6nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
@@ -61,8 +61,8 @@ obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o
61obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o 61obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o
62obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o 62obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
63 63
64# transparent proxy support 64# SYNPROXY
65obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o 65obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
66 66
67# generic X tables 67# generic X tables
68obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o 68obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 857ca9f35177..593b16ea45e0 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -234,12 +234,13 @@ EXPORT_SYMBOL(skb_make_writable);
234/* This does not belong here, but locally generated errors need it if connection 234/* This does not belong here, but locally generated errors need it if connection
235 tracking in use: without this, connection may not be in hash table, and hence 235 tracking in use: without this, connection may not be in hash table, and hence
236 manufactured ICMP or RST packets will not be associated with it. */ 236 manufactured ICMP or RST packets will not be associated with it. */
237void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly; 237void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
238 __rcu __read_mostly;
238EXPORT_SYMBOL(ip_ct_attach); 239EXPORT_SYMBOL(ip_ct_attach);
239 240
240void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) 241void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
241{ 242{
242 void (*attach)(struct sk_buff *, struct sk_buff *); 243 void (*attach)(struct sk_buff *, const struct sk_buff *);
243 244
244 if (skb->nfct) { 245 if (skb->nfct) {
245 rcu_read_lock(); 246 rcu_read_lock();
@@ -304,17 +305,26 @@ static struct pernet_operations netfilter_net_ops = {
304 .exit = netfilter_net_exit, 305 .exit = netfilter_net_exit,
305}; 306};
306 307
307void __init netfilter_init(void) 308int __init netfilter_init(void)
308{ 309{
309 int i, h; 310 int i, h, ret;
311
310 for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) { 312 for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) {
311 for (h = 0; h < NF_MAX_HOOKS; h++) 313 for (h = 0; h < NF_MAX_HOOKS; h++)
312 INIT_LIST_HEAD(&nf_hooks[i][h]); 314 INIT_LIST_HEAD(&nf_hooks[i][h]);
313 } 315 }
314 316
315 if (register_pernet_subsys(&netfilter_net_ops) < 0) 317 ret = register_pernet_subsys(&netfilter_net_ops);
316 panic("cannot create netfilter proc entry"); 318 if (ret < 0)
319 goto err;
320
321 ret = netfilter_log_init();
322 if (ret < 0)
323 goto err_pernet;
317 324
318 if (netfilter_log_init() < 0) 325 return 0;
319 panic("cannot initialize nf_log"); 326err_pernet:
327 unregister_pernet_subsys(&netfilter_net_ops);
328err:
329 return ret;
320} 330}
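
The netfilter_init() rework is a textbook conversion from panic-on-failure to the kernel's goto-based unwinding: every facility initialised so far is torn down, in reverse order, when a later step fails, and the error is propagated to the caller instead of halting the machine. The shape in isolation, with trivial stand-ins for register_pernet_subsys() and netfilter_log_init():

    #include <stdio.h>

    /* Hypothetical init/teardown pairs standing in for the real
     * registration calls. */
    static int  init_a(void) { return 0; }
    static void exit_a(void) { }
    static int  init_b(void) { return -1; }  /* simulate failure */

    static int setup(void)
    {
        int ret;

        ret = init_a();
        if (ret < 0)
            goto err;

        ret = init_b();
        if (ret < 0)
            goto err_a;      /* unwind only what succeeded */

        return 0;

    err_a:
        exit_a();
    err:
        return ret;
    }

    int main(void)
    {
        printf("setup() = %d\n", setup());   /* setup() = -1 */
        return 0;
    }
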
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index f77139007983..f2e30fb31e78 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1052,7 +1052,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
1052 * Not an artificial restriction anymore, as we must prevent 1052 * Not an artificial restriction anymore, as we must prevent
1053 * possible loops created by swapping in setlist type of sets. */ 1053 * possible loops created by swapping in setlist type of sets. */
1054 if (!(from->type->features == to->type->features && 1054 if (!(from->type->features == to->type->features &&
1055 from->type->family == to->type->family)) 1055 from->family == to->family))
1056 return -IPSET_ERR_TYPE_MISMATCH; 1056 return -IPSET_ERR_TYPE_MISMATCH;
1057 1057
1058 strncpy(from_name, from->name, IPSET_MAXNAMELEN); 1058 strncpy(from_name, from->name, IPSET_MAXNAMELEN);
@@ -1489,8 +1489,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
1489 if (ret == -EAGAIN) 1489 if (ret == -EAGAIN)
1490 ret = 1; 1490 ret = 1;
1491 1491
1492 return (ret < 0 && ret != -ENOTEMPTY) ? ret : 1492 return ret > 0 ? 0 : -IPSET_ERR_EXIST;
1493 ret > 0 ? 0 : -IPSET_ERR_EXIST;
1494} 1493}
1495 1494
1496/* Get headed data of a set */ 1495/* Get headed data of a set */
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 6fdf88ae2353..dac156f819ac 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -116,12 +116,12 @@ ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
116{ 116{
117 int protoff; 117 int protoff;
118 u8 nexthdr; 118 u8 nexthdr;
119 __be16 frag_off; 119 __be16 frag_off = 0;
120 120
121 nexthdr = ipv6_hdr(skb)->nexthdr; 121 nexthdr = ipv6_hdr(skb)->nexthdr;
122 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, 122 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
123 &frag_off); 123 &frag_off);
124 if (protoff < 0) 124 if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
125 return false; 125 return false;
126 126
127 return get_port(skb, nexthdr, protoff, src, port, proto); 127 return get_port(skb, nexthdr, protoff, src, port, proto);
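
The new frag_off test closes a real gap: ipv6_skip_exthdr() returns the raw fragment-header field, whose upper 13 bits hold the fragment offset in 8-octet units and whose low 3 bits are flags. Masking with htons(~0x7) therefore isolates the offset, and a nonzero result means a non-first fragment, which carries no transport header to read ports from (frag_off = 0 as an initializer also covers packets with no fragment header at all). A standalone sketch of the mask:

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>  /* htons */

    /* frag field: offset (13 bits, 8-octet units) | res (2) | M (1) */
    static int is_first_fragment(uint16_t frag_field_net)
    {
        return (frag_field_net & htons(~0x7)) == 0;
    }

    int main(void)
    {
        uint16_t first = htons(0x0001);         /* offset 0,   M=1 */
        uint16_t later = htons((185 << 3) | 1); /* offset 185, M=1 */
        printf("%d %d\n", is_first_fragment(first),
                          is_first_fragment(later)); /* 1 0 */
        return 0;
    }
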
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 57beb1762b2d..707bc520d629 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -325,18 +325,22 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length)
325static void 325static void
326mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length) 326mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length)
327{ 327{
328 u8 i, j; 328 u8 i, j, net_end = nets_length - 1;
329 329
330 for (i = 0; i < nets_length - 1 && h->nets[i].cidr != cidr; i++) 330 for (i = 0; i < nets_length; i++) {
331 ; 331 if (h->nets[i].cidr != cidr)
332 h->nets[i].nets--; 332 continue;
333 333 if (h->nets[i].nets > 1 || i == net_end ||
334 if (h->nets[i].nets != 0) 334 h->nets[i + 1].nets == 0) {
335 return; 335 h->nets[i].nets--;
336 336 return;
337 for (j = i; j < nets_length - 1 && h->nets[j].nets; j++) { 337 }
338 h->nets[j].cidr = h->nets[j + 1].cidr; 338 for (j = i; j < net_end && h->nets[j].nets; j++) {
339 h->nets[j].nets = h->nets[j + 1].nets; 339 h->nets[j].cidr = h->nets[j + 1].cidr;
340 h->nets[j].nets = h->nets[j + 1].nets;
341 }
342 h->nets[j].nets = 0;
343 return;
340 } 344 }
341} 345}
342#endif 346#endif
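
The rewritten mtype_del_cidr() fixes the bookkeeping for the per-prefix counters: nets[] is kept dense, so dropping the last reference to a prefix length must shift the tail of the array down over the vacated slot, while any other deletion is a plain decrement. A simplified userspace model of that delete-and-compact logic (a bare array, not the real ipset structures):

    #include <stdio.h>

    struct net_cnt { unsigned char cidr; unsigned int nets; };

    /* Dense array of (cidr, refcount) pairs; unused tail entries
     * have nets == 0. Deleting the last reference compacts the
     * tail down, mirroring the kernel loop. */
    static void del_cidr(struct net_cnt *n, int len, unsigned char cidr)
    {
        int i, j;

        for (i = 0; i < len; i++) {
            if (n[i].cidr != cidr)
                continue;
            if (n[i].nets > 1 || i == len - 1 || n[i + 1].nets == 0) {
                n[i].nets--;
                return;
            }
            for (j = i; j < len - 1 && n[j].nets; j++)
                n[j] = n[j + 1];     /* shift tail down */
            n[j].nets = 0;
            return;
        }
    }

    int main(void)
    {
        struct net_cnt n[4] = { {24, 1}, {16, 2}, {8, 1}, {0, 0} };
        del_cidr(n, 4, 24);  /* last /24 ref: /16 and /8 move down */
        for (int i = 0; i < 4; i++)
            printf("cidr=%u nets=%u\n", n[i].cidr, n[i].nets);
        return 0;
    }

The old code decremented before checking whether it had even found the cidr, which is the off-by-one this hunk repairs.
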
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index c6a525373be4..f15f3e28b9c3 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -260,7 +260,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
260 e.ip = htonl(ip); 260 e.ip = htonl(ip);
261 e.ip2 = htonl(ip2_from & ip_set_hostmask(e.cidr + 1)); 261 e.ip2 = htonl(ip2_from & ip_set_hostmask(e.cidr + 1));
262 ret = adtfn(set, &e, &ext, &ext, flags); 262 ret = adtfn(set, &e, &ext, &ext, flags);
263 return ip_set_enomatch(ret, flags, adt) ? 1 : 263 return ip_set_enomatch(ret, flags, adt, set) ? -ret :
264 ip_set_eexist(ret, flags) ? 0 : ret; 264 ip_set_eexist(ret, flags) ? 0 : ret;
265 } 265 }
266 266
@@ -544,7 +544,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
544 544
545 if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { 545 if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
546 ret = adtfn(set, &e, &ext, &ext, flags); 546 ret = adtfn(set, &e, &ext, &ext, flags);
547 return ip_set_enomatch(ret, flags, adt) ? 1 : 547 return ip_set_enomatch(ret, flags, adt, set) ? -ret :
548 ip_set_eexist(ret, flags) ? 0 : ret; 548 ip_set_eexist(ret, flags) ? 0 : ret;
549 } 549 }
550 550
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index da740ceb56ae..223e9f546d0f 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -199,7 +199,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
199 if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { 199 if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
200 e.ip = htonl(ip & ip_set_hostmask(e.cidr)); 200 e.ip = htonl(ip & ip_set_hostmask(e.cidr));
201 ret = adtfn(set, &e, &ext, &ext, flags); 201 ret = adtfn(set, &e, &ext, &ext, flags);
 202		return ip_set_enomatch(ret, flags, adt) ? 1 : 202		return ip_set_enomatch(ret, flags, adt, set) ? -ret :
203 ip_set_eexist(ret, flags) ? 0 : ret; 203 ip_set_eexist(ret, flags) ? 0 : ret;
204 } 204 }
205 205
@@ -396,7 +396,7 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
396 396
397 ret = adtfn(set, &e, &ext, &ext, flags); 397 ret = adtfn(set, &e, &ext, &ext, flags);
398 398
399 return ip_set_enomatch(ret, flags, adt) ? 1 : 399 return ip_set_enomatch(ret, flags, adt, set) ? -ret :
400 ip_set_eexist(ret, flags) ? 0 : ret; 400 ip_set_eexist(ret, flags) ? 0 : ret;
401} 401}
402 402
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 84ae6f6ce624..7d798d5d5cd3 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -368,7 +368,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
368 if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { 368 if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
369 e.ip = htonl(ip & ip_set_hostmask(e.cidr)); 369 e.ip = htonl(ip & ip_set_hostmask(e.cidr));
370 ret = adtfn(set, &e, &ext, &ext, flags); 370 ret = adtfn(set, &e, &ext, &ext, flags);
371 return ip_set_enomatch(ret, flags, adt) ? 1 : 371 return ip_set_enomatch(ret, flags, adt, set) ? -ret :
372 ip_set_eexist(ret, flags) ? 0 : ret; 372 ip_set_eexist(ret, flags) ? 0 : ret;
373 } 373 }
374 374
@@ -634,7 +634,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
634 634
635 ret = adtfn(set, &e, &ext, &ext, flags); 635 ret = adtfn(set, &e, &ext, &ext, flags);
636 636
637 return ip_set_enomatch(ret, flags, adt) ? 1 : 637 return ip_set_enomatch(ret, flags, adt, set) ? -ret :
638 ip_set_eexist(ret, flags) ? 0 : ret; 638 ip_set_eexist(ret, flags) ? 0 : ret;
639} 639}
640 640
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 9a0869853be5..09d6690bee6f 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -244,7 +244,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
244 if (adt == IPSET_TEST || !(with_ports || tb[IPSET_ATTR_IP_TO])) { 244 if (adt == IPSET_TEST || !(with_ports || tb[IPSET_ATTR_IP_TO])) {
245 e.ip = htonl(ip & ip_set_hostmask(e.cidr + 1)); 245 e.ip = htonl(ip & ip_set_hostmask(e.cidr + 1));
246 ret = adtfn(set, &e, &ext, &ext, flags); 246 ret = adtfn(set, &e, &ext, &ext, flags);
247 return ip_set_enomatch(ret, flags, adt) ? 1 : 247 return ip_set_enomatch(ret, flags, adt, set) ? -ret :
248 ip_set_eexist(ret, flags) ? 0 : ret; 248 ip_set_eexist(ret, flags) ? 0 : ret;
249 } 249 }
250 250
@@ -489,7 +489,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
489 489
490 if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { 490 if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
491 ret = adtfn(set, &e, &ext, &ext, flags); 491 ret = adtfn(set, &e, &ext, &ext, flags);
492 return ip_set_enomatch(ret, flags, adt) ? 1 : 492 return ip_set_enomatch(ret, flags, adt, set) ? -ret :
493 ip_set_eexist(ret, flags) ? 0 : ret; 493 ip_set_eexist(ret, flags) ? 0 : ret;
494 } 494 }
495 495
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index a083bda322b6..4c8e5c0aa1ab 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -975,8 +975,7 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
975 return cp; 975 return cp;
976 } 976 }
977 } 977 }
978 rcu_read_unlock(); 978 cond_resched_rcu();
979 rcu_read_lock();
980 } 979 }
981 980
982 return NULL; 981 return NULL;
@@ -1015,8 +1014,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1015 iter->l = &ip_vs_conn_tab[idx]; 1014 iter->l = &ip_vs_conn_tab[idx];
1016 return cp; 1015 return cp;
1017 } 1016 }
1018 rcu_read_unlock(); 1017 cond_resched_rcu();
1019 rcu_read_lock();
1020 } 1018 }
1021 iter->l = NULL; 1019 iter->l = NULL;
1022 return NULL; 1020 return NULL;
@@ -1206,17 +1204,13 @@ void ip_vs_random_dropentry(struct net *net)
1206 int idx; 1204 int idx;
1207 struct ip_vs_conn *cp, *cp_c; 1205 struct ip_vs_conn *cp, *cp_c;
1208 1206
1207 rcu_read_lock();
1209 /* 1208 /*
1210 * Randomly scan 1/32 of the whole table every second 1209 * Randomly scan 1/32 of the whole table every second
1211 */ 1210 */
1212 for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { 1211 for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) {
1213 unsigned int hash = net_random() & ip_vs_conn_tab_mask; 1212 unsigned int hash = net_random() & ip_vs_conn_tab_mask;
1214 1213
1215 /*
1216 * Lock is actually needed in this loop.
1217 */
1218 rcu_read_lock();
1219
1220 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { 1214 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
1221 if (cp->flags & IP_VS_CONN_F_TEMPLATE) 1215 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
1222 /* connection template */ 1216 /* connection template */
@@ -1237,6 +1231,18 @@ void ip_vs_random_dropentry(struct net *net)
1237 default: 1231 default:
1238 continue; 1232 continue;
1239 } 1233 }
1234 } else if (cp->protocol == IPPROTO_SCTP) {
1235 switch (cp->state) {
1236 case IP_VS_SCTP_S_INIT1:
1237 case IP_VS_SCTP_S_INIT:
1238 break;
1239 case IP_VS_SCTP_S_ESTABLISHED:
1240 if (todrop_entry(cp))
1241 break;
1242 continue;
1243 default:
1244 continue;
1245 }
1240 } else { 1246 } else {
1241 if (!todrop_entry(cp)) 1247 if (!todrop_entry(cp))
1242 continue; 1248 continue;
@@ -1252,8 +1258,9 @@ void ip_vs_random_dropentry(struct net *net)
1252 __ip_vs_conn_put(cp); 1258 __ip_vs_conn_put(cp);
1253 } 1259 }
1254 } 1260 }
1255 rcu_read_unlock(); 1261 cond_resched_rcu();
1256 } 1262 }
1263 rcu_read_unlock();
1257} 1264}
1258 1265
1259 1266
@@ -1267,11 +1274,8 @@ static void ip_vs_conn_flush(struct net *net)
1267 struct netns_ipvs *ipvs = net_ipvs(net); 1274 struct netns_ipvs *ipvs = net_ipvs(net);
1268 1275
1269flush_again: 1276flush_again:
1277 rcu_read_lock();
1270 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { 1278 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
1271 /*
1272 * Lock is actually needed in this loop.
1273 */
1274 rcu_read_lock();
1275 1279
1276 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { 1280 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
1277 if (!ip_vs_conn_net_eq(cp, net)) 1281 if (!ip_vs_conn_net_eq(cp, net))
@@ -1286,8 +1290,9 @@ flush_again:
1286 __ip_vs_conn_put(cp); 1290 __ip_vs_conn_put(cp);
1287 } 1291 }
1288 } 1292 }
1289 rcu_read_unlock(); 1293 cond_resched_rcu();
1290 } 1294 }
1295 rcu_read_unlock();
1291 1296
1292 /* the counter may be not NULL, because maybe some conn entries 1297 /* the counter may be not NULL, because maybe some conn entries
1293 are run by slow timer handler or unhashed but still referred */ 1298 are run by slow timer handler or unhashed but still referred */
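
The back-to-back rcu_read_unlock()/rcu_read_lock() pairs these hunks delete existed only to open a scheduling window between hash buckets; cond_resched_rcu() packages exactly that (unlock, cond_resched(), relock, on configurations where the window is needed). A userspace model of the walker shape, with no-op stand-ins for the RCU primitives:

    #include <stdio.h>

    /* Userspace stand-ins: in the kernel these are
     * rcu_read_lock(), rcu_read_unlock() and cond_resched_rcu(). */
    static void rcu_lock(void)   { }
    static void rcu_unlock(void) { }
    static void cond_resched_rcu_model(void)
    {
        rcu_unlock();  /* leave the read-side critical section... */
        /* ...the scheduler may run other tasks here... */
        rcu_lock();    /* ...and re-enter it for the next bucket */
    }

    int main(void)
    {
        rcu_lock();
        for (int bucket = 0; bucket < 4; bucket++) {
            printf("scan bucket %d under RCU\n", bucket);
            cond_resched_rcu_model(); /* breathe between buckets */
        }
        rcu_unlock();
        return 0;
    }

Note the consequence visible in the diff: the lock/unlock now brackets the whole loop, with the resched point at the bottom of each iteration.
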
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 05565d2b3a61..74fd00c27210 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -116,6 +116,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
116 116
117 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { 117 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
118 struct ip_vs_cpu_stats *s; 118 struct ip_vs_cpu_stats *s;
119 struct ip_vs_service *svc;
119 120
120 s = this_cpu_ptr(dest->stats.cpustats); 121 s = this_cpu_ptr(dest->stats.cpustats);
121 s->ustats.inpkts++; 122 s->ustats.inpkts++;
@@ -123,11 +124,14 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
123 s->ustats.inbytes += skb->len; 124 s->ustats.inbytes += skb->len;
124 u64_stats_update_end(&s->syncp); 125 u64_stats_update_end(&s->syncp);
125 126
126 s = this_cpu_ptr(dest->svc->stats.cpustats); 127 rcu_read_lock();
128 svc = rcu_dereference(dest->svc);
129 s = this_cpu_ptr(svc->stats.cpustats);
127 s->ustats.inpkts++; 130 s->ustats.inpkts++;
128 u64_stats_update_begin(&s->syncp); 131 u64_stats_update_begin(&s->syncp);
129 s->ustats.inbytes += skb->len; 132 s->ustats.inbytes += skb->len;
130 u64_stats_update_end(&s->syncp); 133 u64_stats_update_end(&s->syncp);
134 rcu_read_unlock();
131 135
132 s = this_cpu_ptr(ipvs->tot_stats.cpustats); 136 s = this_cpu_ptr(ipvs->tot_stats.cpustats);
133 s->ustats.inpkts++; 137 s->ustats.inpkts++;
@@ -146,6 +150,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
146 150
147 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { 151 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
148 struct ip_vs_cpu_stats *s; 152 struct ip_vs_cpu_stats *s;
153 struct ip_vs_service *svc;
149 154
150 s = this_cpu_ptr(dest->stats.cpustats); 155 s = this_cpu_ptr(dest->stats.cpustats);
151 s->ustats.outpkts++; 156 s->ustats.outpkts++;
@@ -153,11 +158,14 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
153 s->ustats.outbytes += skb->len; 158 s->ustats.outbytes += skb->len;
154 u64_stats_update_end(&s->syncp); 159 u64_stats_update_end(&s->syncp);
155 160
156 s = this_cpu_ptr(dest->svc->stats.cpustats); 161 rcu_read_lock();
162 svc = rcu_dereference(dest->svc);
163 s = this_cpu_ptr(svc->stats.cpustats);
157 s->ustats.outpkts++; 164 s->ustats.outpkts++;
158 u64_stats_update_begin(&s->syncp); 165 u64_stats_update_begin(&s->syncp);
159 s->ustats.outbytes += skb->len; 166 s->ustats.outbytes += skb->len;
160 u64_stats_update_end(&s->syncp); 167 u64_stats_update_end(&s->syncp);
168 rcu_read_unlock();
161 169
162 s = this_cpu_ptr(ipvs->tot_stats.cpustats); 170 s = this_cpu_ptr(ipvs->tot_stats.cpustats);
163 s->ustats.outpkts++; 171 s->ustats.outpkts++;
@@ -305,7 +313,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
305 * return *ignored=0 i.e. ICMP and NF_DROP 313 * return *ignored=0 i.e. ICMP and NF_DROP
306 */ 314 */
307 sched = rcu_dereference(svc->scheduler); 315 sched = rcu_dereference(svc->scheduler);
308 dest = sched->schedule(svc, skb); 316 dest = sched->schedule(svc, skb, iph);
309 if (!dest) { 317 if (!dest) {
310 IP_VS_DBG(1, "p-schedule: no dest found.\n"); 318 IP_VS_DBG(1, "p-schedule: no dest found.\n");
311 kfree(param.pe_data); 319 kfree(param.pe_data);
@@ -452,7 +460,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
452 } 460 }
453 461
454 sched = rcu_dereference(svc->scheduler); 462 sched = rcu_dereference(svc->scheduler);
455 dest = sched->schedule(svc, skb); 463 dest = sched->schedule(svc, skb, iph);
456 if (dest == NULL) { 464 if (dest == NULL) {
457 IP_VS_DBG(1, "Schedule: no dest found.\n"); 465 IP_VS_DBG(1, "Schedule: no dest found.\n");
458 return NULL; 466 return NULL;
@@ -1442,7 +1450,8 @@ ignore_ipip:
1442 1450
1443 /* do the statistics and put it back */ 1451 /* do the statistics and put it back */
1444 ip_vs_in_stats(cp, skb); 1452 ip_vs_in_stats(cp, skb);
1445 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) 1453 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol ||
1454 IPPROTO_SCTP == cih->protocol)
1446 offset += 2 * sizeof(__u16); 1455 offset += 2 * sizeof(__u16);
1447 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph); 1456 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph);
1448 1457
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 5b142fb16480..a3df9bddc4f7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -460,7 +460,7 @@ static inline void
460__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) 460__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
461{ 461{
462 atomic_inc(&svc->refcnt); 462 atomic_inc(&svc->refcnt);
463 dest->svc = svc; 463 rcu_assign_pointer(dest->svc, svc);
464} 464}
465 465
466static void ip_vs_service_free(struct ip_vs_service *svc) 466static void ip_vs_service_free(struct ip_vs_service *svc)
@@ -470,18 +470,25 @@ static void ip_vs_service_free(struct ip_vs_service *svc)
470 kfree(svc); 470 kfree(svc);
471} 471}
472 472
473static void 473static void ip_vs_service_rcu_free(struct rcu_head *head)
474__ip_vs_unbind_svc(struct ip_vs_dest *dest)
475{ 474{
476 struct ip_vs_service *svc = dest->svc; 475 struct ip_vs_service *svc;
476
477 svc = container_of(head, struct ip_vs_service, rcu_head);
478 ip_vs_service_free(svc);
479}
477 480
478 dest->svc = NULL; 481static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay)
482{
479 if (atomic_dec_and_test(&svc->refcnt)) { 483 if (atomic_dec_and_test(&svc->refcnt)) {
480 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", 484 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
481 svc->fwmark, 485 svc->fwmark,
482 IP_VS_DBG_ADDR(svc->af, &svc->addr), 486 IP_VS_DBG_ADDR(svc->af, &svc->addr),
483 ntohs(svc->port)); 487 ntohs(svc->port));
484 ip_vs_service_free(svc); 488 if (do_delay)
489 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
490 else
491 ip_vs_service_free(svc);
485 } 492 }
486} 493}
487 494
@@ -667,11 +674,6 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
667 IP_VS_DBG_ADDR(svc->af, &dest->addr), 674 IP_VS_DBG_ADDR(svc->af, &dest->addr),
668 ntohs(dest->port), 675 ntohs(dest->port),
669 atomic_read(&dest->refcnt)); 676 atomic_read(&dest->refcnt));
670 /* We can not reuse dest while in grace period
671 * because conns still can use dest->svc
672 */
673 if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
674 continue;
675 if (dest->af == svc->af && 677 if (dest->af == svc->af &&
676 ip_vs_addr_equal(svc->af, &dest->addr, daddr) && 678 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
677 dest->port == dport && 679 dest->port == dport &&
@@ -697,8 +699,10 @@ out:
697 699
698static void ip_vs_dest_free(struct ip_vs_dest *dest) 700static void ip_vs_dest_free(struct ip_vs_dest *dest)
699{ 701{
702 struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);
703
700 __ip_vs_dst_cache_reset(dest); 704 __ip_vs_dst_cache_reset(dest);
701 __ip_vs_unbind_svc(dest); 705 __ip_vs_svc_put(svc, false);
702 free_percpu(dest->stats.cpustats); 706 free_percpu(dest->stats.cpustats);
703 kfree(dest); 707 kfree(dest);
704} 708}
@@ -771,6 +775,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
771 struct ip_vs_dest_user_kern *udest, int add) 775 struct ip_vs_dest_user_kern *udest, int add)
772{ 776{
773 struct netns_ipvs *ipvs = net_ipvs(svc->net); 777 struct netns_ipvs *ipvs = net_ipvs(svc->net);
778 struct ip_vs_service *old_svc;
774 struct ip_vs_scheduler *sched; 779 struct ip_vs_scheduler *sched;
775 int conn_flags; 780 int conn_flags;
776 781
@@ -792,13 +797,14 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
792 atomic_set(&dest->conn_flags, conn_flags); 797 atomic_set(&dest->conn_flags, conn_flags);
793 798
794 /* bind the service */ 799 /* bind the service */
795 if (!dest->svc) { 800 old_svc = rcu_dereference_protected(dest->svc, 1);
801 if (!old_svc) {
796 __ip_vs_bind_svc(dest, svc); 802 __ip_vs_bind_svc(dest, svc);
797 } else { 803 } else {
798 if (dest->svc != svc) { 804 if (old_svc != svc) {
799 __ip_vs_unbind_svc(dest);
800 ip_vs_zero_stats(&dest->stats); 805 ip_vs_zero_stats(&dest->stats);
801 __ip_vs_bind_svc(dest, svc); 806 __ip_vs_bind_svc(dest, svc);
807 __ip_vs_svc_put(old_svc, true);
802 } 808 }
803 } 809 }
804 810
@@ -998,16 +1004,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
998 return 0; 1004 return 0;
999} 1005}
1000 1006
1001static void ip_vs_dest_wait_readers(struct rcu_head *head)
1002{
1003 struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest,
1004 rcu_head);
1005
1006 /* End of grace period after unlinking */
1007 clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
1008}
1009
1010
1011/* 1007/*
1012 * Delete a destination (must be already unlinked from the service) 1008 * Delete a destination (must be already unlinked from the service)
1013 */ 1009 */
@@ -1023,20 +1019,16 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
1023 */ 1019 */
1024 ip_vs_rs_unhash(dest); 1020 ip_vs_rs_unhash(dest);
1025 1021
1026 if (!cleanup) {
1027 set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
1028 call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers);
1029 }
1030
1031 spin_lock_bh(&ipvs->dest_trash_lock); 1022 spin_lock_bh(&ipvs->dest_trash_lock);
1032 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", 1023 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
1033 IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), 1024 IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
1034 atomic_read(&dest->refcnt)); 1025 atomic_read(&dest->refcnt));
1035 if (list_empty(&ipvs->dest_trash) && !cleanup) 1026 if (list_empty(&ipvs->dest_trash) && !cleanup)
1036 mod_timer(&ipvs->dest_trash_timer, 1027 mod_timer(&ipvs->dest_trash_timer,
1037 jiffies + IP_VS_DEST_TRASH_PERIOD); 1028 jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
1038 /* dest lives in trash without reference */ 1029 /* dest lives in trash without reference */
1039 list_add(&dest->t_list, &ipvs->dest_trash); 1030 list_add(&dest->t_list, &ipvs->dest_trash);
1031 dest->idle_start = 0;
1040 spin_unlock_bh(&ipvs->dest_trash_lock); 1032 spin_unlock_bh(&ipvs->dest_trash_lock);
1041 ip_vs_dest_put(dest); 1033 ip_vs_dest_put(dest);
1042} 1034}
@@ -1108,24 +1100,30 @@ static void ip_vs_dest_trash_expire(unsigned long data)
1108 struct net *net = (struct net *) data; 1100 struct net *net = (struct net *) data;
1109 struct netns_ipvs *ipvs = net_ipvs(net); 1101 struct netns_ipvs *ipvs = net_ipvs(net);
1110 struct ip_vs_dest *dest, *next; 1102 struct ip_vs_dest *dest, *next;
1103 unsigned long now = jiffies;
1111 1104
1112 spin_lock(&ipvs->dest_trash_lock); 1105 spin_lock(&ipvs->dest_trash_lock);
1113 list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { 1106 list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
1114 /* Skip if dest is in grace period */
1115 if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
1116 continue;
1117 if (atomic_read(&dest->refcnt) > 0) 1107 if (atomic_read(&dest->refcnt) > 0)
1118 continue; 1108 continue;
1109 if (dest->idle_start) {
1110 if (time_before(now, dest->idle_start +
1111 IP_VS_DEST_TRASH_PERIOD))
1112 continue;
1113 } else {
1114 dest->idle_start = max(1UL, now);
1115 continue;
1116 }
1119 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", 1117 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
1120 dest->vfwmark, 1118 dest->vfwmark,
1121 IP_VS_DBG_ADDR(dest->svc->af, &dest->addr), 1119 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1122 ntohs(dest->port)); 1120 ntohs(dest->port));
1123 list_del(&dest->t_list); 1121 list_del(&dest->t_list);
1124 ip_vs_dest_free(dest); 1122 ip_vs_dest_free(dest);
1125 } 1123 }
1126 if (!list_empty(&ipvs->dest_trash)) 1124 if (!list_empty(&ipvs->dest_trash))
1127 mod_timer(&ipvs->dest_trash_timer, 1125 mod_timer(&ipvs->dest_trash_timer,
1128 jiffies + IP_VS_DEST_TRASH_PERIOD); 1126 jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
1129 spin_unlock(&ipvs->dest_trash_lock); 1127 spin_unlock(&ipvs->dest_trash_lock);
1130} 1128}
1131 1129
@@ -1320,14 +1318,6 @@ out:
1320 return ret; 1318 return ret;
1321} 1319}
1322 1320
1323static void ip_vs_service_rcu_free(struct rcu_head *head)
1324{
1325 struct ip_vs_service *svc;
1326
1327 svc = container_of(head, struct ip_vs_service, rcu_head);
1328 ip_vs_service_free(svc);
1329}
1330
1331/* 1321/*
1332 * Delete a service from the service list 1322 * Delete a service from the service list
1333 * - The service must be unlinked, unlocked and not referenced! 1323 * - The service must be unlinked, unlocked and not referenced!
@@ -1376,13 +1366,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
1376 /* 1366 /*
1377 * Free the service if nobody refers to it 1367 * Free the service if nobody refers to it
1378 */ 1368 */
1379 if (atomic_dec_and_test(&svc->refcnt)) { 1369 __ip_vs_svc_put(svc, true);
1380 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
1381 svc->fwmark,
1382 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1383 ntohs(svc->port));
1384 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
1385 }
1386 1370
1387 /* decrease the module use count */ 1371 /* decrease the module use count */
1388 ip_vs_use_count_dec(); 1372 ip_vs_use_count_dec();
@@ -1487,9 +1471,9 @@ ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
1487 * Currently only NETDEV_DOWN is handled to release refs to cached dsts 1471 * Currently only NETDEV_DOWN is handled to release refs to cached dsts
1488 */ 1472 */
1489static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, 1473static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1490 void *ptr) 1474 void *ptr)
1491{ 1475{
1492 struct net_device *dev = ptr; 1476 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1493 struct net *net = dev_net(dev); 1477 struct net *net = dev_net(dev);
1494 struct netns_ipvs *ipvs = net_ipvs(net); 1478 struct netns_ipvs *ipvs = net_ipvs(net);
1495 struct ip_vs_service *svc; 1479 struct ip_vs_service *svc;
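The hunk above follows the netdev notifier convention in which the void *ptr argument carries a struct netdev_notifier_info rather than the net_device itself, so handlers must extract the device with netdev_notifier_info_to_dev() instead of casting ptr. A minimal, self-contained sketch of a handler using it; the demo_* module is illustrative:

#include <linux/module.h>
#include <linux/netdevice.h>

static int demo_dst_event(struct notifier_block *this, unsigned long event,
			  void *ptr)
{
	/* ptr is a struct netdev_notifier_info *, not a net_device * */
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (event == NETDEV_DOWN)
		pr_info("demo: %s is going down\n", dev->name);
	return NOTIFY_DONE;
}

static struct notifier_block demo_notifier = {
	.notifier_call = demo_dst_event,
};

static int __init demo_init(void)
{
	return register_netdevice_notifier(&demo_notifier);
}

static void __exit demo_exit(void)
{
	unregister_netdevice_notifier(&demo_notifier);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");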
@@ -1575,7 +1559,7 @@ static int zero;
1575static int three = 3; 1559static int three = 3;
1576 1560
1577static int 1561static int
1578proc_do_defense_mode(ctl_table *table, int write, 1562proc_do_defense_mode(struct ctl_table *table, int write,
1579 void __user *buffer, size_t *lenp, loff_t *ppos) 1563 void __user *buffer, size_t *lenp, loff_t *ppos)
1580{ 1564{
1581 struct net *net = current->nsproxy->net_ns; 1565 struct net *net = current->nsproxy->net_ns;
@@ -1596,7 +1580,7 @@ proc_do_defense_mode(ctl_table *table, int write,
1596} 1580}
1597 1581
1598static int 1582static int
1599proc_do_sync_threshold(ctl_table *table, int write, 1583proc_do_sync_threshold(struct ctl_table *table, int write,
1600 void __user *buffer, size_t *lenp, loff_t *ppos) 1584 void __user *buffer, size_t *lenp, loff_t *ppos)
1601{ 1585{
1602 int *valp = table->data; 1586 int *valp = table->data;
@@ -1616,7 +1600,7 @@ proc_do_sync_threshold(ctl_table *table, int write,
1616} 1600}
1617 1601
1618static int 1602static int
1619proc_do_sync_mode(ctl_table *table, int write, 1603proc_do_sync_mode(struct ctl_table *table, int write,
1620 void __user *buffer, size_t *lenp, loff_t *ppos) 1604 void __user *buffer, size_t *lenp, loff_t *ppos)
1621{ 1605{
1622 int *valp = table->data; 1606 int *valp = table->data;
@@ -1634,7 +1618,7 @@ proc_do_sync_mode(ctl_table *table, int write,
1634} 1618}
1635 1619
1636static int 1620static int
1637proc_do_sync_ports(ctl_table *table, int write, 1621proc_do_sync_ports(struct ctl_table *table, int write,
1638 void __user *buffer, size_t *lenp, loff_t *ppos) 1622 void __user *buffer, size_t *lenp, loff_t *ppos)
1639{ 1623{
1640 int *valp = table->data; 1624 int *valp = table->data;
@@ -1715,12 +1699,18 @@ static struct ctl_table vs_vars[] = {
1715 .proc_handler = &proc_do_sync_ports, 1699 .proc_handler = &proc_do_sync_ports,
1716 }, 1700 },
1717 { 1701 {
1718 .procname = "sync_qlen_max", 1702 .procname = "sync_persist_mode",
1719 .maxlen = sizeof(int), 1703 .maxlen = sizeof(int),
1720 .mode = 0644, 1704 .mode = 0644,
1721 .proc_handler = proc_dointvec, 1705 .proc_handler = proc_dointvec,
1722 }, 1706 },
1723 { 1707 {
1708 .procname = "sync_qlen_max",
1709 .maxlen = sizeof(unsigned long),
1710 .mode = 0644,
1711 .proc_handler = proc_doulongvec_minmax,
1712 },
1713 {
1724 .procname = "sync_sock_size", 1714 .procname = "sync_sock_size",
1725 .maxlen = sizeof(int), 1715 .maxlen = sizeof(int),
1726 .mode = 0644, 1716 .mode = 0644,
@@ -1739,6 +1729,18 @@ static struct ctl_table vs_vars[] = {
1739 .proc_handler = proc_dointvec, 1729 .proc_handler = proc_dointvec,
1740 }, 1730 },
1741 { 1731 {
1732 .procname = "sloppy_tcp",
1733 .maxlen = sizeof(int),
1734 .mode = 0644,
1735 .proc_handler = proc_dointvec,
1736 },
1737 {
1738 .procname = "sloppy_sctp",
1739 .maxlen = sizeof(int),
1740 .mode = 0644,
1741 .proc_handler = proc_dointvec,
1742 },
1743 {
1742 .procname = "expire_quiescent_template", 1744 .procname = "expire_quiescent_template",
1743 .maxlen = sizeof(int), 1745 .maxlen = sizeof(int),
1744 .mode = 0644, 1746 .mode = 0644,
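Note that the sync_qlen_max entry above changes .maxlen and .proc_handler together: the handler must agree with the storage type it parses into, and the limit presumably becomes an unsigned long because it is derived from nr_free_buffer_pages(), which returns one. The new sync_persist_mode, sloppy_tcp and sloppy_sctp knobs stay plain ints with proc_dointvec. A minimal sketch of the pairing, with invented knob names and registration (e.g. via register_net_sysctl()) omitted:

#include <linux/sysctl.h>

static int demo_int_knob;		/* a sloppy_* style flag */
static unsigned long demo_ulong_knob;	/* a sync_qlen_max style limit */

static struct ctl_table demo_vars[] = {
	{
		.procname	= "int_knob",
		.data		= &demo_int_knob,
		.maxlen		= sizeof(int),		/* must match the handler */
		.mode		= 0644,
		.proc_handler	= proc_dointvec,	/* parses an int */
	},
	{
		.procname	= "ulong_knob",
		.data		= &demo_ulong_knob,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax, /* parses unsigned long */
	},
	{ }					/* sentinel */
};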
@@ -2542,6 +2544,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2542 struct ip_vs_dest *dest; 2544 struct ip_vs_dest *dest;
2543 struct ip_vs_dest_entry entry; 2545 struct ip_vs_dest_entry entry;
2544 2546
2547 memset(&entry, 0, sizeof(entry));
2545 list_for_each_entry(dest, &svc->destinations, n_list) { 2548 list_for_each_entry(dest, &svc->destinations, n_list) {
2546 if (count >= get->num_dests) 2549 if (count >= get->num_dests)
2547 break; 2550 break;
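The memset() added above zeroes the entry before its fields are filled in and later copied to user space; without it, structure padding and any unassigned members would leak kernel stack bytes. A userspace illustration of the pattern, where entry_model is an invented stand-in for ip_vs_dest_entry:

#include <stdio.h>
#include <string.h>

struct entry_model {
	unsigned short port;	/* 2 bytes, then 2 bytes of padding */
	unsigned int conns;
};

static void fill(struct entry_model *e)
{
	memset(e, 0, sizeof(*e));	/* zero padding and unset fields */
	e->port = 80;
	e->conns = 42;
	/* without the memset, the padding bytes between port and conns
	 * would keep whatever was on the stack and be copied out */
}

int main(void)
{
	struct entry_model e;

	fill(&e);
	printf("port=%u conns=%u\n", e.port, e.conns);
	return 0;
}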
@@ -3716,12 +3719,15 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3716 tbl[idx++].data = &ipvs->sysctl_sync_ver; 3719 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3717 ipvs->sysctl_sync_ports = 1; 3720 ipvs->sysctl_sync_ports = 1;
3718 tbl[idx++].data = &ipvs->sysctl_sync_ports; 3721 tbl[idx++].data = &ipvs->sysctl_sync_ports;
3722 tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
3719 ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32; 3723 ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
3720 tbl[idx++].data = &ipvs->sysctl_sync_qlen_max; 3724 tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
3721 ipvs->sysctl_sync_sock_size = 0; 3725 ipvs->sysctl_sync_sock_size = 0;
3722 tbl[idx++].data = &ipvs->sysctl_sync_sock_size; 3726 tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
3723 tbl[idx++].data = &ipvs->sysctl_cache_bypass; 3727 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3724 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; 3728 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3729 tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
3730 tbl[idx++].data = &ipvs->sysctl_sloppy_sctp;
3725 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; 3731 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3726 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; 3732 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3727 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; 3733 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index ccab120df45e..c3b84546ea9e 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -214,18 +214,16 @@ static inline int is_overloaded(struct ip_vs_dest *dest)
214 * Destination hashing scheduling 214 * Destination hashing scheduling
215 */ 215 */
216static struct ip_vs_dest * 216static struct ip_vs_dest *
217ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 217ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
218 struct ip_vs_iphdr *iph)
218{ 219{
219 struct ip_vs_dest *dest; 220 struct ip_vs_dest *dest;
220 struct ip_vs_dh_state *s; 221 struct ip_vs_dh_state *s;
221 struct ip_vs_iphdr iph;
222
223 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
224 222
225 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 223 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
226 224
227 s = (struct ip_vs_dh_state *) svc->sched_data; 225 s = (struct ip_vs_dh_state *) svc->sched_data;
228 dest = ip_vs_dh_get(svc->af, s, &iph.daddr); 226 dest = ip_vs_dh_get(svc->af, s, &iph->daddr);
229 if (!dest 227 if (!dest
230 || !(dest->flags & IP_VS_DEST_F_AVAILABLE) 228 || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
231 || atomic_read(&dest->weight) <= 0 229 || atomic_read(&dest->weight) <= 0
@@ -235,7 +233,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
235 } 233 }
236 234
237 IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n", 235 IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
238 IP_VS_DBG_ADDR(svc->af, &iph.daddr), 236 IP_VS_DBG_ADDR(svc->af, &iph->daddr),
239 IP_VS_DBG_ADDR(svc->af, &dest->addr), 237 IP_VS_DBG_ADDR(svc->af, &dest->addr),
240 ntohs(dest->port)); 238 ntohs(dest->port));
241 239
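This hunk, and the matching ones in ip_vs_lblc.c, ip_vs_lblcr.c, ip_vs_lc.c and ip_vs_nq.c below, changes the scheduler entry point so the caller passes the already-parsed ip_vs_iphdr down instead of every scheduler re-running ip_vs_fill_iph_addr_only() on the skb. A scheduler body then reduces to something like this sketch; demo_schedule and demo_lookup are hypothetical names:

#include <net/ip_vs.h>

/* hypothetical per-scheduler lookup helper */
static struct ip_vs_dest *demo_lookup(struct ip_vs_service *svc,
				      const union nf_inet_addr *daddr);

static struct ip_vs_dest *
demo_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
	      struct ip_vs_iphdr *iph)
{
	/* iph->daddr etc. are already filled in by the connection core;
	 * no per-scheduler header parsing is needed anymore */
	return demo_lookup(svc, &iph->daddr);
}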
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 6bee6d0c73a5..1425e9a924c4 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -59,12 +59,13 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
59 struct ip_vs_cpu_stats __percpu *stats) 59 struct ip_vs_cpu_stats __percpu *stats)
60{ 60{
61 int i; 61 int i;
62 bool add = false;
62 63
63 for_each_possible_cpu(i) { 64 for_each_possible_cpu(i) {
64 struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); 65 struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
65 unsigned int start; 66 unsigned int start;
66 __u64 inbytes, outbytes; 67 __u64 inbytes, outbytes;
67 if (i) { 68 if (add) {
68 sum->conns += s->ustats.conns; 69 sum->conns += s->ustats.conns;
69 sum->inpkts += s->ustats.inpkts; 70 sum->inpkts += s->ustats.inpkts;
70 sum->outpkts += s->ustats.outpkts; 71 sum->outpkts += s->ustats.outpkts;
@@ -76,6 +77,7 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
76 sum->inbytes += inbytes; 77 sum->inbytes += inbytes;
77 sum->outbytes += outbytes; 78 sum->outbytes += outbytes;
78 } else { 79 } else {
80 add = true;
79 sum->conns = s->ustats.conns; 81 sum->conns = s->ustats.conns;
80 sum->inpkts = s->ustats.inpkts; 82 sum->inpkts = s->ustats.inpkts;
81 sum->outpkts = s->ustats.outpkts; 83 sum->outpkts = s->ustats.outpkts;
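The bool introduced above replaces the old "if (i)" test: i is a CPU id, and with a sparse possible-CPU mask the first CPU iterated need not be CPU 0, so "nonzero id" is not the same as "not the first iteration" and the stale sum would never be overwritten. A userspace model with an invented mask in which CPU 0 is absent:

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	int possible[] = { 2, 3 };	/* CPU 0 offline: first id != 0 */
	long percpu[4] = { 0, 0, 10, 20 };
	long fixed = 999, buggy = 999;	/* 999 models stale garbage in sum */
	bool add = false;

	for (int k = 0; k < 2; k++) {
		int i = possible[k];

		/* fixed variant: explicit first-iteration flag */
		if (add)
			fixed += percpu[i];
		else {
			add = true;
			fixed = percpu[i];	/* overwrite the stale value */
		}

		/* buggy variant: "if (i)" never takes the overwrite
		 * branch, since no iterated CPU has id 0 here */
		if (i)
			buggy += percpu[i];
		else
			buggy = percpu[i];
	}
	printf("fixed=%ld buggy=%ld\n", fixed, buggy);	/* 30 vs 1029 */
	return 0;
}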
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 5ea26bd87743..eff13c94498e 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -93,7 +93,7 @@ struct ip_vs_lblc_entry {
93 struct hlist_node list; 93 struct hlist_node list;
94 int af; /* address family */ 94 int af; /* address family */
95 union nf_inet_addr addr; /* destination IP address */ 95 union nf_inet_addr addr; /* destination IP address */
96 struct ip_vs_dest __rcu *dest; /* real server (cache) */ 96 struct ip_vs_dest *dest; /* real server (cache) */
97 unsigned long lastuse; /* last used time */ 97 unsigned long lastuse; /* last used time */
98 struct rcu_head rcu_head; 98 struct rcu_head rcu_head;
99}; 99};
@@ -118,7 +118,7 @@ struct ip_vs_lblc_table {
118 * IPVS LBLC sysctl table 118 * IPVS LBLC sysctl table
119 */ 119 */
120#ifdef CONFIG_SYSCTL 120#ifdef CONFIG_SYSCTL
121static ctl_table vs_vars_table[] = { 121static struct ctl_table vs_vars_table[] = {
122 { 122 {
123 .procname = "lblc_expiration", 123 .procname = "lblc_expiration",
124 .data = NULL, 124 .data = NULL,
@@ -130,20 +130,21 @@ static ctl_table vs_vars_table[] = {
130}; 130};
131#endif 131#endif
132 132
133static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) 133static void ip_vs_lblc_rcu_free(struct rcu_head *head)
134{ 134{
135 struct ip_vs_dest *dest; 135 struct ip_vs_lblc_entry *en = container_of(head,
136 struct ip_vs_lblc_entry,
137 rcu_head);
136 138
137 hlist_del_rcu(&en->list); 139 ip_vs_dest_put(en->dest);
138 /* 140 kfree(en);
139 * We don't kfree dest because it is referred either by its service
140 * or the trash dest list.
141 */
142 dest = rcu_dereference_protected(en->dest, 1);
143 ip_vs_dest_put(dest);
144 kfree_rcu(en, rcu_head);
145} 141}
146 142
143static inline void ip_vs_lblc_del(struct ip_vs_lblc_entry *en)
144{
145 hlist_del_rcu(&en->list);
146 call_rcu(&en->rcu_head, ip_vs_lblc_rcu_free);
147}
147 148
148/* 149/*
149 * Returns hash value for IPVS LBLC entry 150 * Returns hash value for IPVS LBLC entry
@@ -203,30 +204,23 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
203 struct ip_vs_lblc_entry *en; 204 struct ip_vs_lblc_entry *en;
204 205
205 en = ip_vs_lblc_get(dest->af, tbl, daddr); 206 en = ip_vs_lblc_get(dest->af, tbl, daddr);
206 if (!en) { 207 if (en) {
207 en = kmalloc(sizeof(*en), GFP_ATOMIC); 208 if (en->dest == dest)
208 if (!en) 209 return en;
209 return NULL; 210 ip_vs_lblc_del(en);
210 211 }
211 en->af = dest->af; 212 en = kmalloc(sizeof(*en), GFP_ATOMIC);
212 ip_vs_addr_copy(dest->af, &en->addr, daddr); 213 if (!en)
213 en->lastuse = jiffies; 214 return NULL;
214 215
215 ip_vs_dest_hold(dest); 216 en->af = dest->af;
216 RCU_INIT_POINTER(en->dest, dest); 217 ip_vs_addr_copy(dest->af, &en->addr, daddr);
218 en->lastuse = jiffies;
217 219
218 ip_vs_lblc_hash(tbl, en); 220 ip_vs_dest_hold(dest);
219 } else { 221 en->dest = dest;
220 struct ip_vs_dest *old_dest;
221 222
222 old_dest = rcu_dereference_protected(en->dest, 1); 223 ip_vs_lblc_hash(tbl, en);
223 if (old_dest != dest) {
224 ip_vs_dest_put(old_dest);
225 ip_vs_dest_hold(dest);
226 /* No ordering constraints for refcnt */
227 RCU_INIT_POINTER(en->dest, dest);
228 }
229 }
230 224
231 return en; 225 return en;
232} 226}
@@ -246,7 +240,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
246 tbl->dead = 1; 240 tbl->dead = 1;
247 for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { 241 for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
248 hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { 242 hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
249 ip_vs_lblc_free(en); 243 ip_vs_lblc_del(en);
250 atomic_dec(&tbl->entries); 244 atomic_dec(&tbl->entries);
251 } 245 }
252 } 246 }
@@ -281,7 +275,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
281 sysctl_lblc_expiration(svc))) 275 sysctl_lblc_expiration(svc)))
282 continue; 276 continue;
283 277
284 ip_vs_lblc_free(en); 278 ip_vs_lblc_del(en);
285 atomic_dec(&tbl->entries); 279 atomic_dec(&tbl->entries);
286 } 280 }
287 spin_unlock(&svc->sched_lock); 281 spin_unlock(&svc->sched_lock);
@@ -335,7 +329,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
335 if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) 329 if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
336 continue; 330 continue;
337 331
338 ip_vs_lblc_free(en); 332 ip_vs_lblc_del(en);
339 atomic_dec(&tbl->entries); 333 atomic_dec(&tbl->entries);
340 goal--; 334 goal--;
341 } 335 }
@@ -443,8 +437,8 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
443 continue; 437 continue;
444 438
445 doh = ip_vs_dest_conn_overhead(dest); 439 doh = ip_vs_dest_conn_overhead(dest);
446 if (loh * atomic_read(&dest->weight) > 440 if ((__s64)loh * atomic_read(&dest->weight) >
447 doh * atomic_read(&least->weight)) { 441 (__s64)doh * atomic_read(&least->weight)) {
448 least = dest; 442 least = dest;
449 loh = doh; 443 loh = doh;
450 } 444 }
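The (__s64) casts in this and the other scheduler hunks guard the cross-multiplied comparison: ip_vs_dest_conn_overhead() scales active connections by 256, so an overhead can grow large enough that overhead * weight no longer fits in 32 bits and the comparison inverts. A standalone demonstration with plausible magnitudes (signed overflow is undefined in ISO C; the kernel builds with -fno-strict-overflow, where it wraps):

#include <stdio.h>

int main(void)
{
	int loh = 2000000000, doh = 1000000000;	/* connection overheads */
	int lw = 2, dw = 2;			/* server weights */

	/* 32-bit: loh * dw = 4e9 wraps to a negative value, so the
	 * "greater" relation is lost */
	printf("32-bit says: %d\n", loh * dw > doh * lw);	/* 0 */

	/* widening before multiplying gives the intended result */
	printf("64-bit says: %d\n",
	       (long long)loh * dw > (long long)doh * lw);	/* 1 */
	return 0;
}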
@@ -487,19 +481,17 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
487 * Locality-Based (weighted) Least-Connection scheduling 481 * Locality-Based (weighted) Least-Connection scheduling
488 */ 482 */
489static struct ip_vs_dest * 483static struct ip_vs_dest *
490ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 484ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
485 struct ip_vs_iphdr *iph)
491{ 486{
492 struct ip_vs_lblc_table *tbl = svc->sched_data; 487 struct ip_vs_lblc_table *tbl = svc->sched_data;
493 struct ip_vs_iphdr iph;
494 struct ip_vs_dest *dest = NULL; 488 struct ip_vs_dest *dest = NULL;
495 struct ip_vs_lblc_entry *en; 489 struct ip_vs_lblc_entry *en;
496 490
497 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
498
499 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 491 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
500 492
501 /* First look in our cache */ 493 /* First look in our cache */
502 en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr); 494 en = ip_vs_lblc_get(svc->af, tbl, &iph->daddr);
503 if (en) { 495 if (en) {
504 /* We only hold a read lock, but this is atomic */ 496 /* We only hold a read lock, but this is atomic */
505 en->lastuse = jiffies; 497 en->lastuse = jiffies;
@@ -513,7 +505,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
513 * free up entries from the trash at any time. 505 * free up entries from the trash at any time.
514 */ 506 */
515 507
516 dest = rcu_dereference(en->dest); 508 dest = en->dest;
517 if ((dest->flags & IP_VS_DEST_F_AVAILABLE) && 509 if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
518 atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) 510 atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
519 goto out; 511 goto out;
@@ -529,12 +521,12 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
529 /* If we fail to create a cache entry, we'll just use the valid dest */ 521 /* If we fail to create a cache entry, we'll just use the valid dest */
530 spin_lock_bh(&svc->sched_lock); 522 spin_lock_bh(&svc->sched_lock);
531 if (!tbl->dead) 523 if (!tbl->dead)
532 ip_vs_lblc_new(tbl, &iph.daddr, dest); 524 ip_vs_lblc_new(tbl, &iph->daddr, dest);
533 spin_unlock_bh(&svc->sched_lock); 525 spin_unlock_bh(&svc->sched_lock);
534 526
535out: 527out:
536 IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n", 528 IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",
537 IP_VS_DBG_ADDR(svc->af, &iph.daddr), 529 IP_VS_DBG_ADDR(svc->af, &iph->daddr),
538 IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); 530 IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
539 531
540 return dest; 532 return dest;
@@ -633,7 +625,7 @@ static void __exit ip_vs_lblc_cleanup(void)
633{ 625{
634 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); 626 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
635 unregister_pernet_subsys(&ip_vs_lblc_ops); 627 unregister_pernet_subsys(&ip_vs_lblc_ops);
636 synchronize_rcu(); 628 rcu_barrier();
637} 629}
638 630
639 631
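The switch from synchronize_rcu() to rcu_barrier() above pairs with the new call_rcu(..., ip_vs_lblc_rcu_free) path: module unload must wait for queued callbacks to actually execute, not merely for readers to leave their critical sections, or a callback could run after the module text is freed. A generic sketch of the pattern; the demo_* names are illustrative:

#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_entry {
	struct rcu_head rcu_head;
};

static void demo_rcu_free(struct rcu_head *head)
{
	kfree(container_of(head, struct demo_entry, rcu_head));
}

static void demo_del(struct demo_entry *e)
{
	/* may still be queued when the module starts unloading */
	call_rcu(&e->rcu_head, demo_rcu_free);
}

static void __exit demo_exit(void)
{
	/* synchronize_rcu() would only wait for readers; rcu_barrier()
	 * waits until every demo_rcu_free() queued above has run, so
	 * none can fire after this module's code is unmapped */
	rcu_barrier();
}
module_exit(demo_exit);
MODULE_LICENSE("GPL");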
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 50123c2ab484..0b8550089a2e 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -89,7 +89,7 @@
89 */ 89 */
90struct ip_vs_dest_set_elem { 90struct ip_vs_dest_set_elem {
91 struct list_head list; /* list link */ 91 struct list_head list; /* list link */
92 struct ip_vs_dest __rcu *dest; /* destination server */ 92 struct ip_vs_dest *dest; /* destination server */
93 struct rcu_head rcu_head; 93 struct rcu_head rcu_head;
94}; 94};
95 95
@@ -107,11 +107,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
107 107
108 if (check) { 108 if (check) {
109 list_for_each_entry(e, &set->list, list) { 109 list_for_each_entry(e, &set->list, list) {
110 struct ip_vs_dest *d; 110 if (e->dest == dest)
111
112 d = rcu_dereference_protected(e->dest, 1);
113 if (d == dest)
114 /* already existed */
115 return; 111 return;
116 } 112 }
117 } 113 }
@@ -121,7 +117,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
121 return; 117 return;
122 118
123 ip_vs_dest_hold(dest); 119 ip_vs_dest_hold(dest);
124 RCU_INIT_POINTER(e->dest, dest); 120 e->dest = dest;
125 121
126 list_add_rcu(&e->list, &set->list); 122 list_add_rcu(&e->list, &set->list);
127 atomic_inc(&set->size); 123 atomic_inc(&set->size);
@@ -129,22 +125,27 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
129 set->lastmod = jiffies; 125 set->lastmod = jiffies;
130} 126}
131 127
128static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head)
129{
130 struct ip_vs_dest_set_elem *e;
131
132 e = container_of(head, struct ip_vs_dest_set_elem, rcu_head);
133 ip_vs_dest_put(e->dest);
134 kfree(e);
135}
136
132static void 137static void
133ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) 138ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
134{ 139{
135 struct ip_vs_dest_set_elem *e; 140 struct ip_vs_dest_set_elem *e;
136 141
137 list_for_each_entry(e, &set->list, list) { 142 list_for_each_entry(e, &set->list, list) {
138 struct ip_vs_dest *d; 143 if (e->dest == dest) {
139
140 d = rcu_dereference_protected(e->dest, 1);
141 if (d == dest) {
142 /* HIT */ 144 /* HIT */
143 atomic_dec(&set->size); 145 atomic_dec(&set->size);
144 set->lastmod = jiffies; 146 set->lastmod = jiffies;
145 ip_vs_dest_put(dest);
146 list_del_rcu(&e->list); 147 list_del_rcu(&e->list);
147 kfree_rcu(e, rcu_head); 148 call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
148 break; 149 break;
149 } 150 }
150 } 151 }
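Moving ip_vs_dest_put() into the new RCU callback matters because a reader can race with the erase: after list_del_rcu() the element may still be visible to an rcu_read_lock() traversal, so the destination reference must stay held until the grace period ends. A sketch of the reader side this ordering protects, written against the in-file types; use() is a stand-in for real work:

static void demo_reader(struct ip_vs_dest_set *set)
{
	struct ip_vs_dest_set_elem *e;

	rcu_read_lock();
	list_for_each_entry_rcu(e, &set->list, list) {
		/* e may already be list_del_rcu()'d by a writer, but
		 * e->dest keeps its reference until the grace period
		 * ends, because ip_vs_dest_put() now runs in the RCU
		 * callback rather than at deletion time */
		use(e->dest);
	}
	rcu_read_unlock();
}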
@@ -155,16 +156,8 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
155 struct ip_vs_dest_set_elem *e, *ep; 156 struct ip_vs_dest_set_elem *e, *ep;
156 157
157 list_for_each_entry_safe(e, ep, &set->list, list) { 158 list_for_each_entry_safe(e, ep, &set->list, list) {
158 struct ip_vs_dest *d;
159
160 d = rcu_dereference_protected(e->dest, 1);
161 /*
162 * We don't kfree dest because it is referred either
163 * by its service or by the trash dest list.
164 */
165 ip_vs_dest_put(d);
166 list_del_rcu(&e->list); 159 list_del_rcu(&e->list);
167 kfree_rcu(e, rcu_head); 160 call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
168 } 161 }
169} 162}
170 163
@@ -175,12 +168,9 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
175 struct ip_vs_dest *dest, *least; 168 struct ip_vs_dest *dest, *least;
176 int loh, doh; 169 int loh, doh;
177 170
178 if (set == NULL)
179 return NULL;
180
181 /* select the first destination server, whose weight > 0 */ 171 /* select the first destination server, whose weight > 0 */
182 list_for_each_entry_rcu(e, &set->list, list) { 172 list_for_each_entry_rcu(e, &set->list, list) {
183 least = rcu_dereference(e->dest); 173 least = e->dest;
184 if (least->flags & IP_VS_DEST_F_OVERLOAD) 174 if (least->flags & IP_VS_DEST_F_OVERLOAD)
185 continue; 175 continue;
186 176
@@ -195,13 +185,13 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
195 /* find the destination with the weighted least load */ 185 /* find the destination with the weighted least load */
196 nextstage: 186 nextstage:
197 list_for_each_entry_continue_rcu(e, &set->list, list) { 187 list_for_each_entry_continue_rcu(e, &set->list, list) {
198 dest = rcu_dereference(e->dest); 188 dest = e->dest;
199 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 189 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
200 continue; 190 continue;
201 191
202 doh = ip_vs_dest_conn_overhead(dest); 192 doh = ip_vs_dest_conn_overhead(dest);
203 if ((loh * atomic_read(&dest->weight) > 193 if (((__s64)loh * atomic_read(&dest->weight) >
204 doh * atomic_read(&least->weight)) 194 (__s64)doh * atomic_read(&least->weight))
205 && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { 195 && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
206 least = dest; 196 least = dest;
207 loh = doh; 197 loh = doh;
@@ -232,7 +222,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
232 222
233 /* select the first destination server, whose weight > 0 */ 223 /* select the first destination server, whose weight > 0 */
234 list_for_each_entry(e, &set->list, list) { 224 list_for_each_entry(e, &set->list, list) {
235 most = rcu_dereference_protected(e->dest, 1); 225 most = e->dest;
236 if (atomic_read(&most->weight) > 0) { 226 if (atomic_read(&most->weight) > 0) {
237 moh = ip_vs_dest_conn_overhead(most); 227 moh = ip_vs_dest_conn_overhead(most);
238 goto nextstage; 228 goto nextstage;
@@ -243,11 +233,11 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
243 /* find the destination with the weighted most load */ 233 /* find the destination with the weighted most load */
244 nextstage: 234 nextstage:
245 list_for_each_entry_continue(e, &set->list, list) { 235 list_for_each_entry_continue(e, &set->list, list) {
246 dest = rcu_dereference_protected(e->dest, 1); 236 dest = e->dest;
247 doh = ip_vs_dest_conn_overhead(dest); 237 doh = ip_vs_dest_conn_overhead(dest);
248 /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ 238 /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
249 if ((moh * atomic_read(&dest->weight) < 239 if (((__s64)moh * atomic_read(&dest->weight) <
250 doh * atomic_read(&most->weight)) 240 (__s64)doh * atomic_read(&most->weight))
251 && (atomic_read(&dest->weight) > 0)) { 241 && (atomic_read(&dest->weight) > 0)) {
252 most = dest; 242 most = dest;
253 moh = doh; 243 moh = doh;
@@ -299,7 +289,7 @@ struct ip_vs_lblcr_table {
299 * IPVS LBLCR sysctl table 289 * IPVS LBLCR sysctl table
300 */ 290 */
301 291
302static ctl_table vs_vars_table[] = { 292static struct ctl_table vs_vars_table[] = {
303 { 293 {
304 .procname = "lblcr_expiration", 294 .procname = "lblcr_expiration",
305 .data = NULL, 295 .data = NULL,
@@ -414,7 +404,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
414 404
415 spin_lock_bh(&svc->sched_lock); 405 spin_lock_bh(&svc->sched_lock);
416 tbl->dead = 1; 406 tbl->dead = 1;
417 for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { 407 for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
418 hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { 408 hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
419 ip_vs_lblcr_free(en); 409 ip_vs_lblcr_free(en);
420 } 410 }
@@ -440,7 +430,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
440 struct ip_vs_lblcr_entry *en; 430 struct ip_vs_lblcr_entry *en;
441 struct hlist_node *next; 431 struct hlist_node *next;
442 432
443 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { 433 for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
444 j = (j + 1) & IP_VS_LBLCR_TAB_MASK; 434 j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
445 435
446 spin_lock(&svc->sched_lock); 436 spin_lock(&svc->sched_lock);
@@ -495,7 +485,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
495 if (goal > tbl->max_size/2) 485 if (goal > tbl->max_size/2)
496 goal = tbl->max_size/2; 486 goal = tbl->max_size/2;
497 487
498 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { 488 for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
499 j = (j + 1) & IP_VS_LBLCR_TAB_MASK; 489 j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
500 490
501 spin_lock(&svc->sched_lock); 491 spin_lock(&svc->sched_lock);
@@ -536,7 +526,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
536 /* 526 /*
537 * Initialize the hash buckets 527 * Initialize the hash buckets
538 */ 528 */
539 for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { 529 for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
540 INIT_HLIST_HEAD(&tbl->bucket[i]); 530 INIT_HLIST_HEAD(&tbl->bucket[i]);
541 } 531 }
542 tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; 532 tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
@@ -611,8 +601,8 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
611 continue; 601 continue;
612 602
613 doh = ip_vs_dest_conn_overhead(dest); 603 doh = ip_vs_dest_conn_overhead(dest);
614 if (loh * atomic_read(&dest->weight) > 604 if ((__s64)loh * atomic_read(&dest->weight) >
615 doh * atomic_read(&least->weight)) { 605 (__s64)doh * atomic_read(&least->weight)) {
616 least = dest; 606 least = dest;
617 loh = doh; 607 loh = doh;
618 } 608 }
@@ -655,19 +645,17 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
655 * Locality-Based (weighted) Least-Connection scheduling 645 * Locality-Based (weighted) Least-Connection scheduling
656 */ 646 */
657static struct ip_vs_dest * 647static struct ip_vs_dest *
658ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 648ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
649 struct ip_vs_iphdr *iph)
659{ 650{
660 struct ip_vs_lblcr_table *tbl = svc->sched_data; 651 struct ip_vs_lblcr_table *tbl = svc->sched_data;
661 struct ip_vs_iphdr iph;
662 struct ip_vs_dest *dest; 652 struct ip_vs_dest *dest;
663 struct ip_vs_lblcr_entry *en; 653 struct ip_vs_lblcr_entry *en;
664 654
665 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
666
667 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 655 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
668 656
669 /* First look in our cache */ 657 /* First look in our cache */
670 en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); 658 en = ip_vs_lblcr_get(svc->af, tbl, &iph->daddr);
671 if (en) { 659 if (en) {
672 en->lastuse = jiffies; 660 en->lastuse = jiffies;
673 661
@@ -718,12 +706,12 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
718 /* If we fail to create a cache entry, we'll just use the valid dest */ 706 /* If we fail to create a cache entry, we'll just use the valid dest */
719 spin_lock_bh(&svc->sched_lock); 707 spin_lock_bh(&svc->sched_lock);
720 if (!tbl->dead) 708 if (!tbl->dead)
721 ip_vs_lblcr_new(tbl, &iph.daddr, dest); 709 ip_vs_lblcr_new(tbl, &iph->daddr, dest);
722 spin_unlock_bh(&svc->sched_lock); 710 spin_unlock_bh(&svc->sched_lock);
723 711
724out: 712out:
725 IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n", 713 IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
726 IP_VS_DBG_ADDR(svc->af, &iph.daddr), 714 IP_VS_DBG_ADDR(svc->af, &iph->daddr),
727 IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); 715 IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
728 716
729 return dest; 717 return dest;
@@ -821,7 +809,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
821{ 809{
822 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); 810 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
823 unregister_pernet_subsys(&ip_vs_lblcr_ops); 811 unregister_pernet_subsys(&ip_vs_lblcr_ops);
824 synchronize_rcu(); 812 rcu_barrier();
825} 813}
826 814
827 815
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index 5128e338a749..2bdcb1cf2127 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -26,7 +26,8 @@
26 * Least Connection scheduling 26 * Least Connection scheduling
27 */ 27 */
28static struct ip_vs_dest * 28static struct ip_vs_dest *
29ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 29ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
30 struct ip_vs_iphdr *iph)
30{ 31{
31 struct ip_vs_dest *dest, *least = NULL; 32 struct ip_vs_dest *dest, *least = NULL;
32 unsigned int loh = 0, doh; 33 unsigned int loh = 0, doh;
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index 646cfd4baa73..961a6de9bb29 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -40,7 +40,7 @@
40#include <net/ip_vs.h> 40#include <net/ip_vs.h>
41 41
42 42
43static inline unsigned int 43static inline int
44ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) 44ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
45{ 45{
46 /* 46 /*
@@ -55,10 +55,11 @@ ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
55 * Weighted Least Connection scheduling 55 * Weighted Least Connection scheduling
56 */ 56 */
57static struct ip_vs_dest * 57static struct ip_vs_dest *
58ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 58ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
59 struct ip_vs_iphdr *iph)
59{ 60{
60 struct ip_vs_dest *dest, *least = NULL; 61 struct ip_vs_dest *dest, *least = NULL;
61 unsigned int loh = 0, doh; 62 int loh = 0, doh;
62 63
63 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 64 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
64 65
@@ -91,8 +92,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
91 } 92 }
92 93
93 if (!least || 94 if (!least ||
94 (loh * atomic_read(&dest->weight) > 95 ((__s64)loh * atomic_read(&dest->weight) >
95 doh * atomic_read(&least->weight))) { 96 (__s64)doh * atomic_read(&least->weight))) {
96 least = dest; 97 least = dest;
97 loh = doh; 98 loh = doh;
98 } 99 }
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 86464881cd20..23e596e438b3 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -15,6 +15,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
15{ 15{
16 struct net *net; 16 struct net *net;
17 struct ip_vs_service *svc; 17 struct ip_vs_service *svc;
18 struct netns_ipvs *ipvs;
18 sctp_chunkhdr_t _schunkh, *sch; 19 sctp_chunkhdr_t _schunkh, *sch;
19 sctp_sctphdr_t *sh, _sctph; 20 sctp_sctphdr_t *sh, _sctph;
20 21
@@ -27,13 +28,14 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
27 if (sch == NULL) 28 if (sch == NULL)
28 return 0; 29 return 0;
29 net = skb_net(skb); 30 net = skb_net(skb);
31 ipvs = net_ipvs(net);
30 rcu_read_lock(); 32 rcu_read_lock();
31 if ((sch->type == SCTP_CID_INIT) && 33 if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) &&
32 (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, 34 (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
33 &iph->daddr, sh->dest))) { 35 &iph->daddr, sh->dest))) {
34 int ignored; 36 int ignored;
35 37
36 if (ip_vs_todrop(net_ipvs(net))) { 38 if (ip_vs_todrop(ipvs)) {
37 /* 39 /*
38 * It seems that we are very loaded. 40 * It seems that we are very loaded.
39 * We have to drop this packet :( 41 * We have to drop this packet :(
@@ -64,15 +66,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
64static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph, 66static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
65 unsigned int sctphoff) 67 unsigned int sctphoff)
66{ 68{
67 __u32 crc32; 69 sctph->checksum = sctp_compute_cksum(skb, sctphoff);
68 struct sk_buff *iter;
69
70 crc32 = sctp_start_cksum((__u8 *)sctph, skb_headlen(skb) - sctphoff);
71 skb_walk_frags(skb, iter)
72 crc32 = sctp_update_cksum((u8 *) iter->data,
73 skb_headlen(iter), crc32);
74 sctph->checksum = sctp_end_cksum(crc32);
75
76 skb->ip_summed = CHECKSUM_UNNECESSARY; 70 skb->ip_summed = CHECKSUM_UNNECESSARY;
77} 71}
78 72
@@ -149,10 +143,7 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
149{ 143{
150 unsigned int sctphoff; 144 unsigned int sctphoff;
151 struct sctphdr *sh, _sctph; 145 struct sctphdr *sh, _sctph;
152 struct sk_buff *iter; 146 __le32 cmp, val;
153 __le32 cmp;
154 __le32 val;
155 __u32 tmp;
156 147
157#ifdef CONFIG_IP_VS_IPV6 148#ifdef CONFIG_IP_VS_IPV6
158 if (af == AF_INET6) 149 if (af == AF_INET6)
@@ -166,13 +157,7 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
166 return 0; 157 return 0;
167 158
168 cmp = sh->checksum; 159 cmp = sh->checksum;
169 160 val = sctp_compute_cksum(skb, sctphoff);
170 tmp = sctp_start_cksum((__u8 *) sh, skb_headlen(skb));
171 skb_walk_frags(skb, iter)
172 tmp = sctp_update_cksum((__u8 *) iter->data,
173 skb_headlen(iter), tmp);
174
175 val = sctp_end_cksum(tmp);
176 161
177 if (val != cmp) { 162 if (val != cmp) {
178 /* CRC failure, dump it. */ 163 /* CRC failure, dump it. */
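Both hunks above replace the open-coded sctp_start_cksum()/sctp_update_cksum()/sctp_end_cksum() walk over the skb fragments with sctp_compute_cksum(), which performs that walk internally. Verification then reduces to a single comparison, roughly as below; demo_sctp_csum_ok is an invented wrapper:

#include <linux/skbuff.h>
#include <linux/sctp.h>
#include <net/sctp/checksum.h>

static bool demo_sctp_csum_ok(const struct sk_buff *skb,
			      unsigned int sctphoff,
			      const struct sctphdr *sh)
{
	/* sctp_compute_cksum() covers the linear head and all frags,
	 * replacing the old start/update/end loop */
	return sh->checksum == sctp_compute_cksum(skb, sctphoff);
}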
@@ -183,710 +168,159 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
183 return 1; 168 return 1;
184} 169}
185 170
186struct ipvs_sctp_nextstate {
187 int next_state;
188};
189enum ipvs_sctp_event_t { 171enum ipvs_sctp_event_t {
190 IP_VS_SCTP_EVE_DATA_CLI, 172 IP_VS_SCTP_DATA = 0, /* DATA, SACK, HEARTBEATs */
191 IP_VS_SCTP_EVE_DATA_SER, 173 IP_VS_SCTP_INIT,
192 IP_VS_SCTP_EVE_INIT_CLI, 174 IP_VS_SCTP_INIT_ACK,
193 IP_VS_SCTP_EVE_INIT_SER, 175 IP_VS_SCTP_COOKIE_ECHO,
194 IP_VS_SCTP_EVE_INIT_ACK_CLI, 176 IP_VS_SCTP_COOKIE_ACK,
195 IP_VS_SCTP_EVE_INIT_ACK_SER, 177 IP_VS_SCTP_SHUTDOWN,
196 IP_VS_SCTP_EVE_COOKIE_ECHO_CLI, 178 IP_VS_SCTP_SHUTDOWN_ACK,
197 IP_VS_SCTP_EVE_COOKIE_ECHO_SER, 179 IP_VS_SCTP_SHUTDOWN_COMPLETE,
198 IP_VS_SCTP_EVE_COOKIE_ACK_CLI, 180 IP_VS_SCTP_ERROR,
199 IP_VS_SCTP_EVE_COOKIE_ACK_SER, 181 IP_VS_SCTP_ABORT,
200 IP_VS_SCTP_EVE_ABORT_CLI, 182 IP_VS_SCTP_EVENT_LAST
201 IP_VS_SCTP_EVE__ABORT_SER,
202 IP_VS_SCTP_EVE_SHUT_CLI,
203 IP_VS_SCTP_EVE_SHUT_SER,
204 IP_VS_SCTP_EVE_SHUT_ACK_CLI,
205 IP_VS_SCTP_EVE_SHUT_ACK_SER,
206 IP_VS_SCTP_EVE_SHUT_COM_CLI,
207 IP_VS_SCTP_EVE_SHUT_COM_SER,
208 IP_VS_SCTP_EVE_LAST
209}; 183};
210 184
211static enum ipvs_sctp_event_t sctp_events[256] = { 185/* RFC 2960, 3.2 Chunk Field Descriptions */
212 IP_VS_SCTP_EVE_DATA_CLI, 186static __u8 sctp_events[] = {
213 IP_VS_SCTP_EVE_INIT_CLI, 187 [SCTP_CID_DATA] = IP_VS_SCTP_DATA,
214 IP_VS_SCTP_EVE_INIT_ACK_CLI, 188 [SCTP_CID_INIT] = IP_VS_SCTP_INIT,
215 IP_VS_SCTP_EVE_DATA_CLI, 189 [SCTP_CID_INIT_ACK] = IP_VS_SCTP_INIT_ACK,
216 IP_VS_SCTP_EVE_DATA_CLI, 190 [SCTP_CID_SACK] = IP_VS_SCTP_DATA,
217 IP_VS_SCTP_EVE_DATA_CLI, 191 [SCTP_CID_HEARTBEAT] = IP_VS_SCTP_DATA,
218 IP_VS_SCTP_EVE_ABORT_CLI, 192 [SCTP_CID_HEARTBEAT_ACK] = IP_VS_SCTP_DATA,
219 IP_VS_SCTP_EVE_SHUT_CLI, 193 [SCTP_CID_ABORT] = IP_VS_SCTP_ABORT,
220 IP_VS_SCTP_EVE_SHUT_ACK_CLI, 194 [SCTP_CID_SHUTDOWN] = IP_VS_SCTP_SHUTDOWN,
221 IP_VS_SCTP_EVE_DATA_CLI, 195 [SCTP_CID_SHUTDOWN_ACK] = IP_VS_SCTP_SHUTDOWN_ACK,
222 IP_VS_SCTP_EVE_COOKIE_ECHO_CLI, 196 [SCTP_CID_ERROR] = IP_VS_SCTP_ERROR,
223 IP_VS_SCTP_EVE_COOKIE_ACK_CLI, 197 [SCTP_CID_COOKIE_ECHO] = IP_VS_SCTP_COOKIE_ECHO,
224 IP_VS_SCTP_EVE_DATA_CLI, 198 [SCTP_CID_COOKIE_ACK] = IP_VS_SCTP_COOKIE_ACK,
225 IP_VS_SCTP_EVE_DATA_CLI, 199 [SCTP_CID_ECN_ECNE] = IP_VS_SCTP_DATA,
226 IP_VS_SCTP_EVE_SHUT_COM_CLI, 200 [SCTP_CID_ECN_CWR] = IP_VS_SCTP_DATA,
201 [SCTP_CID_SHUTDOWN_COMPLETE] = IP_VS_SCTP_SHUTDOWN_COMPLETE,
227}; 202};
228 203
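With designated initializers, sctp_events[] is sized by the highest SCTP_CID_* it mentions, so any lookup has to bounds-check the chunk type. A sketch of such a guard, assuming (this is not shown in the hunk) that unknown chunk types fall back to IP_VS_SCTP_DATA:

#include <linux/kernel.h>

static inline int demo_sctp_event(__u8 chunk_type)
{
	/* chunk types beyond the table, e.g. SCTP_CID_FWD_TSN (0xC0),
	 * must not index out of bounds */
	if (chunk_type < ARRAY_SIZE(sctp_events))
		return sctp_events[chunk_type];
	return IP_VS_SCTP_DATA;		/* assumed fallback for this sketch */
}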
229static struct ipvs_sctp_nextstate 204/* SCTP States:
230 sctp_states_table[IP_VS_SCTP_S_LAST][IP_VS_SCTP_EVE_LAST] = { 205 * See RFC 2960, 4. SCTP Association State Diagram
231 /* 206 *
232 * STATE : IP_VS_SCTP_S_NONE 207 * New states (not in diagram):
233 */ 208 * - INIT1 state: use shorter timeout for dropped INIT packets
234 /*next state *//*event */ 209 * - REJECTED state: use shorter timeout if INIT is rejected with ABORT
235 {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, 210 * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging
236 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, 211 *
237 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, 212 * The states are as seen in real server. In the diagram, INIT1, INIT,
238 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, 213 * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state.
239 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, 214 *
240 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, 215 * States as per packets from client (C) and server (S):
241 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, 216 *
242 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, 217 * Setup of client connection:
243 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, 218 * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK
244 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, 219 * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK
245 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, 220 * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO
246 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, 221 * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK
247 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, 222 *
248 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, 223 * Setup of server connection:
249 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, 224 * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK
250 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, 225 * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO
251 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, 226 * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK
252 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }, 227 */
253 },
254 /*
255 * STATE : IP_VS_SCTP_S_INIT_CLI
256 * Client sent INIT and is waiting for a reply from the server (in ECHO_WAIT)
257 */
258 {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
259 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
260 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
261 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
262 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
263 {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
264 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ECHO_CLI */ },
265 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_ECHO_SER */ },
266 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
267 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
268 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
269 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
270 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
271 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
272 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
273 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
274 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
275 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
276 },
277 /*
278 * State : IP_VS_SCTP_S_INIT_SER
279 * Server sent INIT and waiting for INIT ACK from the client
280 */
281 {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
282 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
283 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
284 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
285 {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
286 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
287 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
288 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
289 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
290 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
291 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
292 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
293 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
294 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
295 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
296 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
297 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
298 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
299 },
300 /*
301 * State : IP_VS_SCTP_S_INIT_ACK_CLI
302 * Client sent INIT ACK and waiting for ECHO from the server
303 */
304 {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
305 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
306 /*
307 * We have got an INIT from client. From the spec.“Upon receipt of
308 * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
309 * an INIT ACK using the same parameters it sent in its original
310 * INIT chunk (including its Initiate Tag, unchanged”).
311 */
312 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
313 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
314 /*
315 * INIT_ACK has been resent by the client, let us stay in
316 * the same state
317 */
318 {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
319 /*
320 * INIT_ACK sent by the server, close the connection
321 */
322 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
323 /*
324 * ECHO by client, it should not happen, close the connection
325 */
326 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
327 /*
328 * ECHO by server, this is what we are expecting, move to ECHO_SER
329 */
330 {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
331 /*
332 * COOKIE ACK from client, it should not happen, close the connection
333 */
334 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
335 /*
336 * Unexpected COOKIE ACK from server, staty in the same state
337 */
338 {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
339 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
340 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
341 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
342 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
343 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
344 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
345 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
346 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
347 },
348 /*
349 * State : IP_VS_SCTP_S_INIT_ACK_SER
350 * Server sent INIT ACK and waiting for ECHO from the client
351 */
352 {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
353 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
354 /*
355 * We have got an INIT from client. From the spec.“Upon receipt of
356 * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
357 * an INIT ACK using the same parameters it sent in its original
358 * INIT chunk (including its Initiate Tag, unchanged”).
359 */
360 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
361 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
362 /*
363 * Unexpected INIT_ACK by the client, let us close the connection
364 */
365 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
366 /*
367 * INIT_ACK resent by the server, let us move to same state
368 */
369 {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
370 /*
371 * Client send the ECHO, this is what we are expecting,
372 * move to ECHO_CLI
373 */
374 {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
375 /*
376 * ECHO received from the server, Not sure what to do,
377 * let us close it
378 */
379 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
380 /*
381 * COOKIE ACK from client, let us stay in the same state
382 */
383 {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
384 /*
385 * COOKIE ACK from server, hmm... this should not happen, lets close
386 * the connection.
387 */
388 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
389 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
390 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
391 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
392 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
393 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
394 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
395 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
396 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
397 },
398 /*
399 * State : IP_VS_SCTP_S_ECHO_CLI
400 * Client sent ECHO and is waiting for COOKIE ACK from the server
401 */
402 {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
403 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
404 /*
405 * We have got an INIT from client. From the spec.“Upon receipt of
406 * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
407 * an INIT ACK using the same parameters it sent in its original
408 * INIT chunk (including its Initiate Tag, unchanged”).
409 */
410 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
411 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
412 /*
413 * INIT_ACK has been sent by the client, let us close the connection
414 */
415 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
416 /*
417 * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
418 * “If an INIT ACK is received by an endpoint in any state other
419 * than the COOKIE-WAIT state, the endpoint should discard the
420 * INIT ACK chunk”. Stay in the same state
421 */
422 {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
423 /*
424 * Client resent the ECHO, let us stay in the same state
425 */
426 {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
427 /*
428 * ECHO received from the server, Not sure what to do,
429 * let us close it
430 */
431 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
432 /*
433 * COOKIE ACK from client, this should not happen, let's close the
434 * connection
435 */
436 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
437 /*
438 * COOKIE ACK from server, this is what we are awaiting, let's move to
439 * ESTABLISHED.
440 */
441 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
442 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
443 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
444 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
445 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
446 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
447 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
448 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
449 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
450 },
451 /*
452 * State : IP_VS_SCTP_S_ECHO_SER
453 * Server sent ECHO and is waiting for COOKIE ACK from the client
454 */
455 {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
456 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
457 /*
458 * We have got an INIT from client. From the spec.“Upon receipt of
459 * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
460 * an INIT ACK using the same parameters it sent in its original
461 * INIT chunk (including its Initiate Tag, unchanged”).
462 */
463 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
464 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
465 /*
466 * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
467 * “If an INIT ACK is received by an endpoint in any state other
468 * than the COOKIE-WAIT state, the endpoint should discard the
469 * INIT ACK chunk”. Stay in the same state
470 */
471 {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
472 /*
473 * INIT_ACK has been sent by the server, let us close the connection
474 */
475 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
476 /*
477 * Client sent the ECHO, not sure what to do, let's close the
478 * connection.
479 */
480 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
481 /*
482 * ECHO resent by the server, stay in the same state
483 */
484 {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
485 /*
486 * COOKIE ACK from client, this is what we are expecting, let's move
487 * to ESTABLISHED.
488 */
489 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
490 /*
491 * COOKIE ACK from server, this should not happen, let's close the
492 * connection.
493 */
494 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
495 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
496 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
497 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
498 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
499 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
500 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
501 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
502 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
503 },
504 /*
505 * State : IP_VS_SCTP_S_ESTABLISHED
506 * Association established
507 */
508 {{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ },
509 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_SER */ },
510 /*
511 * We have got an INIT from client. From the spec.“Upon receipt of
512 * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
513 * an INIT ACK using the same parameters it sent in its original
514 * INIT chunk (including its Initiate Tag, unchanged”).
515 */
516 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
517 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
518 /*
519 * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
520 * “If an INIT ACK is received by an endpoint in any state other
521 * than the COOKIE-WAIT state, the endpoint should discard the
522 * INIT ACK chunk”. Stay in the same state
523 */
524 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
525 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
526 /*
527 * Client sent ECHO, Spec (sec 5.2.4) says it may be handled by the
528 * peer and the peer shall move to ESTABLISHED. If it doesn't handle it,
529 * it will send an ERROR chunk. So, stay in the same state
530 */
531 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
532 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
533 /*
534 * COOKIE ACK from client, not sure what to do; stay in the same state
535 */
536 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
537 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
538 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
539 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
540 /*
541 * SHUTDOWN from the client, move to SHUTDOWN_CLI
542 */
543 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
544 /*
545 * SHUTDOWN from the server, move to SHUTDOWN_SER
546 */
547 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
548 /*
549 * client sent SHUTDOWN_ACK, this should not happen, let's close
550 * the connection
551 */
552 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
553 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
554 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
555 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
556 },
557 /*
558 * State : IP_VS_SCTP_S_SHUT_CLI
559 * SHUTDOWN sent from the client, waiting for SHUTDOWN ACK from the server
560 */
561 /*
562 * We received a data chunk; keep the state unchanged. I assume
563 * that data chunks can still be received by both peers in
564 * SHUTDOWN state
565 */
566
567 {{IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ },
568 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_SER */ },
569 /*
570 * We have got an INIT from client. From the spec.“Upon receipt of
571 * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
572 * an INIT ACK using the same parameters it sent in its original
573 * INIT chunk (including its Initiate Tag, unchanged”).
574 */
575 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
576 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
577 /*
578 * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
579 * “If an INIT ACK is received by an endpoint in any state other
580 * than the COOKIE-WAIT state, the endpoint should discard the
581 * INIT ACK chunk”. Stay in the same state
582 */
583 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
584 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
585 /*
586 * Client sent ECHO, Spec (sec 5.2.4) says it may be handled by the
587 * peer and the peer shall move to ESTABLISHED. If it doesn't handle it,
588 * it will send an ERROR chunk. So, stay in the same state
589 */
590 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
591 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
592 /*
593 * COOKIE ACK from client, not sure what to do; stay in the same state
594 */
595 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
596 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
597 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
598 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
599 /*
600 * SHUTDOWN resent from the client, move to SHUTDOWN_CLI
601 */
602 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
603 /*
604 * SHUTDOWN from the server, move to SHUTDOWN_SER
605 */
606 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
607 /*
608 * client sent SHUTDOWN_ACK, this should not happen, let's close
609 * the connection
610 */
611 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
612 /*
613 * Server sent SHUTDOWN ACK, this is what we are expecting, let's move
614 * to SHUT_ACK_SER
615 */
616 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
617 /*
618 * SHUTDOWN COM from client, this should not happen, let's close the
619 * connection
620 */
621 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
622 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
623 },
624 /*
625 * State : IP_VS_SCTP_S_SHUT_SER
626 * SHUTDOWN sent from the server, waiting for SHUTDOWN ACK from the client
627 */
628 /*
629 * We received a data chunk; keep the state unchanged. I assume
630 * that data chunks can still be received by both peers in
631 * SHUTDOWN state
632 */
633
634 {{IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_CLI */ },
635 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_SER */ },
636 /*
637 * We have got an INIT from client. From the spec.“Upon receipt of
638 * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
639 * an INIT ACK using the same parameters it sent in its original
640 * INIT chunk (including its Initiate Tag, unchanged”).
641 */
642 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
643 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
644 /*
645 * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
646 * “If an INIT ACK is received by an endpoint in any state other
647 * than the COOKIE-WAIT state, the endpoint should discard the
648 * INIT ACK chunk”. Stay in the same state
649 */
650 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
651 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
652 /*
653 * Client sent ECHO, Spec (sec 5.2.4) says it may be handled by the
654 * peer and the peer shall move to ESTABLISHED. If it doesn't handle it,
655 * it will send an ERROR chunk. So, stay in the same state
656 */
657 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
658 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
659 /*
660 * COOKIE ACK from client, not sure what to do; stay in the same state
661 */
662 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
663 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
664 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
665 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
666 /*
667 * SHUTDOWN resent from the client, move to SHUTDOWN_CLI
668 */
669 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
670 /*
671 * SHUTDOWN resent from the server, move to SHUTDOWN_SER
672 */
673 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
674 /*
675 * client sent SHUTDOWN_ACK, this is what we are expecting, let's
676 * move to SHUT_ACK_CLI
677 */
678 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
679 /*
680 * Server sent SHUTDOWN ACK, this should not happen, let's close the
681 * connection
682 */
683 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
684 /*
685 * SHUTDOWN COM from client, this should not happen, let's close the
686 * connection
687 */
688 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
689 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
690 },
691
692 /*
693 * State : IP_VS_SCTP_S_SHUT_ACK_CLI
694 * SHUTDOWN ACK from the client, awaiting SHUTDOWN COMPLETE from server
695 */
696 /*
697 * We received a data chunk; keep the state unchanged. I assume
698 * that data chunks can still be received by both peers in the
699 * SHUTDOWN state
700 */
701
702 {{IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ },
703 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_SER */ },
704 /*
705 * We have got an INIT from the client. From the spec: "Upon receipt
706 * of an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
707 * an INIT ACK using the same parameters it sent in its original
708 * INIT chunk (including its Initiate Tag, unchanged)."
709 */
710 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
711 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
712 /*
713 * INIT_ACK sent by the server; unexpected INIT ACK. The spec says,
714 * "If an INIT ACK is received by an endpoint in any state other
715 * than the COOKIE-WAIT state, the endpoint should discard the
716 * INIT ACK chunk". Stay in the same state
717 */
718 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
719 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
720 /*
721 * Client sent ECHO. The spec (sec 5.2.4) says it may be handled by
722 * the peer, and the peer shall move to ESTABLISHED. If it doesn't
723 * handle it, it will send an ERROR chunk. So, stay in the same state
724 */
725 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
726 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
727 /*
728 * COOKIE ACK from client; not sure what to do, stay in the same state
729 */
730 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
731 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
732 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
733 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
734 /*
735 * SHUTDOWN sent from the client, move to SHUTDOWN_CLI
736 */
737 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
738 /*
739 * SHUTDOWN sent from the server, move to SHUTDOWN_SER
740 */
741 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
742 /*
743 * client resent SHUTDOWN_ACK, let's stay in the same state
744 */
745 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
746 /*
747 * Server sent SHUTDOWN ACK, this should not happen, let's close the
748 * connection
749 */
750 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
751 /*
752 * SHUTDOWN COM from client, this should not happen, let's close the
753 * connection
754 */
755 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
756 /*
757 * SHUTDOWN COMPLETE from server; this is what we are expecting.
758 */
759 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
760 },
761
762 /*
763 * State : IP_VS_SCTP_S_SHUT_ACK_SER
764 * SHUTDOWN ACK from the server, awaiting SHUTDOWN COMPLETE from client
765 */
766 /*
767 * We received a data chunk; keep the state unchanged. I assume
768 * that data chunks can still be received by both peers in the
769 * SHUTDOWN state
770 */
771 228
772 {{IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_CLI */ }, 229#define sNO IP_VS_SCTP_S_NONE
773 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_SER */ }, 230#define sI1 IP_VS_SCTP_S_INIT1
774 /* 231#define sIN IP_VS_SCTP_S_INIT
775 * We have got an INIT from the client. From the spec: "Upon receipt of 232#define sCS IP_VS_SCTP_S_COOKIE_SENT
776 * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with 233#define sCR IP_VS_SCTP_S_COOKIE_REPLIED
777 * an INIT ACK using the same parameters it sent in its original 234#define sCW IP_VS_SCTP_S_COOKIE_WAIT
778 * INIT chunk (including its Initiate Tag, unchanged)." 235#define sCO IP_VS_SCTP_S_COOKIE
779 */ 236#define sCE IP_VS_SCTP_S_COOKIE_ECHOED
780 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, 237#define sES IP_VS_SCTP_S_ESTABLISHED
781 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, 238#define sSS IP_VS_SCTP_S_SHUTDOWN_SENT
782 /* 239#define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED
783 * INIT_ACK sent by the server; unexpected INIT ACK. The spec says, 240#define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT
784 * "If an INIT ACK is received by an endpoint in any state other 241#define sRJ IP_VS_SCTP_S_REJECTED
785 * than the COOKIE-WAIT state, the endpoint should discard the 242#define sCL IP_VS_SCTP_S_CLOSED
786 * INIT ACK chunk". Stay in the same state 243
787 */ 244static const __u8 sctp_states
788 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, 245 [IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = {
789 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, 246 { /* INPUT */
790 /* 247/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
791 * Client sent ECHO. The spec (sec 5.2.4) says it may be handled by the 248/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
792 * peer, and the peer shall move to ESTABLISHED. If it doesn't handle 249/* i */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
793 * it, it will send an ERROR chunk. So, stay in the same state 250/* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
794 */ 251/* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
795 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, 252/* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL},
796 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, 253/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
797 /* 254/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL},
798 * COOKIE ACK from client; not sure what to do, stay in the same state 255/* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL},
799 */ 256/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL},
800 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, 257/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
801 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, 258 },
802 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, 259 { /* OUTPUT */
803 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, 260/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
804 /* 261/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
805 * SHUTDOWN sent from the client, move to SHUTDOWN_CLI 262/* i */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW},
806 */ 263/* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
807 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, 264/* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
808 /* 265/* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
809 * SHUTDOWN sent from the server, move to SHUTDOWN_SER 266/* s */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL},
810 */ 267/* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL},
811 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, 268/* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
812 /* 269/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
813 * client sent SHUTDOWN_ACK, this should not happen; let's close 269/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
814 * the connection. 271 },
815 */ 272 { /* INPUT-ONLY */
816 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, 273/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
817 /* 274/* d */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
818 * Server resent SHUTDOWN ACK, stay in the same state 275/* i */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
819 */ 276/* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
820 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, 277/* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
821 /* 278/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
822 * SHUTDOWN COM from client, this is what we are expecting, let's close 278/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
823 * the connection 280/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL},
824 */ 281/* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL},
825 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, 282/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
826 /* 283/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
827 * SHUTDOWN COMPLETE from server; this should not happen. 284 },
828 */
829 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
830 },
831 /*
832 * State : IP_VS_SCTP_S_CLOSED
833 */
834 {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
835 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
836 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
837 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
838 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
839 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
840 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
841 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
842 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
843 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
844 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
845 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
846 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
847 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
848 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
849 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
850 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
851 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
852 }
853}; 285};
854 286
855/* 287#define IP_VS_SCTP_MAX_RTO ((60 + 1) * HZ)
856 * Timeout table[state] 288
857 */ 289/* Timeout table[state] */
858static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { 290static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
859 [IP_VS_SCTP_S_NONE] = 2 * HZ, 291 [IP_VS_SCTP_S_NONE] = 2 * HZ,
860 [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, 292 [IP_VS_SCTP_S_INIT1] = (0 + 3 + 1) * HZ,
861 [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, 293 [IP_VS_SCTP_S_INIT] = IP_VS_SCTP_MAX_RTO,
862 [IP_VS_SCTP_S_INIT_ACK_CLI] = 1 * 60 * HZ, 294 [IP_VS_SCTP_S_COOKIE_SENT] = IP_VS_SCTP_MAX_RTO,
863 [IP_VS_SCTP_S_INIT_ACK_SER] = 1 * 60 * HZ, 295 [IP_VS_SCTP_S_COOKIE_REPLIED] = IP_VS_SCTP_MAX_RTO,
864 [IP_VS_SCTP_S_ECHO_CLI] = 1 * 60 * HZ, 296 [IP_VS_SCTP_S_COOKIE_WAIT] = IP_VS_SCTP_MAX_RTO,
865 [IP_VS_SCTP_S_ECHO_SER] = 1 * 60 * HZ, 297 [IP_VS_SCTP_S_COOKIE] = IP_VS_SCTP_MAX_RTO,
866 [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ, 298 [IP_VS_SCTP_S_COOKIE_ECHOED] = IP_VS_SCTP_MAX_RTO,
867 [IP_VS_SCTP_S_SHUT_CLI] = 1 * 60 * HZ, 299 [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ,
868 [IP_VS_SCTP_S_SHUT_SER] = 1 * 60 * HZ, 300 [IP_VS_SCTP_S_SHUTDOWN_SENT] = IP_VS_SCTP_MAX_RTO,
869 [IP_VS_SCTP_S_SHUT_ACK_CLI] = 1 * 60 * HZ, 301 [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = IP_VS_SCTP_MAX_RTO,
870 [IP_VS_SCTP_S_SHUT_ACK_SER] = 1 * 60 * HZ, 302 [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = IP_VS_SCTP_MAX_RTO,
871 [IP_VS_SCTP_S_CLOSED] = 10 * HZ, 303 [IP_VS_SCTP_S_REJECTED] = (0 + 3 + 1) * HZ,
872 [IP_VS_SCTP_S_LAST] = 2 * HZ, 304 [IP_VS_SCTP_S_CLOSED] = IP_VS_SCTP_MAX_RTO,
305 [IP_VS_SCTP_S_LAST] = 2 * HZ,
873}; 306};
874 307
875static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = { 308static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
876 [IP_VS_SCTP_S_NONE] = "NONE", 309 [IP_VS_SCTP_S_NONE] = "NONE",
877 [IP_VS_SCTP_S_INIT_CLI] = "INIT_CLI", 310 [IP_VS_SCTP_S_INIT1] = "INIT1",
878 [IP_VS_SCTP_S_INIT_SER] = "INIT_SER", 311 [IP_VS_SCTP_S_INIT] = "INIT",
879 [IP_VS_SCTP_S_INIT_ACK_CLI] = "INIT_ACK_CLI", 312 [IP_VS_SCTP_S_COOKIE_SENT] = "C-SENT",
880 [IP_VS_SCTP_S_INIT_ACK_SER] = "INIT_ACK_SER", 313 [IP_VS_SCTP_S_COOKIE_REPLIED] = "C-REPLIED",
881 [IP_VS_SCTP_S_ECHO_CLI] = "COOKIE_ECHO_CLI", 314 [IP_VS_SCTP_S_COOKIE_WAIT] = "C-WAIT",
882 [IP_VS_SCTP_S_ECHO_SER] = "COOKIE_ECHO_SER", 315 [IP_VS_SCTP_S_COOKIE] = "COOKIE",
883 [IP_VS_SCTP_S_ESTABLISHED] = "ESTABISHED", 316 [IP_VS_SCTP_S_COOKIE_ECHOED] = "C-ECHOED",
884 [IP_VS_SCTP_S_SHUT_CLI] = "SHUTDOWN_CLI", 317 [IP_VS_SCTP_S_ESTABLISHED] = "ESTABLISHED",
885 [IP_VS_SCTP_S_SHUT_SER] = "SHUTDOWN_SER", 318 [IP_VS_SCTP_S_SHUTDOWN_SENT] = "S-SENT",
886 [IP_VS_SCTP_S_SHUT_ACK_CLI] = "SHUTDOWN_ACK_CLI", 319 [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = "S-RECEIVED",
887 [IP_VS_SCTP_S_SHUT_ACK_SER] = "SHUTDOWN_ACK_SER", 320 [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = "S-ACK-SENT",
888 [IP_VS_SCTP_S_CLOSED] = "CLOSED", 321 [IP_VS_SCTP_S_REJECTED] = "REJECTED",
889 [IP_VS_SCTP_S_LAST] = "BUG!" 322 [IP_VS_SCTP_S_CLOSED] = "CLOSED",
323 [IP_VS_SCTP_S_LAST] = "BUG!",
890}; 324};
891 325
892 326
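
The timeout table above keys per-state lifetimes in jiffies: a plain array
indexed by state, with seconds scaled by HZ and IP_VS_SCTP_MAX_RTO, defined
as ((60 + 1) * HZ), acting as the common ceiling for transient states. A
minimal userspace sketch of the same shape; the HZ value and state names
here are illustrative stand-ins, not the kernel's:

#include <stdio.h>

#define HZ 100                      /* assumed ticks per second */
#define MAX_RTO ((60 + 1) * HZ)     /* like IP_VS_SCTP_MAX_RTO */

enum { S_NONE, S_ESTABLISHED, S_CLOSED, S_LAST };

static const int timeouts[S_LAST + 1] = {
	[S_NONE]        = 2 * HZ,       /* 2 seconds */
	[S_ESTABLISHED] = 15 * 60 * HZ, /* 15 minutes */
	[S_CLOSED]      = MAX_RTO,      /* RTO ceiling plus 1s slack */
	[S_LAST]        = 2 * HZ,       /* catch-all for bad states */
};

int main(void)
{
	printf("ESTABLISHED expires after %d ticks (%d s)\n",
	       timeouts[S_ESTABLISHED], timeouts[S_ESTABLISHED] / HZ);
	return 0;
}
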
@@ -943,17 +377,20 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
943 } 377 }
944 } 378 }
945 379
946 event = sctp_events[chunk_type]; 380 event = (chunk_type < sizeof(sctp_events)) ?
381 sctp_events[chunk_type] : IP_VS_SCTP_DATA;
947 382
948 /* 383 /* Update direction to INPUT_ONLY if necessary
949 * If the direction is IP_VS_DIR_OUTPUT, this event is from server 384 * or delete NO_OUTPUT flag if output packet detected
950 */
951 if (direction == IP_VS_DIR_OUTPUT)
952 event++;
953 /*
954 * get next state
955 */ 385 */
956 next_state = sctp_states_table[cp->state][event].next_state; 386 if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
387 if (direction == IP_VS_DIR_OUTPUT)
388 cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
389 else
390 direction = IP_VS_DIR_INPUT_ONLY;
391 }
392
393 next_state = sctp_states[direction][event][cp->state];
957 394
958 if (next_state != cp->state) { 395 if (next_state != cp->state) {
959 struct ip_vs_dest *dest = cp->dest; 396 struct ip_vs_dest *dest = cp->dest;
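
The rewritten set_sctp_state() above replaces the old per-state struct walk
with a single three-dimensional lookup, sctp_states[direction][event][state],
after clamping unknown chunk types to the DATA event. A standalone sketch of
that pattern, with made-up enums and table contents rather than the kernel's:

#include <stdio.h>

enum dir   { DIR_INPUT, DIR_OUTPUT, DIR_LAST };
enum event { EV_DATA, EV_INIT, EV_LAST };
enum state { S_NONE, S_INIT, S_ESTABLISHED, S_LAST };

static const unsigned char states[DIR_LAST][EV_LAST][S_LAST] = {
	[DIR_INPUT] = {
		[EV_DATA] = { S_ESTABLISHED, S_INIT, S_ESTABLISHED },
		[EV_INIT] = { S_INIT,        S_INIT, S_ESTABLISHED },
	},
	[DIR_OUTPUT] = {
		[EV_DATA] = { S_NONE, S_INIT, S_ESTABLISHED },
		[EV_INIT] = { S_INIT, S_INIT, S_ESTABLISHED },
	},
};

int main(void)
{
	int cur = S_NONE;
	int chunk_type = 57;	/* some chunk with no table entry */

	/* Out-of-range chunk types fall back to the DATA event, as in
	 * the bounds check added to set_sctp_state() above. */
	int ev = (chunk_type < EV_LAST) ? chunk_type : EV_DATA;

	printf("state %d -> %d\n", cur, states[DIR_INPUT][ev][cur]);
	return 0;
}
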
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 50a15944c6c1..e3a697234a98 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -39,6 +39,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
39 struct net *net; 39 struct net *net;
40 struct ip_vs_service *svc; 40 struct ip_vs_service *svc;
41 struct tcphdr _tcph, *th; 41 struct tcphdr _tcph, *th;
42 struct netns_ipvs *ipvs;
42 43
43 th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); 44 th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
44 if (th == NULL) { 45 if (th == NULL) {
@@ -46,14 +47,15 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
46 return 0; 47 return 0;
47 } 48 }
48 net = skb_net(skb); 49 net = skb_net(skb);
50 ipvs = net_ipvs(net);
49 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ 51 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
50 rcu_read_lock(); 52 rcu_read_lock();
51 if (th->syn && 53 if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst &&
52 (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, 54 (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
53 &iph->daddr, th->dest))) { 55 &iph->daddr, th->dest))) {
54 int ignored; 56 int ignored;
55 57
56 if (ip_vs_todrop(net_ipvs(net))) { 58 if (ip_vs_todrop(ipvs)) {
57 /* 59 /*
58 * It seems that we are very loaded. 60 * It seems that we are very loaded.
59 * We have to drop this packet :( 61 * We have to drop this packet :(
@@ -401,7 +403,7 @@ static struct tcp_states_t tcp_states [] = {
401/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ 403/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
402/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, 404/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
403/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }}, 405/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
404/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, 406/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
405/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }}, 407/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
406 408
407/* OUTPUT */ 409/* OUTPUT */
@@ -415,7 +417,7 @@ static struct tcp_states_t tcp_states [] = {
415/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ 417/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
416/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, 418/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
417/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }}, 419/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
418/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, 420/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
419/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, 421/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
420}; 422};
421 423
@@ -424,7 +426,7 @@ static struct tcp_states_t tcp_states_dos [] = {
424/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ 426/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
425/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }}, 427/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
426/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }}, 428/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
427/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }}, 429/*ack*/ {{sES, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
428/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, 430/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
429 431
430/* OUTPUT */ 432/* OUTPUT */
@@ -438,7 +440,7 @@ static struct tcp_states_t tcp_states_dos [] = {
438/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ 440/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
439/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }}, 441/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
440/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }}, 442/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
441/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, 443/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
442/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, 444/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
443}; 445};
444 446
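
Two related changes above: tcp_conn_schedule() now admits non-SYN packets
when the new sloppy-TCP toggle is on, as long as RST is not set, and the
state tables make a bare ACK in sNO land in ESTABLISHED so a connection
picked up mid-stream gets a sane state. A sketch of just the admission
predicate, with a stand-in flags struct:

#include <stdbool.h>
#include <stdio.h>

struct tcp_flags { bool syn, rst; };

/* Mirrors (th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst above. */
static bool may_schedule(const struct tcp_flags *th, bool sloppy_tcp)
{
	return (th->syn || sloppy_tcp) && !th->rst;
}

int main(void)
{
	struct tcp_flags mid_stream = { .syn = false, .rst = false };

	printf("strict: %d, sloppy: %d\n",
	       may_schedule(&mid_stream, false),	/* 0: dropped */
	       may_schedule(&mid_stream, true));	/* 1: scheduled */
	return 0;
}
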
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index c35986c793d9..176b87c35e34 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -55,7 +55,8 @@ static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest)
55 * Round-Robin Scheduling 55 * Round-Robin Scheduling
56 */ 56 */
57static struct ip_vs_dest * 57static struct ip_vs_dest *
58ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 58ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
59 struct ip_vs_iphdr *iph)
59{ 60{
60 struct list_head *p; 61 struct list_head *p;
61 struct ip_vs_dest *dest, *last; 62 struct ip_vs_dest *dest, *last;
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index f3205925359a..e446b9fa7424 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -44,7 +44,7 @@
44#include <net/ip_vs.h> 44#include <net/ip_vs.h>
45 45
46 46
47static inline unsigned int 47static inline int
48ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) 48ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
49{ 49{
50 /* 50 /*
@@ -59,10 +59,11 @@ ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
59 * Weighted Least Connection scheduling 59 * Weighted Least Connection scheduling
60 */ 60 */
61static struct ip_vs_dest * 61static struct ip_vs_dest *
62ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 62ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
63 struct ip_vs_iphdr *iph)
63{ 64{
64 struct ip_vs_dest *dest, *least; 65 struct ip_vs_dest *dest, *least;
65 unsigned int loh, doh; 66 int loh, doh;
66 67
67 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 68 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
68 69
@@ -98,8 +99,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
98 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 99 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
99 continue; 100 continue;
100 doh = ip_vs_sed_dest_overhead(dest); 101 doh = ip_vs_sed_dest_overhead(dest);
101 if (loh * atomic_read(&dest->weight) > 102 if ((__s64)loh * atomic_read(&dest->weight) >
102 doh * atomic_read(&least->weight)) { 103 (__s64)doh * atomic_read(&least->weight)) {
103 least = dest; 104 least = dest;
104 loh = doh; 105 loh = doh;
105 } 106 }
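
The scheduler hunks above switch loh/doh to signed int and widen the
cross-multiplication to __s64, so overhead * weight cannot wrap in 32 bits
for large weights. The idea in isolation, with illustrative numbers:

#include <stdint.h>
#include <stdio.h>

/* True when dest is the better pick: loh/w_least > doh/w_dest,
 * cross-multiplied in 64 bits to avoid both division and overflow. */
static int prefer_dest(int loh, int w_least, int doh, int w_dest)
{
	return (int64_t)loh * w_dest > (int64_t)doh * w_least;
}

int main(void)
{
	/* Products of ~2.8e9 would overflow a 32-bit int. */
	int loh = 70000, doh = 60000, w_least = 40000, w_dest = 40000;

	printf("switch to dest: %d\n",
	       prefer_dest(loh, w_least, doh, w_dest));
	return 0;
}
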
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index a65edfe4b16c..3588faebe529 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -48,6 +48,10 @@
48 48
49#include <net/ip_vs.h> 49#include <net/ip_vs.h>
50 50
51#include <net/tcp.h>
52#include <linux/udp.h>
53#include <linux/sctp.h>
54
51 55
52/* 56/*
53 * IPVS SH bucket 57 * IPVS SH bucket
@@ -71,10 +75,19 @@ struct ip_vs_sh_state {
71 struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE]; 75 struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE];
72}; 76};
73 77
78/* Helper function to determine if server is unavailable */
79static inline bool is_unavailable(struct ip_vs_dest *dest)
80{
81 return atomic_read(&dest->weight) <= 0 ||
82 dest->flags & IP_VS_DEST_F_OVERLOAD;
83}
84
74/* 85/*
75 * Returns hash value for IPVS SH entry 86 * Returns hash value for IPVS SH entry
76 */ 87 */
77static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr) 88static inline unsigned int
89ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr,
90 __be16 port, unsigned int offset)
78{ 91{
79 __be32 addr_fold = addr->ip; 92 __be32 addr_fold = addr->ip;
80 93
@@ -83,7 +96,8 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
83 addr_fold = addr->ip6[0]^addr->ip6[1]^ 96 addr_fold = addr->ip6[0]^addr->ip6[1]^
84 addr->ip6[2]^addr->ip6[3]; 97 addr->ip6[2]^addr->ip6[3];
85#endif 98#endif
86 return (ntohl(addr_fold)*2654435761UL) & IP_VS_SH_TAB_MASK; 99 return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) &
100 IP_VS_SH_TAB_MASK;
87} 101}
88 102
89 103
@@ -91,12 +105,42 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
91 * Get ip_vs_dest associated with supplied parameters. 105 * Get ip_vs_dest associated with supplied parameters.
92 */ 106 */
93static inline struct ip_vs_dest * 107static inline struct ip_vs_dest *
94ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr) 108ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
109 const union nf_inet_addr *addr, __be16 port)
95{ 110{
96 return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest); 111 unsigned int hash = ip_vs_sh_hashkey(svc->af, addr, port, 0);
112 struct ip_vs_dest *dest = rcu_dereference(s->buckets[hash].dest);
113
114 return (!dest || is_unavailable(dest)) ? NULL : dest;
97} 115}
98 116
99 117
118/* As ip_vs_sh_get, but with fallback if selected server is unavailable */
119static inline struct ip_vs_dest *
120ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
121 const union nf_inet_addr *addr, __be16 port)
122{
123 unsigned int offset;
124 unsigned int hash;
125 struct ip_vs_dest *dest;
126
127 for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) {
128 hash = ip_vs_sh_hashkey(svc->af, addr, port, offset);
129 dest = rcu_dereference(s->buckets[hash].dest);
130 if (!dest)
131 break;
132 if (is_unavailable(dest))
133 IP_VS_DBG_BUF(6, "SH: selected unavailable server "
134 "%s:%d (offset %d)",
135 IP_VS_DBG_ADDR(svc->af, &dest->addr),
136 ntohs(dest->port), offset);
137 else
138 return dest;
139 }
140
141 return NULL;
142}
143
100/* 144/*
101 * Assign all the hash buckets of the specified table with the service. 145 * Assign all the hash buckets of the specified table with the service.
102 */ 146 */
@@ -213,13 +257,39 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
213} 257}
214 258
215 259
216/* 260/* Helper function to get port number */
217 * If the dest flags is set with IP_VS_DEST_F_OVERLOAD, 261static inline __be16
218 * consider that the server is overloaded here. 262ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
219 */
220static inline int is_overloaded(struct ip_vs_dest *dest)
221{ 263{
222 return dest->flags & IP_VS_DEST_F_OVERLOAD; 264 __be16 port;
265 struct tcphdr _tcph, *th;
266 struct udphdr _udph, *uh;
267 sctp_sctphdr_t _sctph, *sh;
268
269 switch (iph->protocol) {
270 case IPPROTO_TCP:
271 th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
272 if (unlikely(th == NULL))
273 return 0;
274 port = th->source;
275 break;
276 case IPPROTO_UDP:
277 uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
278 if (unlikely(uh == NULL))
279 return 0;
280 port = uh->source;
281 break;
282 case IPPROTO_SCTP:
283 sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
284 if (unlikely(sh == NULL))
285 return 0;
286 port = sh->source;
287 break;
288 default:
289 port = 0;
290 }
291
292 return port;
223} 293}
224 294
225 295
@@ -227,28 +297,32 @@ static inline int is_overloaded(struct ip_vs_dest *dest)
227 * Source Hashing scheduling 297 * Source Hashing scheduling
228 */ 298 */
229static struct ip_vs_dest * 299static struct ip_vs_dest *
230ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 300ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
301 struct ip_vs_iphdr *iph)
231{ 302{
232 struct ip_vs_dest *dest; 303 struct ip_vs_dest *dest;
233 struct ip_vs_sh_state *s; 304 struct ip_vs_sh_state *s;
234 struct ip_vs_iphdr iph; 305 __be16 port = 0;
235
236 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
237 306
238 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); 307 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
239 308
309 if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT)
310 port = ip_vs_sh_get_port(skb, iph);
311
240 s = (struct ip_vs_sh_state *) svc->sched_data; 312 s = (struct ip_vs_sh_state *) svc->sched_data;
241 dest = ip_vs_sh_get(svc->af, s, &iph.saddr); 313
242 if (!dest 314 if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK)
243 || !(dest->flags & IP_VS_DEST_F_AVAILABLE) 315 dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port);
244 || atomic_read(&dest->weight) <= 0 316 else
245 || is_overloaded(dest)) { 317 dest = ip_vs_sh_get(svc, s, &iph->saddr, port);
318
319 if (!dest) {
246 ip_vs_scheduler_err(svc, "no destination available"); 320 ip_vs_scheduler_err(svc, "no destination available");
247 return NULL; 321 return NULL;
248 } 322 }
249 323
250 IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", 324 IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
251 IP_VS_DBG_ADDR(svc->af, &iph.saddr), 325 IP_VS_DBG_ADDR(svc->af, &iph->saddr),
252 IP_VS_DBG_ADDR(svc->af, &dest->addr), 326 IP_VS_DBG_ADDR(svc->af, &dest->addr),
253 ntohs(dest->port)); 327 ntohs(dest->port));
254 328
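
The source-hash changes above fold the source port and a probe offset into
the Knuth-style multiplicative hash, which is what lets
ip_vs_sh_get_fallback() walk successive buckets when the first pick is
unavailable. A userspace sketch of that probing, in host byte order and
with a stand-in availability test:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TAB_SIZE 256
#define TAB_MASK (TAB_SIZE - 1)

static bool available[TAB_SIZE];	/* stand-in for dest state */

static unsigned int hashkey(uint32_t saddr, uint16_t sport,
			    unsigned int offset)
{
	return (offset + (sport + saddr) * 2654435761UL) & TAB_MASK;
}

static int pick_bucket(uint32_t saddr, uint16_t sport)
{
	unsigned int offset;

	for (offset = 0; offset < TAB_SIZE; offset++) {
		unsigned int hash = hashkey(saddr, sport, offset);

		if (available[hash])
			return hash;	/* first usable bucket wins */
	}
	return -1;			/* nothing available */
}

int main(void)
{
	available[hashkey(0x0a000001, 12345, 1)] = true;

	printf("bucket %d\n", pick_bucket(0x0a000001, 12345));
	return 0;
}
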
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index f6046d9af8d3..f4484719f3e6 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -425,6 +425,16 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
425 return sb; 425 return sb;
426} 426}
427 427
428/* Check if connection is controlled by persistence */
429static inline bool in_persistence(struct ip_vs_conn *cp)
430{
431 for (cp = cp->control; cp; cp = cp->control) {
432 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
433 return true;
434 }
435 return false;
436}
437
428/* Check if conn should be synced. 438/* Check if conn should be synced.
429 * pkts: conn packets, use sysctl_sync_threshold to avoid packet check 439 * pkts: conn packets, use sysctl_sync_threshold to avoid packet check
430 * - (1) sync_refresh_period: reduce sync rate. Additionally, retry 440 * - (1) sync_refresh_period: reduce sync rate. Additionally, retry
@@ -447,6 +457,8 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
447 /* Check if we sync in current state */ 457 /* Check if we sync in current state */
448 if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE)) 458 if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
449 force = 0; 459 force = 0;
460 else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp)))
461 return 0;
450 else if (likely(cp->protocol == IPPROTO_TCP)) { 462 else if (likely(cp->protocol == IPPROTO_TCP)) {
451 if (!((1 << cp->state) & 463 if (!((1 << cp->state) &
452 ((1 << IP_VS_TCP_S_ESTABLISHED) | 464 ((1 << IP_VS_TCP_S_ESTABLISHED) |
@@ -461,9 +473,10 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
461 } else if (unlikely(cp->protocol == IPPROTO_SCTP)) { 473 } else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
462 if (!((1 << cp->state) & 474 if (!((1 << cp->state) &
463 ((1 << IP_VS_SCTP_S_ESTABLISHED) | 475 ((1 << IP_VS_SCTP_S_ESTABLISHED) |
464 (1 << IP_VS_SCTP_S_CLOSED) | 476 (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) |
465 (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) | 477 (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) |
466 (1 << IP_VS_SCTP_S_SHUT_ACK_SER)))) 478 (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) |
479 (1 << IP_VS_SCTP_S_CLOSED))))
467 return 0; 480 return 0;
468 force = cp->state != cp->old_state; 481 force = cp->state != cp->old_state;
469 if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED) 482 if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
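
Two details in the sync hunks above: connections controlled by a persistence
template can now be skipped entirely (sync_persist_mode plus the
in_persistence() walk up cp->control), and the per-protocol "is this state
worth syncing" test is a bitmask of (1 << state) values. A sketch of that
membership test with stand-in states:

#include <stdio.h>

enum { S_ESTABLISHED, S_SHUTDOWN_SENT, S_CLOSED, S_OTHER };

#define SYNC_STATES ((1 << S_ESTABLISHED) | \
		     (1 << S_SHUTDOWN_SENT) | \
		     (1 << S_CLOSED))

static int state_needs_sync(int state)
{
	return !!((1 << state) & SYNC_STATES);
}

int main(void)
{
	printf("ESTABLISHED: %d, OTHER: %d\n",
	       state_needs_sync(S_ESTABLISHED),	/* 1 */
	       state_needs_sync(S_OTHER));	/* 0 */
	return 0;
}
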
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index c60a81c4ce9a..b5b4650d50a9 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -31,10 +31,11 @@
31 * Weighted Least Connection scheduling 31 * Weighted Least Connection scheduling
32 */ 32 */
33static struct ip_vs_dest * 33static struct ip_vs_dest *
34ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 34ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
35 struct ip_vs_iphdr *iph)
35{ 36{
36 struct ip_vs_dest *dest, *least; 37 struct ip_vs_dest *dest, *least;
37 unsigned int loh, doh; 38 int loh, doh;
38 39
39 IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n"); 40 IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n");
40 41
@@ -70,8 +71,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
70 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 71 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
71 continue; 72 continue;
72 doh = ip_vs_dest_conn_overhead(dest); 73 doh = ip_vs_dest_conn_overhead(dest);
73 if (loh * atomic_read(&dest->weight) > 74 if ((__s64)loh * atomic_read(&dest->weight) >
74 doh * atomic_read(&least->weight)) { 75 (__s64)doh * atomic_read(&least->weight)) {
75 least = dest; 76 least = dest;
76 loh = doh; 77 loh = doh;
77 } 78 }
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 0e68555bceb9..0546cd572d6b 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -162,7 +162,8 @@ static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc,
162 * Weighted Round-Robin Scheduling 162 * Weighted Round-Robin Scheduling
163 */ 163 */
164static struct ip_vs_dest * 164static struct ip_vs_dest *
165ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 165ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
166 struct ip_vs_iphdr *iph)
166{ 167{
167 struct ip_vs_dest *dest, *last, *stop = NULL; 168 struct ip_vs_dest *dest, *last, *stop = NULL;
168 struct ip_vs_wrr_mark *mark = svc->sched_data; 169 struct ip_vs_wrr_mark *mark = svc->sched_data;
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index b75ff6429a04..c47444e4cf8c 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -883,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
883 iph->daddr = cp->daddr.ip; 883 iph->daddr = cp->daddr.ip;
884 iph->saddr = saddr; 884 iph->saddr = saddr;
885 iph->ttl = old_iph->ttl; 885 iph->ttl = old_iph->ttl;
886 ip_select_ident(iph, &rt->dst, NULL); 886 ip_select_ident(skb, &rt->dst, NULL);
887 887
888 /* Another hack: avoid icmp_send in ip_fragment */ 888 /* Another hack: avoid icmp_send in ip_fragment */
889 skb->local_df = 1; 889 skb->local_df = 1;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0283baedcdfb..5d892febd64c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -39,6 +39,7 @@
39#include <net/netfilter/nf_conntrack_l4proto.h> 39#include <net/netfilter/nf_conntrack_l4proto.h>
40#include <net/netfilter/nf_conntrack_expect.h> 40#include <net/netfilter/nf_conntrack_expect.h>
41#include <net/netfilter/nf_conntrack_helper.h> 41#include <net/netfilter/nf_conntrack_helper.h>
42#include <net/netfilter/nf_conntrack_seqadj.h>
42#include <net/netfilter/nf_conntrack_core.h> 43#include <net/netfilter/nf_conntrack_core.h>
43#include <net/netfilter/nf_conntrack_extend.h> 44#include <net/netfilter/nf_conntrack_extend.h>
44#include <net/netfilter/nf_conntrack_acct.h> 45#include <net/netfilter/nf_conntrack_acct.h>
@@ -47,6 +48,7 @@
47#include <net/netfilter/nf_conntrack_timestamp.h> 48#include <net/netfilter/nf_conntrack_timestamp.h>
48#include <net/netfilter/nf_conntrack_timeout.h> 49#include <net/netfilter/nf_conntrack_timeout.h>
49#include <net/netfilter/nf_conntrack_labels.h> 50#include <net/netfilter/nf_conntrack_labels.h>
51#include <net/netfilter/nf_conntrack_synproxy.h>
50#include <net/netfilter/nf_nat.h> 52#include <net/netfilter/nf_nat.h>
51#include <net/netfilter/nf_nat_core.h> 53#include <net/netfilter/nf_nat_core.h>
52#include <net/netfilter/nf_nat_helper.h> 54#include <net/netfilter/nf_nat_helper.h>
@@ -238,7 +240,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
238 nf_conntrack_free(ct); 240 nf_conntrack_free(ct);
239} 241}
240 242
241void nf_ct_delete_from_lists(struct nf_conn *ct) 243static void nf_ct_delete_from_lists(struct nf_conn *ct)
242{ 244{
243 struct net *net = nf_ct_net(ct); 245 struct net *net = nf_ct_net(ct);
244 246
@@ -253,7 +255,6 @@ void nf_ct_delete_from_lists(struct nf_conn *ct)
253 &net->ct.dying); 255 &net->ct.dying);
254 spin_unlock_bh(&nf_conntrack_lock); 256 spin_unlock_bh(&nf_conntrack_lock);
255} 257}
256EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
257 258
258static void death_by_event(unsigned long ul_conntrack) 259static void death_by_event(unsigned long ul_conntrack)
259{ 260{
@@ -275,7 +276,7 @@ static void death_by_event(unsigned long ul_conntrack)
275 nf_ct_put(ct); 276 nf_ct_put(ct);
276} 277}
277 278
278void nf_ct_dying_timeout(struct nf_conn *ct) 279static void nf_ct_dying_timeout(struct nf_conn *ct)
279{ 280{
280 struct net *net = nf_ct_net(ct); 281 struct net *net = nf_ct_net(ct);
281 struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); 282 struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
@@ -288,27 +289,33 @@ void nf_ct_dying_timeout(struct nf_conn *ct)
288 (prandom_u32() % net->ct.sysctl_events_retry_timeout); 289 (prandom_u32() % net->ct.sysctl_events_retry_timeout);
289 add_timer(&ecache->timeout); 290 add_timer(&ecache->timeout);
290} 291}
291EXPORT_SYMBOL_GPL(nf_ct_dying_timeout);
292 292
293static void death_by_timeout(unsigned long ul_conntrack) 293bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
294{ 294{
295 struct nf_conn *ct = (void *)ul_conntrack;
296 struct nf_conn_tstamp *tstamp; 295 struct nf_conn_tstamp *tstamp;
297 296
298 tstamp = nf_conn_tstamp_find(ct); 297 tstamp = nf_conn_tstamp_find(ct);
299 if (tstamp && tstamp->stop == 0) 298 if (tstamp && tstamp->stop == 0)
300 tstamp->stop = ktime_to_ns(ktime_get_real()); 299 tstamp->stop = ktime_to_ns(ktime_get_real());
301 300
302 if (!test_bit(IPS_DYING_BIT, &ct->status) && 301 if (!nf_ct_is_dying(ct) &&
303 unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { 302 unlikely(nf_conntrack_event_report(IPCT_DESTROY, ct,
303 portid, report) < 0)) {
304 /* destroy event was not delivered */ 304 /* destroy event was not delivered */
305 nf_ct_delete_from_lists(ct); 305 nf_ct_delete_from_lists(ct);
306 nf_ct_dying_timeout(ct); 306 nf_ct_dying_timeout(ct);
307 return; 307 return false;
308 } 308 }
309 set_bit(IPS_DYING_BIT, &ct->status); 309 set_bit(IPS_DYING_BIT, &ct->status);
310 nf_ct_delete_from_lists(ct); 310 nf_ct_delete_from_lists(ct);
311 nf_ct_put(ct); 311 nf_ct_put(ct);
312 return true;
313}
314EXPORT_SYMBOL_GPL(nf_ct_delete);
315
316static void death_by_timeout(unsigned long ul_conntrack)
317{
318 nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
312} 319}
313 320
314/* 321/*
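
The nf_ct_delete() helper introduced above folds the old "deliver the
destroy event, park the entry on the dying list if delivery fails" dance
into one place and reports whether the entry was really killed, which is
what early_drop() and ctnetlink now key off. A control-flow sketch with
stand-in types and helpers, not the kernel code:

#include <stdbool.h>
#include <stdio.h>

struct conn { bool dying; };

static int deliver_destroy_event(struct conn *ct) { (void)ct; return 0; }
static void delete_from_lists(struct conn *ct)    { (void)ct; }
static void arm_retry_timer(struct conn *ct)      { (void)ct; }

static bool conn_delete(struct conn *ct)
{
	if (!ct->dying && deliver_destroy_event(ct) < 0) {
		/* Event not delivered: keep the entry and retry later. */
		delete_from_lists(ct);
		arm_retry_timer(ct);
		return false;
	}
	ct->dying = true;	/* avoid delivering the event twice */
	delete_from_lists(ct);
	return true;		/* really gone; callers may count it */
}

int main(void)
{
	struct conn c = { .dying = false };

	printf("deleted: %d\n", conn_delete(&c));
	return 0;
}
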
@@ -643,10 +650,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
643 return dropped; 650 return dropped;
644 651
645 if (del_timer(&ct->timeout)) { 652 if (del_timer(&ct->timeout)) {
646 death_by_timeout((unsigned long)ct); 653 if (nf_ct_delete(ct, 0, 0)) {
647 /* Check if we indeed killed this entry. Reliable event
648 delivery may have inserted it into the dying list. */
649 if (test_bit(IPS_DYING_BIT, &ct->status)) {
650 dropped = 1; 654 dropped = 1;
651 NF_CT_STAT_INC_ATOMIC(net, early_drop); 655 NF_CT_STAT_INC_ATOMIC(net, early_drop);
652 } 656 }
@@ -796,6 +800,11 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
796 if (IS_ERR(ct)) 800 if (IS_ERR(ct))
797 return (struct nf_conntrack_tuple_hash *)ct; 801 return (struct nf_conntrack_tuple_hash *)ct;
798 802
803 if (tmpl && nfct_synproxy(tmpl)) {
804 nfct_seqadj_ext_add(ct);
805 nfct_synproxy_ext_add(ct);
806 }
807
799 timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL; 808 timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
800 if (timeout_ext) 809 if (timeout_ext)
801 timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); 810 timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext);
@@ -1192,7 +1201,7 @@ EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
1192#endif 1201#endif
1193 1202
1194/* Used by ipt_REJECT and ip6t_REJECT. */ 1203/* Used by ipt_REJECT and ip6t_REJECT. */
1195static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) 1204static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
1196{ 1205{
1197 struct nf_conn *ct; 1206 struct nf_conn *ct;
1198 enum ip_conntrack_info ctinfo; 1207 enum ip_conntrack_info ctinfo;
@@ -1244,7 +1253,7 @@ found:
1244 1253
1245void nf_ct_iterate_cleanup(struct net *net, 1254void nf_ct_iterate_cleanup(struct net *net,
1246 int (*iter)(struct nf_conn *i, void *data), 1255 int (*iter)(struct nf_conn *i, void *data),
1247 void *data) 1256 void *data, u32 portid, int report)
1248{ 1257{
1249 struct nf_conn *ct; 1258 struct nf_conn *ct;
1250 unsigned int bucket = 0; 1259 unsigned int bucket = 0;
@@ -1252,7 +1261,8 @@ void nf_ct_iterate_cleanup(struct net *net,
1252 while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { 1261 while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
1253 /* Time to push up daisies... */ 1262
1254 if (del_timer(&ct->timeout)) 1263 if (del_timer(&ct->timeout))
1255 death_by_timeout((unsigned long)ct); 1264 nf_ct_delete(ct, portid, report);
1265
1256 /* ... else the timer will get him soon. */ 1266 /* ... else the timer will get him soon. */
1257 1267
1258 nf_ct_put(ct); 1268 nf_ct_put(ct);
@@ -1260,30 +1270,6 @@ void nf_ct_iterate_cleanup(struct net *net,
1260} 1270}
1261EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); 1271EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
1262 1272
1263struct __nf_ct_flush_report {
1264 u32 portid;
1265 int report;
1266};
1267
1268static int kill_report(struct nf_conn *i, void *data)
1269{
1270 struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
1271 struct nf_conn_tstamp *tstamp;
1272
1273 tstamp = nf_conn_tstamp_find(i);
1274 if (tstamp && tstamp->stop == 0)
1275 tstamp->stop = ktime_to_ns(ktime_get_real());
1276
1277 /* If we fail to deliver the event, death_by_timeout() will retry */
1278 if (nf_conntrack_event_report(IPCT_DESTROY, i,
1279 fr->portid, fr->report) < 0)
1280 return 1;
1281
1282 /* Avoid the delivery of the destroy event in death_by_timeout(). */
1283 set_bit(IPS_DYING_BIT, &i->status);
1284 return 1;
1285}
1286
1287static int kill_all(struct nf_conn *i, void *data) 1273static int kill_all(struct nf_conn *i, void *data)
1288{ 1274{
1289 return 1; 1275 return 1;
@@ -1301,11 +1287,7 @@ EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
1301 1287
1302void nf_conntrack_flush_report(struct net *net, u32 portid, int report) 1288void nf_conntrack_flush_report(struct net *net, u32 portid, int report)
1303{ 1289{
1304 struct __nf_ct_flush_report fr = { 1290 nf_ct_iterate_cleanup(net, kill_all, NULL, portid, report);
1305 .portid = portid,
1306 .report = report,
1307 };
1308 nf_ct_iterate_cleanup(net, kill_report, &fr);
1309} 1291}
1310EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); 1292EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
1311 1293
@@ -1351,6 +1333,7 @@ void nf_conntrack_cleanup_end(void)
1351 nf_ct_extend_unregister(&nf_ct_zone_extend); 1333 nf_ct_extend_unregister(&nf_ct_zone_extend);
1352#endif 1334#endif
1353 nf_conntrack_proto_fini(); 1335 nf_conntrack_proto_fini();
1336 nf_conntrack_seqadj_fini();
1354 nf_conntrack_labels_fini(); 1337 nf_conntrack_labels_fini();
1355 nf_conntrack_helper_fini(); 1338 nf_conntrack_helper_fini();
1356 nf_conntrack_timeout_fini(); 1339 nf_conntrack_timeout_fini();
@@ -1386,7 +1369,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
1386i_see_dead_people: 1369i_see_dead_people:
1387 busy = 0; 1370 busy = 0;
1388 list_for_each_entry(net, net_exit_list, exit_list) { 1371 list_for_each_entry(net, net_exit_list, exit_list) {
1389 nf_ct_iterate_cleanup(net, kill_all, NULL); 1372 nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
1390 nf_ct_release_dying_list(net); 1373 nf_ct_release_dying_list(net);
1391 if (atomic_read(&net->ct.count) != 0) 1374 if (atomic_read(&net->ct.count) != 0)
1392 busy = 1; 1375 busy = 1;
@@ -1556,6 +1539,10 @@ int nf_conntrack_init_start(void)
1556 if (ret < 0) 1539 if (ret < 0)
1557 goto err_labels; 1540 goto err_labels;
1558 1541
1542 ret = nf_conntrack_seqadj_init();
1543 if (ret < 0)
1544 goto err_seqadj;
1545
1559#ifdef CONFIG_NF_CONNTRACK_ZONES 1546#ifdef CONFIG_NF_CONNTRACK_ZONES
1560 ret = nf_ct_extend_register(&nf_ct_zone_extend); 1547 ret = nf_ct_extend_register(&nf_ct_zone_extend);
1561 if (ret < 0) 1548 if (ret < 0)
@@ -1580,6 +1567,8 @@ err_proto:
1580 nf_ct_extend_unregister(&nf_ct_zone_extend); 1567 nf_ct_extend_unregister(&nf_ct_zone_extend);
1581err_extend: 1568err_extend:
1582#endif 1569#endif
1570 nf_conntrack_seqadj_fini();
1571err_seqadj:
1583 nf_conntrack_labels_fini(); 1572 nf_conntrack_labels_fini();
1584err_labels: 1573err_labels:
1585 nf_conntrack_helper_fini(); 1574 nf_conntrack_helper_fini();
@@ -1602,9 +1591,6 @@ void nf_conntrack_init_end(void)
1602 /* For use by REJECT target */ 1591 /* For use by REJECT target */
1603 RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); 1592 RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
1604 RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); 1593 RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
1605
1606 /* Howto get NAT offsets */
1607 RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
1608} 1594}
1609 1595
1610/* 1596/*
@@ -1691,8 +1677,3 @@ err_slabname:
1691err_stat: 1677err_stat:
1692 return ret; 1678 return ret;
1693} 1679}
1694
1695s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
1696 enum ip_conntrack_dir dir,
1697 u32 seq);
1698EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index c63b618cd619..4fd1ca94fd4a 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -293,6 +293,11 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
293 sizeof(exp->tuple.dst.u3) - len); 293 sizeof(exp->tuple.dst.u3) - len);
294 294
295 exp->tuple.dst.u.all = *dst; 295 exp->tuple.dst.u.all = *dst;
296
297#ifdef CONFIG_NF_NAT_NEEDED
298 memset(&exp->saved_addr, 0, sizeof(exp->saved_addr));
299 memset(&exp->saved_proto, 0, sizeof(exp->saved_proto));
300#endif
296} 301}
297EXPORT_SYMBOL_GPL(nf_ct_expect_init); 302EXPORT_SYMBOL_GPL(nf_ct_expect_init);
298 303
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 6b217074237b..b8a0924064ef 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -55,10 +55,14 @@ unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
55 struct nf_conntrack_expect *exp); 55 struct nf_conntrack_expect *exp);
56EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); 56EXPORT_SYMBOL_GPL(nf_nat_ftp_hook);
57 57
58static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char); 58static int try_rfc959(const char *, size_t, struct nf_conntrack_man *,
59static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char); 59 char, unsigned int *);
60static int try_rfc1123(const char *, size_t, struct nf_conntrack_man *,
61 char, unsigned int *);
62static int try_eprt(const char *, size_t, struct nf_conntrack_man *,
63 char, unsigned int *);
60static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *, 64static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *,
61 char); 65 char, unsigned int *);
62 66
63static struct ftp_search { 67static struct ftp_search {
64 const char *pattern; 68 const char *pattern;
@@ -66,7 +70,7 @@ static struct ftp_search {
66 char skip; 70 char skip;
67 char term; 71 char term;
68 enum nf_ct_ftp_type ftptype; 72 enum nf_ct_ftp_type ftptype;
69 int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char); 73 int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char, unsigned int *);
70} search[IP_CT_DIR_MAX][2] = { 74} search[IP_CT_DIR_MAX][2] = {
71 [IP_CT_DIR_ORIGINAL] = { 75 [IP_CT_DIR_ORIGINAL] = {
72 { 76 {
@@ -90,10 +94,8 @@ static struct ftp_search {
90 { 94 {
91 .pattern = "227 ", 95 .pattern = "227 ",
92 .plen = sizeof("227 ") - 1, 96 .plen = sizeof("227 ") - 1,
93 .skip = '(',
94 .term = ')',
95 .ftptype = NF_CT_FTP_PASV, 97 .ftptype = NF_CT_FTP_PASV,
96 .getnum = try_rfc959, 98 .getnum = try_rfc1123,
97 }, 99 },
98 { 100 {
99 .pattern = "229 ", 101 .pattern = "229 ",
@@ -132,8 +134,9 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[],
132 i++; 134 i++;
133 else { 135 else {
134 /* Unexpected character; true if it's the 136 /* Unexpected character; true if it's the
135 terminator and we're finished. */ 137 terminator (or we don't care about one)
136 if (*data == term && i == array_size - 1) 138 and we're finished. */
139 if ((*data == term || !term) && i == array_size - 1)
137 return len; 140 return len;
138 141
139 pr_debug("Char %u (got %u nums) `%u' unexpected\n", 142 pr_debug("Char %u (got %u nums) `%u' unexpected\n",
@@ -148,7 +151,8 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[],
148 151
149/* Returns 0, or length of numbers: 192,168,1,1,5,6 */ 152/* Returns 0, or length of numbers: 192,168,1,1,5,6 */
150static int try_rfc959(const char *data, size_t dlen, 153static int try_rfc959(const char *data, size_t dlen,
151 struct nf_conntrack_man *cmd, char term) 154 struct nf_conntrack_man *cmd, char term,
155 unsigned int *offset)
152{ 156{
153 int length; 157 int length;
154 u_int32_t array[6]; 158 u_int32_t array[6];
@@ -163,6 +167,33 @@ static int try_rfc959(const char *data, size_t dlen,
163 return length; 167 return length;
164} 168}
165 169
170/*
171 * From RFC 1123:
172 * The format of the 227 reply to a PASV command is not
173 * well standardized. In particular, an FTP client cannot
174 * assume that the parentheses shown on page 40 of RFC-959
175 * will be present (and in fact, Figure 3 on page 43 omits
176 * them). Therefore, a User-FTP program that interprets
177 * the PASV reply must scan the reply for the first digit
178 * of the host and port numbers.
179 */
180static int try_rfc1123(const char *data, size_t dlen,
181 struct nf_conntrack_man *cmd, char term,
182 unsigned int *offset)
183{
184 int i;
185 for (i = 0; i < dlen; i++)
186 if (isdigit(data[i]))
187 break;
188
189 if (i == dlen)
190 return 0;
191
192 *offset += i;
193
194 return try_rfc959(data + i, dlen - i, cmd, 0, offset);
195}
196
166/* Grab port: number up to delimiter */ 197/* Grab port: number up to delimiter */
167static int get_port(const char *data, int start, size_t dlen, char delim, 198static int get_port(const char *data, int start, size_t dlen, char delim,
168 __be16 *port) 199 __be16 *port)
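
The try_rfc1123() helper added above exists because, per the RFC 1123 text
quoted in the diff, the parentheses in a 227 (PASV) reply are optional, so
the parser must scan for the first digit instead of relying on a fixed '('
skip character. A standalone illustration of that scan, operating on a
sample payload after the "227 " pattern has already been consumed:

#include <ctype.h>
#include <stdio.h>

int main(void)
{
	/* No parentheses, as RFC 1123 permits. */
	const char *payload = "Entering Passive Mode 10,0,0,1,4,31";
	unsigned int n[6];
	size_t i;

	for (i = 0; payload[i] && !isdigit((unsigned char)payload[i]); i++)
		;	/* skip to the first digit, wherever it is */

	if (sscanf(payload + i, "%u,%u,%u,%u,%u,%u",
		   &n[0], &n[1], &n[2], &n[3], &n[4], &n[5]) == 6)
		printf("addr %u.%u.%u.%u port %u\n",
		       n[0], n[1], n[2], n[3], n[4] * 256 + n[5]);
	return 0;
}
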
@@ -191,7 +222,7 @@ static int get_port(const char *data, int start, size_t dlen, char delim,
191 222
192/* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */ 223/* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */
193static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, 224static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
194 char term) 225 char term, unsigned int *offset)
195{ 226{
196 char delim; 227 char delim;
197 int length; 228 int length;
@@ -239,7 +270,8 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
239 270
240/* Returns 0, or length of numbers: |||6446| */ 271/* Returns 0, or length of numbers: |||6446| */
241static int try_epsv_response(const char *data, size_t dlen, 272static int try_epsv_response(const char *data, size_t dlen,
242 struct nf_conntrack_man *cmd, char term) 273 struct nf_conntrack_man *cmd, char term,
274 unsigned int *offset)
243{ 275{
244 char delim; 276 char delim;
245 277
@@ -261,9 +293,10 @@ static int find_pattern(const char *data, size_t dlen,
261 unsigned int *numlen, 293 unsigned int *numlen,
262 struct nf_conntrack_man *cmd, 294 struct nf_conntrack_man *cmd,
263 int (*getnum)(const char *, size_t, 295 int (*getnum)(const char *, size_t,
264 struct nf_conntrack_man *, char)) 296 struct nf_conntrack_man *, char,
297 unsigned int *))
265{ 298{
266 size_t i; 299 size_t i = plen;
267 300
268 pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen); 301 pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen);
269 if (dlen == 0) 302 if (dlen == 0)
@@ -293,16 +326,18 @@ static int find_pattern(const char *data, size_t dlen,
293 pr_debug("Pattern matches!\n"); 326 pr_debug("Pattern matches!\n");
294 /* Now we've found the constant string, try to skip 327 /* Now we've found the constant string, try to skip
295 to the 'skip' character */ 328 to the 'skip' character */
296 for (i = plen; data[i] != skip; i++) 329 if (skip) {
297 if (i == dlen - 1) return -1; 330 for (i = plen; data[i] != skip; i++)
331 if (i == dlen - 1) return -1;
298 332
299 /* Skip over the last character */ 333 /* Skip over the last character */
300 i++; 334 i++;
335 }
301 336
302 pr_debug("Skipped up to `%c'!\n", skip); 337 pr_debug("Skipped up to `%c'!\n", skip);
303 338
304 *numoff = i; 339 *numoff = i;
305 *numlen = getnum(data + i, dlen - i, cmd, term); 340 *numlen = getnum(data + i, dlen - i, cmd, term, numoff);
306 if (!*numlen) 341 if (!*numlen)
307 return -1; 342 return -1;
308 343
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index 8fe2e99428b7..bb53f120e79c 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -8,12 +8,8 @@
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10 10
11#include <linux/ctype.h>
12#include <linux/export.h> 11#include <linux/export.h>
13#include <linux/jhash.h>
14#include <linux/spinlock.h>
15#include <linux/types.h> 12#include <linux/types.h>
16#include <linux/slab.h>
17 13
18#include <net/netfilter/nf_conntrack_ecache.h> 14#include <net/netfilter/nf_conntrack_ecache.h>
19#include <net/netfilter/nf_conntrack_labels.h> 15#include <net/netfilter/nf_conntrack_labels.h>
@@ -45,7 +41,7 @@ int nf_connlabel_set(struct nf_conn *ct, u16 bit)
45 if (test_bit(bit, labels->bits)) 41 if (test_bit(bit, labels->bits))
46 return 0; 42 return 0;
47 43
48 if (test_and_set_bit(bit, labels->bits)) 44 if (!test_and_set_bit(bit, labels->bits))
49 nf_conntrack_event_cache(IPCT_LABEL, ct); 45 nf_conntrack_event_cache(IPCT_LABEL, ct);
50 46
51 return 0; 47 return 0;
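
The one-character fix above hinges on test_and_set_bit() returning the
bit's previous value: a label-change event should fire only when the bit
was newly set, i.e. on a zero return. A plain C stand-in for the semantics
(the kernel's version is atomic; this sketch is not):

#include <stdio.h>

static unsigned long bits;

static int test_and_set_bit(int nr, unsigned long *addr)
{
	int old = (int)((*addr >> nr) & 1);

	*addr |= 1UL << nr;
	return old;		/* previous value, not the new one */
}

int main(void)
{
	if (!test_and_set_bit(3, &bits))
		printf("bit 3 newly set: fire the event\n");
	if (!test_and_set_bit(3, &bits))
		printf("never printed: bit was already set\n");
	return 0;
}
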
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6d0f8a17c5b7..eea936b70d15 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -37,6 +37,7 @@
37#include <net/netfilter/nf_conntrack_core.h> 37#include <net/netfilter/nf_conntrack_core.h>
38#include <net/netfilter/nf_conntrack_expect.h> 38#include <net/netfilter/nf_conntrack_expect.h>
39#include <net/netfilter/nf_conntrack_helper.h> 39#include <net/netfilter/nf_conntrack_helper.h>
40#include <net/netfilter/nf_conntrack_seqadj.h>
40#include <net/netfilter/nf_conntrack_l3proto.h> 41#include <net/netfilter/nf_conntrack_l3proto.h>
41#include <net/netfilter/nf_conntrack_l4proto.h> 42#include <net/netfilter/nf_conntrack_l4proto.h>
42#include <net/netfilter/nf_conntrack_tuple.h> 43#include <net/netfilter/nf_conntrack_tuple.h>
@@ -381,9 +382,8 @@ nla_put_failure:
381 return -1; 382 return -1;
382} 383}
383 384
384#ifdef CONFIG_NF_NAT_NEEDED
385static int 385static int
386dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type) 386dump_ct_seq_adj(struct sk_buff *skb, const struct nf_ct_seqadj *seq, int type)
387{ 387{
388 struct nlattr *nest_parms; 388 struct nlattr *nest_parms;
389 389
@@ -391,12 +391,12 @@ dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type)
391 if (!nest_parms) 391 if (!nest_parms)
392 goto nla_put_failure; 392 goto nla_put_failure;
393 393
394 if (nla_put_be32(skb, CTA_NAT_SEQ_CORRECTION_POS, 394 if (nla_put_be32(skb, CTA_SEQADJ_CORRECTION_POS,
395 htonl(natseq->correction_pos)) || 395 htonl(seq->correction_pos)) ||
396 nla_put_be32(skb, CTA_NAT_SEQ_OFFSET_BEFORE, 396 nla_put_be32(skb, CTA_SEQADJ_OFFSET_BEFORE,
397 htonl(natseq->offset_before)) || 397 htonl(seq->offset_before)) ||
398 nla_put_be32(skb, CTA_NAT_SEQ_OFFSET_AFTER, 398 nla_put_be32(skb, CTA_SEQADJ_OFFSET_AFTER,
399 htonl(natseq->offset_after))) 399 htonl(seq->offset_after)))
400 goto nla_put_failure; 400 goto nla_put_failure;
401 401
402 nla_nest_end(skb, nest_parms); 402 nla_nest_end(skb, nest_parms);
@@ -408,27 +408,24 @@ nla_put_failure:
408} 408}
409 409
410static inline int 410static inline int
411ctnetlink_dump_nat_seq_adj(struct sk_buff *skb, const struct nf_conn *ct) 411ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, const struct nf_conn *ct)
412{ 412{
413 struct nf_nat_seq *natseq; 413 struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
414 struct nf_conn_nat *nat = nfct_nat(ct); 414 struct nf_ct_seqadj *seq;
415 415
416 if (!(ct->status & IPS_SEQ_ADJUST) || !nat) 416 if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj)
417 return 0; 417 return 0;
418 418
419 natseq = &nat->seq[IP_CT_DIR_ORIGINAL]; 419 seq = &seqadj->seq[IP_CT_DIR_ORIGINAL];
420 if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_ORIG) == -1) 420 if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1)
421 return -1; 421 return -1;
422 422
423 natseq = &nat->seq[IP_CT_DIR_REPLY]; 423 seq = &seqadj->seq[IP_CT_DIR_REPLY];
424 if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_REPLY) == -1) 424 if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1)
425 return -1; 425 return -1;
426 426
427 return 0; 427 return 0;
428} 428}
429#else
430#define ctnetlink_dump_nat_seq_adj(a, b) (0)
431#endif
432 429
433static inline int 430static inline int
434ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) 431ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
@@ -502,7 +499,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 	    ctnetlink_dump_id(skb, ct) < 0 ||
 	    ctnetlink_dump_use(skb, ct) < 0 ||
 	    ctnetlink_dump_master(skb, ct) < 0 ||
-	    ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
+	    ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
 		goto nla_put_failure;
 
 	nlmsg_end(skb, nlh);
@@ -707,8 +704,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 		    ctnetlink_dump_master(skb, ct) < 0)
 			goto nla_put_failure;
 
-		if (events & (1 << IPCT_NATSEQADJ) &&
-		    ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
+		if (events & (1 << IPCT_SEQADJ) &&
+		    ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
 			goto nla_put_failure;
 	}
 
@@ -828,7 +825,9 @@ ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple)
 	struct nf_conntrack_l3proto *l3proto;
 	int ret = 0;
 
-	nla_parse_nested(tb, CTA_IP_MAX, attr, NULL);
+	ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL);
+	if (ret < 0)
+		return ret;
 
 	rcu_read_lock();
 	l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
@@ -895,7 +894,9 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
 
 	memset(tuple, 0, sizeof(*tuple));
 
-	nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy);
+	err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy);
+	if (err < 0)
+		return err;
 
 	if (!tb[CTA_TUPLE_IP])
 		return -EINVAL;
@@ -946,9 +947,12 @@ static inline int
 ctnetlink_parse_help(const struct nlattr *attr, char **helper_name,
 		     struct nlattr **helpinfo)
 {
+	int err;
 	struct nlattr *tb[CTA_HELP_MAX+1];
 
-	nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy);
+	err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy);
+	if (err < 0)
+		return err;
 
 	if (!tb[CTA_HELP_NAME])
 		return -EINVAL;
@@ -1031,21 +1035,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		}
 	}
 
-	if (del_timer(&ct->timeout)) {
-		if (nf_conntrack_event_report(IPCT_DESTROY, ct,
-					      NETLINK_CB(skb).portid,
-					      nlmsg_report(nlh)) < 0) {
-			nf_ct_delete_from_lists(ct);
-			/* we failed to report the event, try later */
-			nf_ct_dying_timeout(ct);
-			nf_ct_put(ct);
-			return 0;
-		}
-		/* death_by_timeout would report the event again */
-		set_bit(IPS_DYING_BIT, &ct->status);
-		nf_ct_delete_from_lists(ct);
-		nf_ct_put(ct);
-	}
+	if (del_timer(&ct->timeout))
+		nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
+
 	nf_ct_put(ct);
 
 	return 0;
@@ -1431,7 +1423,9 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]
 	struct nf_conntrack_l4proto *l4proto;
 	int err = 0;
 
-	nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy);
+	err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy);
+	if (err < 0)
+		return err;
 
 	rcu_read_lock();
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
@@ -1442,63 +1436,65 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]
 	return err;
 }
 
-#ifdef CONFIG_NF_NAT_NEEDED
-static const struct nla_policy nat_seq_policy[CTA_NAT_SEQ_MAX+1] = {
-	[CTA_NAT_SEQ_CORRECTION_POS]	= { .type = NLA_U32 },
-	[CTA_NAT_SEQ_OFFSET_BEFORE]	= { .type = NLA_U32 },
-	[CTA_NAT_SEQ_OFFSET_AFTER]	= { .type = NLA_U32 },
+static const struct nla_policy seqadj_policy[CTA_SEQADJ_MAX+1] = {
+	[CTA_SEQADJ_CORRECTION_POS]	= { .type = NLA_U32 },
+	[CTA_SEQADJ_OFFSET_BEFORE]	= { .type = NLA_U32 },
+	[CTA_SEQADJ_OFFSET_AFTER]	= { .type = NLA_U32 },
 };
 
 static inline int
-change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr)
+change_seq_adj(struct nf_ct_seqadj *seq, const struct nlattr * const attr)
 {
-	struct nlattr *cda[CTA_NAT_SEQ_MAX+1];
+	int err;
+	struct nlattr *cda[CTA_SEQADJ_MAX+1];
 
-	nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy);
+	err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy);
+	if (err < 0)
+		return err;
 
-	if (!cda[CTA_NAT_SEQ_CORRECTION_POS])
+	if (!cda[CTA_SEQADJ_CORRECTION_POS])
 		return -EINVAL;
 
-	natseq->correction_pos =
-		ntohl(nla_get_be32(cda[CTA_NAT_SEQ_CORRECTION_POS]));
+	seq->correction_pos =
+		ntohl(nla_get_be32(cda[CTA_SEQADJ_CORRECTION_POS]));
 
-	if (!cda[CTA_NAT_SEQ_OFFSET_BEFORE])
+	if (!cda[CTA_SEQADJ_OFFSET_BEFORE])
 		return -EINVAL;
 
-	natseq->offset_before =
-		ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_BEFORE]));
+	seq->offset_before =
+		ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_BEFORE]));
 
-	if (!cda[CTA_NAT_SEQ_OFFSET_AFTER])
+	if (!cda[CTA_SEQADJ_OFFSET_AFTER])
 		return -EINVAL;
 
-	natseq->offset_after =
-		ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_AFTER]));
+	seq->offset_after =
+		ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_AFTER]));
 
 	return 0;
 }
 
 static int
-ctnetlink_change_nat_seq_adj(struct nf_conn *ct,
-			     const struct nlattr * const cda[])
+ctnetlink_change_seq_adj(struct nf_conn *ct,
+			 const struct nlattr * const cda[])
 {
+	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
 	int ret = 0;
-	struct nf_conn_nat *nat = nfct_nat(ct);
 
-	if (!nat)
+	if (!seqadj)
 		return 0;
 
-	if (cda[CTA_NAT_SEQ_ADJ_ORIG]) {
-		ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_ORIGINAL],
-					 cda[CTA_NAT_SEQ_ADJ_ORIG]);
+	if (cda[CTA_SEQ_ADJ_ORIG]) {
+		ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL],
+				     cda[CTA_SEQ_ADJ_ORIG]);
 		if (ret < 0)
 			return ret;
 
 		ct->status |= IPS_SEQ_ADJUST;
 	}
 
-	if (cda[CTA_NAT_SEQ_ADJ_REPLY]) {
-		ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_REPLY],
-					 cda[CTA_NAT_SEQ_ADJ_REPLY]);
+	if (cda[CTA_SEQ_ADJ_REPLY]) {
+		ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY],
+				     cda[CTA_SEQ_ADJ_REPLY]);
 		if (ret < 0)
 			return ret;
 
@@ -1507,7 +1503,6 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct,
 
 	return 0;
 }
-#endif
 
 static int
 ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[])
@@ -1573,13 +1568,12 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
 		ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
 #endif
 
-#ifdef CONFIG_NF_NAT_NEEDED
-	if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) {
-		err = ctnetlink_change_nat_seq_adj(ct, cda);
+	if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
+		err = ctnetlink_change_seq_adj(ct, cda);
 		if (err < 0)
 			return err;
 	}
-#endif
+
 	if (cda[CTA_LABELS]) {
 		err = ctnetlink_attach_labels(ct, cda);
 		if (err < 0)
@@ -1684,13 +1678,11 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 		goto err2;
 	}
 
-#ifdef CONFIG_NF_NAT_NEEDED
-	if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) {
-		err = ctnetlink_change_nat_seq_adj(ct, cda);
+	if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
+		err = ctnetlink_change_seq_adj(ct, cda);
 		if (err < 0)
 			goto err2;
 	}
-#endif
 
 	memset(&ct->proto, 0, sizeof(ct->proto));
 	if (cda[CTA_PROTOINFO]) {
@@ -1804,7 +1796,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 				      (1 << IPCT_ASSURED) |
 				      (1 << IPCT_HELPER) |
 				      (1 << IPCT_PROTOINFO) |
-				      (1 << IPCT_NATSEQADJ) |
+				      (1 << IPCT_SEQADJ) |
 				      (1 << IPCT_MARK) | events,
 				      ct, NETLINK_CB(skb).portid,
 				      nlmsg_report(nlh));
@@ -1825,8 +1817,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
 						      (1 << IPCT_ASSURED) |
 						      (1 << IPCT_HELPER) |
+						      (1 << IPCT_LABEL) |
 						      (1 << IPCT_PROTOINFO) |
-						      (1 << IPCT_NATSEQADJ) |
+						      (1 << IPCT_SEQADJ) |
 						      (1 << IPCT_MARK),
 						      ct, NETLINK_CB(skb).portid,
 						      nlmsg_report(nlh));
@@ -1986,6 +1979,27 @@ out:
 	return err == -EAGAIN ? -ENOBUFS : err;
 }
 
+static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
+	[CTA_EXPECT_MASTER]	= { .type = NLA_NESTED },
+	[CTA_EXPECT_TUPLE]	= { .type = NLA_NESTED },
+	[CTA_EXPECT_MASK]	= { .type = NLA_NESTED },
+	[CTA_EXPECT_TIMEOUT]	= { .type = NLA_U32 },
+	[CTA_EXPECT_ID]		= { .type = NLA_U32 },
+	[CTA_EXPECT_HELP_NAME]	= { .type = NLA_NUL_STRING,
+				    .len = NF_CT_HELPER_NAME_LEN - 1 },
+	[CTA_EXPECT_ZONE]	= { .type = NLA_U16 },
+	[CTA_EXPECT_FLAGS]	= { .type = NLA_U32 },
+	[CTA_EXPECT_CLASS]	= { .type = NLA_U32 },
+	[CTA_EXPECT_NAT]	= { .type = NLA_NESTED },
+	[CTA_EXPECT_FN]		= { .type = NLA_NUL_STRING },
+};
+
+static struct nf_conntrack_expect *
+ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct,
+		       struct nf_conntrack_helper *helper,
+		       struct nf_conntrack_tuple *tuple,
+		       struct nf_conntrack_tuple *mask);
+
 #ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT
 static size_t
 ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
@@ -2060,7 +2074,7 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct)
 		goto nla_put_failure;
 
 	if ((ct->status & IPS_SEQ_ADJUST) &&
-	    ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
+	    ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
 		goto nla_put_failure;
 
 #ifdef CONFIG_NF_CONNTRACK_MARK
@@ -2115,7 +2129,9 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
 	struct nlattr *cda[CTA_MAX+1];
 	int ret;
 
-	nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy);
+	ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy);
+	if (ret < 0)
+		return ret;
 
 	spin_lock_bh(&nf_conntrack_lock);
 	ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
@@ -2124,10 +2140,70 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
 	return ret;
 }
 
+static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda,
+				       const struct nf_conn *ct,
+				       struct nf_conntrack_tuple *tuple,
+				       struct nf_conntrack_tuple *mask)
+{
+	int err;
+
+	err = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE,
+				    nf_ct_l3num(ct));
+	if (err < 0)
+		return err;
+
+	return ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK,
+				     nf_ct_l3num(ct));
+}
+
+static int
+ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
+				u32 portid, u32 report)
+{
+	struct nlattr *cda[CTA_EXPECT_MAX+1];
+	struct nf_conntrack_tuple tuple, mask;
+	struct nf_conntrack_helper *helper = NULL;
+	struct nf_conntrack_expect *exp;
+	int err;
+
+	err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy);
+	if (err < 0)
+		return err;
+
+	err = ctnetlink_nfqueue_exp_parse((const struct nlattr * const *)cda,
+					  ct, &tuple, &mask);
+	if (err < 0)
+		return err;
+
+	if (cda[CTA_EXPECT_HELP_NAME]) {
+		const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
+
+		helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
+						    nf_ct_protonum(ct));
+		if (helper == NULL)
+			return -EOPNOTSUPP;
+	}
+
+	exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct,
+				     helper, &tuple, &mask);
+	if (IS_ERR(exp))
+		return PTR_ERR(exp);
+
+	err = nf_ct_expect_related_report(exp, portid, report);
+	if (err < 0) {
+		nf_ct_expect_put(exp);
+		return err;
+	}
+
+	return 0;
+}
+
 static struct nfq_ct_hook ctnetlink_nfqueue_hook = {
 	.build_size	= ctnetlink_nfqueue_build_size,
 	.build		= ctnetlink_nfqueue_build,
 	.parse		= ctnetlink_nfqueue_parse,
+	.attach_expect	= ctnetlink_nfqueue_attach_expect,
+	.seq_adjust	= nf_ct_tcp_seqadj_set,
 };
 #endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */
 
@@ -2495,21 +2571,6 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
 	return err;
 }
 
-static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
-	[CTA_EXPECT_MASTER]	= { .type = NLA_NESTED },
-	[CTA_EXPECT_TUPLE]	= { .type = NLA_NESTED },
-	[CTA_EXPECT_MASK]	= { .type = NLA_NESTED },
-	[CTA_EXPECT_TIMEOUT]	= { .type = NLA_U32 },
-	[CTA_EXPECT_ID]		= { .type = NLA_U32 },
-	[CTA_EXPECT_HELP_NAME]	= { .type = NLA_NUL_STRING,
-				    .len = NF_CT_HELPER_NAME_LEN - 1 },
-	[CTA_EXPECT_ZONE]	= { .type = NLA_U16 },
-	[CTA_EXPECT_FLAGS]	= { .type = NLA_U32 },
-	[CTA_EXPECT_CLASS]	= { .type = NLA_U32 },
-	[CTA_EXPECT_NAT]	= { .type = NLA_NESTED },
-	[CTA_EXPECT_FN]		= { .type = NLA_NUL_STRING },
-};
-
 static int
 ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 		     const struct nlmsghdr *nlh,
@@ -2710,7 +2771,9 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
 	struct nf_conntrack_tuple nat_tuple = {};
 	int err;
 
-	nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy);
+	err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy);
+	if (err < 0)
+		return err;
 
 	if (!tb[CTA_EXPECT_NAT_DIR] || !tb[CTA_EXPECT_NAT_TUPLE])
 		return -EINVAL;
@@ -2730,76 +2793,26 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
 #endif
 }
 
-static int
-ctnetlink_create_expect(struct net *net, u16 zone,
-			const struct nlattr * const cda[],
-			u_int8_t u3,
-			u32 portid, int report)
+static struct nf_conntrack_expect *
+ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
+		       struct nf_conntrack_helper *helper,
+		       struct nf_conntrack_tuple *tuple,
+		       struct nf_conntrack_tuple *mask)
 {
-	struct nf_conntrack_tuple tuple, mask, master_tuple;
-	struct nf_conntrack_tuple_hash *h = NULL;
+	u_int32_t class = 0;
 	struct nf_conntrack_expect *exp;
-	struct nf_conn *ct;
 	struct nf_conn_help *help;
-	struct nf_conntrack_helper *helper = NULL;
-	u_int32_t class = 0;
-	int err = 0;
-
-	/* caller guarantees that those three CTA_EXPECT_* exist */
-	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
-	if (err < 0)
-		return err;
-	err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
-	if (err < 0)
-		return err;
-	err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
-	if (err < 0)
-		return err;
-
-	/* Look for master conntrack of this expectation */
-	h = nf_conntrack_find_get(net, zone, &master_tuple);
-	if (!h)
-		return -ENOENT;
-	ct = nf_ct_tuplehash_to_ctrack(h);
-
-	/* Look for helper of this expectation */
-	if (cda[CTA_EXPECT_HELP_NAME]) {
-		const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
-
-		helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
-						    nf_ct_protonum(ct));
-		if (helper == NULL) {
-#ifdef CONFIG_MODULES
-			if (request_module("nfct-helper-%s", helpname) < 0) {
-				err = -EOPNOTSUPP;
-				goto out;
-			}
-
-			helper = __nf_conntrack_helper_find(helpname,
-							    nf_ct_l3num(ct),
-							    nf_ct_protonum(ct));
-			if (helper) {
-				err = -EAGAIN;
-				goto out;
-			}
-#endif
-			err = -EOPNOTSUPP;
-			goto out;
-		}
-	}
+	int err;
 
 	if (cda[CTA_EXPECT_CLASS] && helper) {
 		class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS]));
-		if (class > helper->expect_class_max) {
-			err = -EINVAL;
-			goto out;
-		}
+		if (class > helper->expect_class_max)
+			return ERR_PTR(-EINVAL);
 	}
 	exp = nf_ct_expect_alloc(ct);
-	if (!exp) {
-		err = -ENOMEM;
-		goto out;
-	}
+	if (!exp)
+		return ERR_PTR(-ENOMEM);
+
 	help = nfct_help(ct);
 	if (!help) {
 		if (!cda[CTA_EXPECT_TIMEOUT]) {
@@ -2837,21 +2850,89 @@ ctnetlink_create_expect(struct net *net, u16 zone,
 	exp->class = class;
 	exp->master = ct;
 	exp->helper = helper;
-	memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple));
-	memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3));
-	exp->mask.src.u.all = mask.src.u.all;
+	exp->tuple = *tuple;
+	exp->mask.src.u3 = mask->src.u3;
+	exp->mask.src.u.all = mask->src.u.all;
 
 	if (cda[CTA_EXPECT_NAT]) {
 		err = ctnetlink_parse_expect_nat(cda[CTA_EXPECT_NAT],
-						 exp, u3);
+						 exp, nf_ct_l3num(ct));
 		if (err < 0)
 			goto err_out;
 	}
-	err = nf_ct_expect_related_report(exp, portid, report);
+	return exp;
 err_out:
 	nf_ct_expect_put(exp);
-out:
-	nf_ct_put(nf_ct_tuplehash_to_ctrack(h));
+	return ERR_PTR(err);
+}
+
+static int
+ctnetlink_create_expect(struct net *net, u16 zone,
+			const struct nlattr * const cda[],
+			u_int8_t u3, u32 portid, int report)
+{
+	struct nf_conntrack_tuple tuple, mask, master_tuple;
+	struct nf_conntrack_tuple_hash *h = NULL;
+	struct nf_conntrack_helper *helper = NULL;
+	struct nf_conntrack_expect *exp;
+	struct nf_conn *ct;
+	int err;
+
+	/* caller guarantees that those three CTA_EXPECT_* exist */
+	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+	if (err < 0)
+		return err;
+	err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
+	if (err < 0)
+		return err;
+	err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
+	if (err < 0)
+		return err;
+
+	/* Look for master conntrack of this expectation */
+	h = nf_conntrack_find_get(net, zone, &master_tuple);
+	if (!h)
+		return -ENOENT;
+	ct = nf_ct_tuplehash_to_ctrack(h);
+
+	if (cda[CTA_EXPECT_HELP_NAME]) {
+		const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
+
+		helper = __nf_conntrack_helper_find(helpname, u3,
+						    nf_ct_protonum(ct));
+		if (helper == NULL) {
+#ifdef CONFIG_MODULES
+			if (request_module("nfct-helper-%s", helpname) < 0) {
+				err = -EOPNOTSUPP;
+				goto err_ct;
+			}
+			helper = __nf_conntrack_helper_find(helpname, u3,
+							    nf_ct_protonum(ct));
+			if (helper) {
+				err = -EAGAIN;
+				goto err_ct;
+			}
+#endif
+			err = -EOPNOTSUPP;
+			goto err_ct;
+		}
+	}
+
+	exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask);
+	if (IS_ERR(exp)) {
+		err = PTR_ERR(exp);
+		goto err_ct;
+	}
+
+	err = nf_ct_expect_related_report(exp, portid, report);
+	if (err < 0)
+		goto err_exp;
+
+	return 0;
+err_exp:
+	nf_ct_expect_put(exp);
+err_ct:
+	nf_ct_put(ct);
 	return err;
 }
 
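A recurring change in the ctnetlink hunks above is that the return value of nla_parse_nested() is no longer ignored: parse errors are now propagated to the caller instead of continuing with a possibly half-filled attribute table. The standalone C sketch below illustrates only that error-propagation pattern; mock_parse_nested() and its error value are hypothetical stand-ins so the sketch compiles on its own, not kernel API.

/* Sketch of the "check and propagate" pattern the patch applies to every
 * nla_parse_nested() call site. mock_parse_nested() is a hypothetical
 * stand-in for the kernel helper.
 */
#include <stdio.h>

static int mock_parse_nested(const char *attr)
{
	return attr ? 0 : -22;	/* -EINVAL when the attribute is malformed */
}

static int parse_tuple(const char *attr)
{
	int err;

	err = mock_parse_nested(attr);	/* was: return value ignored */
	if (err < 0)
		return err;		/* now: propagated to the caller */

	/* ... continue only with validated attributes ... */
	return 0;
}

int main(void)
{
	printf("good attr: %d, bad attr: %d\n",
	       parse_tuple("CTA_TUPLE_IP"), parse_tuple(NULL));
	return 0;
}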
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 0ab9636ac57e..ce3004156eeb 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -281,7 +281,7 @@ void nf_ct_l3proto_pernet_unregister(struct net *net,
 	nf_ct_l3proto_unregister_sysctl(net, proto);
 
 	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_cleanup(net, kill_l3proto, proto);
+	nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
 
@@ -476,7 +476,7 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
 	nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
 
 	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
+	nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
 
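Both hunks in this file only adapt callers to a widened nf_ct_iterate_cleanup() signature, which now carries a netlink portid and report flag down to the code that kills each matching entry. A toy, self-contained model of that calling convention follows; every name in it is an illustrative stand-in, not the kernel's types.

/* Toy model: two extra arguments (portid, report) ride along with the
 * cleanup iteration so deletions can be attributed to the requester.
 */
#include <stdio.h>

struct entry { int proto; };

static void delete_entry(struct entry *e, unsigned int portid, int report)
{
	printf("deleting proto %d (portid=%u, report=%d)\n",
	       e->proto, portid, report);
}

static void iterate_cleanup(struct entry *tbl, int n,
			    int (*iter)(struct entry *, void *), void *data,
			    unsigned int portid, int report)
{
	for (int i = 0; i < n; i++)
		if (iter(&tbl[i], data))
			delete_entry(&tbl[i], portid, report);
}

static int kill_l4proto(struct entry *e, void *data)
{
	return e->proto == *(int *)data;	/* match the protocol to remove */
}

int main(void)
{
	struct entry tbl[] = { { 6 }, { 17 }, { 6 } };
	int tcp = 6;

	iterate_cleanup(tbl, 3, kill_l4proto, &tcp, 0, 0);
	return 0;
}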
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 4d4d8f1d01fc..44d1ea32570a 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -27,6 +27,8 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
 #include <net/netfilter/nf_log.h>
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
@@ -495,21 +497,6 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
 	}
 }
 
-#ifdef CONFIG_NF_NAT_NEEDED
-static inline s16 nat_offset(const struct nf_conn *ct,
-			     enum ip_conntrack_dir dir,
-			     u32 seq)
-{
-	typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset);
-
-	return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
-}
-#define NAT_OFFSET(ct, dir, seq) \
-	(nat_offset(ct, dir, seq))
-#else
-#define NAT_OFFSET(ct, dir, seq)	0
-#endif
-
 static bool tcp_in_window(const struct nf_conn *ct,
 			  struct ip_ct_tcp *state,
 			  enum ip_conntrack_dir dir,
@@ -525,8 +512,8 @@ static bool tcp_in_window(const struct nf_conn *ct,
 	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
 	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
 	__u32 seq, ack, sack, end, win, swin;
-	s16 receiver_offset;
-	bool res;
+	s32 receiver_offset;
+	bool res, in_recv_win;
 
 	/*
 	 * Get the required data from the packet.
@@ -540,7 +527,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
 	tcp_sack(skb, dataoff, tcph, &sack);
 
 	/* Take into account NAT sequence number mangling */
-	receiver_offset = NAT_OFFSET(ct, !dir, ack - 1);
+	receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
 	ack -= receiver_offset;
 	sack -= receiver_offset;
 
@@ -649,14 +636,18 @@ static bool tcp_in_window(const struct nf_conn *ct,
 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
 		 receiver->td_scale);
 
+	/* Is the ending sequence in the receive window (if available)? */
+	in_recv_win = !receiver->td_maxwin ||
+		      after(end, sender->td_end - receiver->td_maxwin - 1);
+
 	pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
 		 before(seq, sender->td_maxend + 1),
-		 after(end, sender->td_end - receiver->td_maxwin - 1),
+		 (in_recv_win ? 1 : 0),
 		 before(sack, receiver->td_end + 1),
 		 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
 
 	if (before(seq, sender->td_maxend + 1) &&
-	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
+	    in_recv_win &&
 	    before(sack, receiver->td_end + 1) &&
 	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
 		/*
@@ -725,7 +716,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
 		nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 			      "nf_ct_tcp: %s ",
 			      before(seq, sender->td_maxend + 1) ?
-			      after(end, sender->td_end - receiver->td_maxwin - 1) ?
+			      in_recv_win ?
 			      before(sack, receiver->td_end + 1) ?
 			      after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
 			      : "ACK is under the lower bound (possible overly delayed ACK)"
@@ -956,6 +947,21 @@ static int tcp_packet(struct nf_conn *ct,
956 "state %s ", tcp_conntrack_names[old_state]); 947 "state %s ", tcp_conntrack_names[old_state]);
957 return NF_ACCEPT; 948 return NF_ACCEPT;
958 case TCP_CONNTRACK_MAX: 949 case TCP_CONNTRACK_MAX:
950 /* Special case for SYN proxy: when the SYN to the server or
951 * the SYN/ACK from the server is lost, the client may transmit
952 * a keep-alive packet while in SYN_SENT state. This needs to
953 * be associated with the original conntrack entry in order to
954 * generate a new SYN with the correct sequence number.
955 */
956 if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
957 index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
958 ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
959 ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
960 pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
961 spin_unlock_bh(&ct->lock);
962 return NF_ACCEPT;
963 }
964
959 /* Invalid packet */ 965 /* Invalid packet */
960 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", 966 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
961 dir, get_conntrack_index(th), old_state); 967 dir, get_conntrack_index(th), old_state);
@@ -1043,6 +1049,12 @@ static int tcp_packet(struct nf_conn *ct,
 			nf_ct_kill_acct(ct, ctinfo, skb);
 			return NF_ACCEPT;
 		}
+		/* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
+		 * pickup with loose=1. Avoid large ESTABLISHED timeout.
+		 */
+		if (new_state == TCP_CONNTRACK_ESTABLISHED &&
+		    timeout > timeouts[TCP_CONNTRACK_UNACK])
+			timeout = timeouts[TCP_CONNTRACK_UNACK];
 	} else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
 		   && (old_state == TCP_CONNTRACK_SYN_RECV
 		       || old_state == TCP_CONNTRACK_ESTABLISHED)
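The tcp_in_window() change above folds the receive-window test into the new in_recv_win flag and, notably, skips the test entirely while receiver->td_maxwin is still zero (e.g. a connection picked up mid-stream). Below is a small self-contained demo of the wraparound-safe before()/after() comparison and of that guard; the sequence values are arbitrary examples, not taken from the patch.

/* Demo: after() stays correct across the 2^32 sequence wrap, and an
 * unknown receive window (td_maxwin == 0) never rejects a segment.
 */
#include <stdio.h>
#include <stdint.h>

#define before(a, b)	((int32_t)((uint32_t)(a) - (uint32_t)(b)) < 0)
#define after(a, b)	before(b, a)

int main(void)
{
	uint32_t td_end = 0xfffffff0u;	/* sender position near wraparound */
	uint32_t end = 0x00000010u;	/* segment end, already wrapped */
	uint32_t td_maxwin = 4096;

	int in_recv_win = !td_maxwin ||
			  after(end, td_end - td_maxwin - 1);
	printf("in_recv_win (window known): %d\n", in_recv_win);	/* 1 */

	td_maxwin = 0;			/* window not seen yet: don't reject */
	in_recv_win = !td_maxwin ||
		      after(end, td_end - td_maxwin - 1);
	printf("in_recv_win (window unknown): %d\n", in_recv_win);	/* 1 */
	return 0;
}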
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
new file mode 100644
index 000000000000..5f9bfd060dea
--- /dev/null
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -0,0 +1,238 @@
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <net/tcp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+
+int nf_ct_seqadj_init(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+		      s32 off)
+{
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_conn_seqadj *seqadj;
+	struct nf_ct_seqadj *this_way;
+
+	if (off == 0)
+		return 0;
+
+	set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+
+	seqadj = nfct_seqadj(ct);
+	this_way = &seqadj->seq[dir];
+	this_way->offset_before = off;
+	this_way->offset_after = off;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seqadj_init);
+
+int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+		     __be32 seq, s32 off)
+{
+	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_ct_seqadj *this_way;
+
+	if (off == 0)
+		return 0;
+
+	set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+
+	spin_lock_bh(&ct->lock);
+	this_way = &seqadj->seq[dir];
+	if (this_way->offset_before == this_way->offset_after ||
+	    before(this_way->correction_pos, seq)) {
+		this_way->correction_pos = seq;
+		this_way->offset_before = this_way->offset_after;
+		this_way->offset_after += off;
+	}
+	spin_unlock_bh(&ct->lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seqadj_set);
+
+void nf_ct_tcp_seqadj_set(struct sk_buff *skb,
+			  struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+			  s32 off)
+{
+	const struct tcphdr *th;
+
+	if (nf_ct_protonum(ct) != IPPROTO_TCP)
+		return;
+
+	th = (struct tcphdr *)(skb_network_header(skb) + ip_hdrlen(skb));
+	nf_ct_seqadj_set(ct, ctinfo, th->seq, off);
+}
+EXPORT_SYMBOL_GPL(nf_ct_tcp_seqadj_set);
+
+/* Adjust one found SACK option including checksum correction */
+static void nf_ct_sack_block_adjust(struct sk_buff *skb,
+				    struct tcphdr *tcph,
+				    unsigned int sackoff,
+				    unsigned int sackend,
+				    struct nf_ct_seqadj *seq)
+{
+	while (sackoff < sackend) {
+		struct tcp_sack_block_wire *sack;
+		__be32 new_start_seq, new_end_seq;
+
+		sack = (void *)skb->data + sackoff;
+		if (after(ntohl(sack->start_seq) - seq->offset_before,
+			  seq->correction_pos))
+			new_start_seq = htonl(ntohl(sack->start_seq) -
+					      seq->offset_after);
+		else
+			new_start_seq = htonl(ntohl(sack->start_seq) -
+					      seq->offset_before);
+
+		if (after(ntohl(sack->end_seq) - seq->offset_before,
+			  seq->correction_pos))
+			new_end_seq = htonl(ntohl(sack->end_seq) -
+					    seq->offset_after);
+		else
+			new_end_seq = htonl(ntohl(sack->end_seq) -
+					    seq->offset_before);
+
+		pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
+			 ntohl(sack->start_seq), new_start_seq,
+			 ntohl(sack->end_seq), new_end_seq);
+
+		inet_proto_csum_replace4(&tcph->check, skb,
+					 sack->start_seq, new_start_seq, 0);
+		inet_proto_csum_replace4(&tcph->check, skb,
+					 sack->end_seq, new_end_seq, 0);
+		sack->start_seq = new_start_seq;
+		sack->end_seq = new_end_seq;
+		sackoff += sizeof(*sack);
+	}
+}
+
+/* TCP SACK sequence number adjustment */
+static unsigned int nf_ct_sack_adjust(struct sk_buff *skb,
+				      unsigned int protoff,
+				      struct tcphdr *tcph,
+				      struct nf_conn *ct,
+				      enum ip_conntrack_info ctinfo)
+{
+	unsigned int dir, optoff, optend;
+	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+
+	optoff = protoff + sizeof(struct tcphdr);
+	optend = protoff + tcph->doff * 4;
+
+	if (!skb_make_writable(skb, optend))
+		return 0;
+
+	dir = CTINFO2DIR(ctinfo);
+
+	while (optoff < optend) {
+		/* Usually: option, length. */
+		unsigned char *op = skb->data + optoff;
+
+		switch (op[0]) {
+		case TCPOPT_EOL:
+			return 1;
+		case TCPOPT_NOP:
+			optoff++;
+			continue;
+		default:
+			/* no partial options */
+			if (optoff + 1 == optend ||
+			    optoff + op[1] > optend ||
+			    op[1] < 2)
+				return 0;
+			if (op[0] == TCPOPT_SACK &&
+			    op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
+			    ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
+				nf_ct_sack_block_adjust(skb, tcph, optoff + 2,
+							optoff+op[1],
+							&seqadj->seq[!dir]);
+			optoff += op[1];
+		}
+	}
+	return 1;
+}
+
+/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
+int nf_ct_seq_adjust(struct sk_buff *skb,
+		     struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+		     unsigned int protoff)
+{
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct tcphdr *tcph;
+	__be32 newseq, newack;
+	s32 seqoff, ackoff;
+	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+	struct nf_ct_seqadj *this_way, *other_way;
+	int res;
+
+	this_way = &seqadj->seq[dir];
+	other_way = &seqadj->seq[!dir];
+
+	if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
+		return 0;
+
+	tcph = (void *)skb->data + protoff;
+	spin_lock_bh(&ct->lock);
+	if (after(ntohl(tcph->seq), this_way->correction_pos))
+		seqoff = this_way->offset_after;
+	else
+		seqoff = this_way->offset_before;
+
+	if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
+		  other_way->correction_pos))
+		ackoff = other_way->offset_after;
+	else
+		ackoff = other_way->offset_before;
+
+	newseq = htonl(ntohl(tcph->seq) + seqoff);
+	newack = htonl(ntohl(tcph->ack_seq) - ackoff);
+
+	inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
+	inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
+
+	pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
+		 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
+		 ntohl(newack));
+
+	tcph->seq = newseq;
+	tcph->ack_seq = newack;
+
+	res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
+	spin_unlock_bh(&ct->lock);
+
+	return res;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seq_adjust);
+
+s32 nf_ct_seq_offset(const struct nf_conn *ct,
+		     enum ip_conntrack_dir dir,
+		     u32 seq)
+{
+	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+	struct nf_ct_seqadj *this_way;
+
+	if (!seqadj)
+		return 0;
+
+	this_way = &seqadj->seq[dir];
+	return after(seq, this_way->correction_pos) ?
+		this_way->offset_after : this_way->offset_before;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seq_offset);
+
+static struct nf_ct_ext_type nf_ct_seqadj_extend __read_mostly = {
+	.len	= sizeof(struct nf_conn_seqadj),
+	.align	= __alignof__(struct nf_conn_seqadj),
+	.id	= NF_CT_EXT_SEQADJ,
+};
+
+int nf_conntrack_seqadj_init(void)
+{
+	return nf_ct_extend_register(&nf_ct_seqadj_extend);
+}
+
+void nf_conntrack_seqadj_fini(void)
+{
+	nf_ct_extend_unregister(&nf_ct_seqadj_extend);
+}
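The core of this new extension is the (correction_pos, offset_before, offset_after) triple kept per direction: packets whose sequence number lies after the point where a helper grew or shrank the payload get the new offset, while older segments (retransmits) get the previous one, exactly as nf_ct_seq_offset() selects above. A standalone toy version of that selection rule follows; the struct and the values are illustrative, not the kernel types.

/* Toy version of the offset-selection rule in nf_ct_seq_offset(). */
#include <stdio.h>
#include <stdint.h>

#define after(a, b)	((int32_t)((uint32_t)(b) - (uint32_t)(a)) < 0)

struct toy_seqadj {
	uint32_t correction_pos;	/* sequence where the size change happened */
	int32_t offset_before;		/* applies to older segments (retransmits) */
	int32_t offset_after;		/* applies to everything after the change */
};

static int32_t seq_offset(const struct toy_seqadj *s, uint32_t seq)
{
	return after(seq, s->correction_pos) ? s->offset_after
					     : s->offset_before;
}

int main(void)
{
	/* a helper rewrite grew the payload by 8 bytes at seq 1000 */
	struct toy_seqadj adj = { 1000, 0, 8 };

	printf("seq 900 -> %+d, seq 1500 -> %+d\n",
	       seq_offset(&adj, 900), seq_offset(&adj, 1500));
	return 0;
}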
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index bd700b4013c1..f641751dba9d 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -408,7 +408,7 @@ static int log_invalid_proto_max = 255;
 
 static struct ctl_table_header *nf_ct_netfilter_header;
 
-static ctl_table nf_ct_sysctl_table[] = {
+static struct ctl_table nf_ct_sysctl_table[] = {
 	{
 		.procname	= "nf_conntrack_max",
 		.data		= &nf_conntrack_max,
@@ -458,7 +458,7 @@ static ctl_table nf_ct_sysctl_table[] = {
 
 #define NET_NF_CONNTRACK_MAX 2089
 
-static ctl_table nf_ct_netfilter_table[] = {
+static struct ctl_table nf_ct_netfilter_table[] = {
 	{
 		.procname	= "nf_conntrack_max",
 		.data		= &nf_conntrack_max,
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 3b18dd1be7d9..85296d4eac0e 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -245,7 +245,7 @@ static const struct file_operations nflog_file_ops = {
 static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
 static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
 
-static int nf_log_proc_dostring(ctl_table *table, int write,
+static int nf_log_proc_dostring(struct ctl_table *table, int write,
 				void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	const struct nf_logger *logger;
@@ -369,9 +369,7 @@ static int __net_init nf_log_net_init(struct net *net)
 
 out_sysctl:
 #ifdef CONFIG_PROC_FS
-	/* For init_net: errors will trigger panic, don't unroll on error. */
-	if (!net_eq(net, &init_net))
-		remove_proc_entry("nf_log", net->nf.proc_netfilter);
+	remove_proc_entry("nf_log", net->nf.proc_netfilter);
 #endif
 	return ret;
 }
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 038eee5c8f85..6f0f4f7f68a5 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -25,6 +25,7 @@
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_nat.h>
@@ -402,6 +403,9 @@ nf_nat_setup_info(struct nf_conn *ct,
 			ct->status |= IPS_SRC_NAT;
 		else
 			ct->status |= IPS_DST_NAT;
+
+		if (nfct_help(ct))
+			nfct_seqadj_ext_add(ct);
 	}
 
 	if (maniptype == NF_NAT_MANIP_SRC) {
@@ -497,7 +501,7 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
 
 	rtnl_lock();
 	for_each_net(net)
-		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean);
+		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
 	rtnl_unlock();
 }
 
@@ -511,7 +515,7 @@ static void nf_nat_l3proto_clean(u8 l3proto)
 	rtnl_lock();
 
 	for_each_net(net)
-		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean);
+		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
 	rtnl_unlock();
 }
 
@@ -749,7 +753,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
 {
 	struct nf_nat_proto_clean clean = {};
 
-	nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean);
+	nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean, 0, 0);
 	synchronize_rcu();
 	nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
 }
@@ -764,10 +768,6 @@ static struct nf_ct_helper_expectfn follow_master_nat = {
 	.expectfn	= nf_nat_follow_master,
 };
 
-static struct nfq_ct_nat_hook nfq_ct_nat = {
-	.seq_adjust	= nf_nat_tcp_seq_adjust,
-};
-
 static int __init nf_nat_init(void)
 {
 	int ret;
@@ -787,14 +787,9 @@ static int __init nf_nat_init(void)
 	/* Initialize fake conntrack so that NAT will skip it */
 	nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
 
-	BUG_ON(nf_nat_seq_adjust_hook != NULL);
-	RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
 	BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
 	RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook,
 			 nfnetlink_parse_nat_setup);
-	BUG_ON(nf_ct_nat_offset != NULL);
-	RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset);
-	RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat);
 #ifdef CONFIG_XFRM
 	BUG_ON(nf_nat_decode_session_hook != NULL);
 	RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session);
@@ -813,10 +808,7 @@ static void __exit nf_nat_cleanup(void)
 	unregister_pernet_subsys(&nf_nat_net_ops);
 	nf_ct_extend_unregister(&nat_extend);
 	nf_ct_helper_expectfn_unregister(&follow_master_nat);
-	RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL);
 	RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
-	RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
-	RCU_INIT_POINTER(nfq_ct_nat_hook, NULL);
 #ifdef CONFIG_XFRM
 	RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL);
 #endif
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 5fea563afe30..2840abb5bb99 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -20,74 +20,13 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_l3proto.h>
 #include <net/netfilter/nf_nat_l4proto.h>
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
 
-#define DUMP_OFFSET(x) \
-	pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
-		 x->offset_before, x->offset_after, x->correction_pos);
-
-static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
-
-/* Setup TCP sequence correction given this change at this sequence */
-static inline void
-adjust_tcp_sequence(u32 seq,
-		    int sizediff,
-		    struct nf_conn *ct,
-		    enum ip_conntrack_info ctinfo)
-{
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	struct nf_conn_nat *nat = nfct_nat(ct);
-	struct nf_nat_seq *this_way = &nat->seq[dir];
-
-	pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n",
-		 seq, sizediff);
-
-	pr_debug("adjust_tcp_sequence: Seq_offset before: ");
-	DUMP_OFFSET(this_way);
-
-	spin_lock_bh(&nf_nat_seqofs_lock);
-
-	/* SYN adjust. If it's uninitialized, or this is after last
-	 * correction, record it: we don't handle more than one
-	 * adjustment in the window, but do deal with common case of a
-	 * retransmit */
-	if (this_way->offset_before == this_way->offset_after ||
-	    before(this_way->correction_pos, seq)) {
-		this_way->correction_pos = seq;
-		this_way->offset_before = this_way->offset_after;
-		this_way->offset_after += sizediff;
-	}
-	spin_unlock_bh(&nf_nat_seqofs_lock);
-
-	pr_debug("adjust_tcp_sequence: Seq_offset after: ");
-	DUMP_OFFSET(this_way);
-}
-
-/* Get the offset value, for conntrack */
-s16 nf_nat_get_offset(const struct nf_conn *ct,
-		      enum ip_conntrack_dir dir,
-		      u32 seq)
-{
-	struct nf_conn_nat *nat = nfct_nat(ct);
-	struct nf_nat_seq *this_way;
-	s16 offset;
-
-	if (!nat)
-		return 0;
-
-	this_way = &nat->seq[dir];
-	spin_lock_bh(&nf_nat_seqofs_lock);
-	offset = after(seq, this_way->correction_pos)
-		 ? this_way->offset_after : this_way->offset_before;
-	spin_unlock_bh(&nf_nat_seqofs_lock);
-
-	return offset;
-}
-
 /* Frobs data inside this packet, which is linear. */
 static void mangle_contents(struct sk_buff *skb,
 			    unsigned int dataoff,
@@ -104,7 +43,7 @@ static void mangle_contents(struct sk_buff *skb,
 	/* move post-replacement */
 	memmove(data + match_offset + rep_len,
 		data + match_offset + match_len,
-		skb->tail - (skb->network_header + dataoff +
-			     match_offset + match_len));
+		skb_tail_pointer(skb) - (skb_network_header(skb) + dataoff +
+					 match_offset + match_len));
 
 	/* insert data from buffer */
@@ -142,30 +81,6 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
 	return 1;
 }
 
-void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
-			   __be32 seq, s16 off)
-{
-	if (!off)
-		return;
-	set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
-	adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo);
-	nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
-}
-EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
-
-void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
-			   u32 ctinfo, int off)
-{
-	const struct tcphdr *th;
-
-	if (nf_ct_protonum(ct) != IPPROTO_TCP)
-		return;
-
-	th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb));
-	nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
-}
-EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust);
-
 /* Generic function for mangling variable-length address changes inside
  * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
  * command in FTP).
@@ -210,8 +125,8 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 					datalen, oldlen);
 
 	if (adjust && rep_len != match_len)
-		nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq,
-				      (int)rep_len - (int)match_len);
+		nf_ct_seqadj_set(ct, ctinfo, tcph->seq,
+				 (int)rep_len - (int)match_len);
 
 	return 1;
 }
@@ -271,145 +186,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
 
-/* Adjust one found SACK option including checksum correction */
-static void
-sack_adjust(struct sk_buff *skb,
-	    struct tcphdr *tcph,
-	    unsigned int sackoff,
-	    unsigned int sackend,
-	    struct nf_nat_seq *natseq)
-{
-	while (sackoff < sackend) {
-		struct tcp_sack_block_wire *sack;
-		__be32 new_start_seq, new_end_seq;
-
-		sack = (void *)skb->data + sackoff;
-		if (after(ntohl(sack->start_seq) - natseq->offset_before,
-			  natseq->correction_pos))
-			new_start_seq = htonl(ntohl(sack->start_seq)
-					- natseq->offset_after);
-		else
-			new_start_seq = htonl(ntohl(sack->start_seq)
-					- natseq->offset_before);
-
-		if (after(ntohl(sack->end_seq) - natseq->offset_before,
-			  natseq->correction_pos))
-			new_end_seq = htonl(ntohl(sack->end_seq)
-				      - natseq->offset_after);
-		else
-			new_end_seq = htonl(ntohl(sack->end_seq)
-				      - natseq->offset_before);
-
-		pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
-			 ntohl(sack->start_seq), new_start_seq,
-			 ntohl(sack->end_seq), new_end_seq);
-
-		inet_proto_csum_replace4(&tcph->check, skb,
-					 sack->start_seq, new_start_seq, 0);
-		inet_proto_csum_replace4(&tcph->check, skb,
-					 sack->end_seq, new_end_seq, 0);
-		sack->start_seq = new_start_seq;
-		sack->end_seq = new_end_seq;
-		sackoff += sizeof(*sack);
-	}
-}
-
-/* TCP SACK sequence number adjustment */
-static inline unsigned int
-nf_nat_sack_adjust(struct sk_buff *skb,
-		   unsigned int protoff,
-		   struct tcphdr *tcph,
-		   struct nf_conn *ct,
-		   enum ip_conntrack_info ctinfo)
-{
-	unsigned int dir, optoff, optend;
-	struct nf_conn_nat *nat = nfct_nat(ct);
-
-	optoff = protoff + sizeof(struct tcphdr);
-	optend = protoff + tcph->doff * 4;
-
-	if (!skb_make_writable(skb, optend))
-		return 0;
-
-	dir = CTINFO2DIR(ctinfo);
-
-	while (optoff < optend) {
-		/* Usually: option, length. */
-		unsigned char *op = skb->data + optoff;
-
-		switch (op[0]) {
-		case TCPOPT_EOL:
-			return 1;
-		case TCPOPT_NOP:
-			optoff++;
-			continue;
-		default:
-			/* no partial options */
-			if (optoff + 1 == optend ||
-			    optoff + op[1] > optend ||
-			    op[1] < 2)
-				return 0;
-			if (op[0] == TCPOPT_SACK &&
-			    op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
-			    ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
-				sack_adjust(skb, tcph, optoff+2,
-					    optoff+op[1], &nat->seq[!dir]);
-			optoff += op[1];
-		}
-	}
-	return 1;
-}
-
-/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
-int
-nf_nat_seq_adjust(struct sk_buff *skb,
-		  struct nf_conn *ct,
-		  enum ip_conntrack_info ctinfo,
-		  unsigned int protoff)
-{
-	struct tcphdr *tcph;
-	int dir;
-	__be32 newseq, newack;
-	s16 seqoff, ackoff;
-	struct nf_conn_nat *nat = nfct_nat(ct);
-	struct nf_nat_seq *this_way, *other_way;
-
-	dir = CTINFO2DIR(ctinfo);
-
-	this_way = &nat->seq[dir];
-	other_way = &nat->seq[!dir];
-
-	if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
-		return 0;
-
-	tcph = (void *)skb->data + protoff;
-	if (after(ntohl(tcph->seq), this_way->correction_pos))
-		seqoff = this_way->offset_after;
-	else
-		seqoff = this_way->offset_before;
-
-	if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
-		  other_way->correction_pos))
-		ackoff = other_way->offset_after;
-	else
-		ackoff = other_way->offset_before;
-
-	newseq = htonl(ntohl(tcph->seq) + seqoff);
-	newack = htonl(ntohl(tcph->ack_seq) - ackoff);
-
-	inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
-	inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
-
-	pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
-		 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
-		 ntohl(newack));
-
-	tcph->seq = newseq;
-	tcph->ack_seq = newack;
-
-	return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo);
-}
-
 /* Setup NAT on this expected conntrack so it follows master. */
 /* If we fail to get a free NAT slot, we'll get dropped on confirm */
 void nf_nat_follow_master(struct nf_conn *ct,
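The code removed here (now living in nf_conntrack_seqadj.c) never recomputes the TCP checksum from scratch; inet_proto_csum_replace4() patches it incrementally when a 32-bit field changes, in the spirit of RFC 1624. The following self-contained sketch demonstrates that the incremental update matches a full recomputation; the buffer and helper functions are toys, not the kernel implementation.

/* Demo: incremental one's-complement checksum update for a rewritten
 * 32-bit field equals a full recomputation (RFC 1624 style).
 */
#include <stdio.h>
#include <stdint.h>

static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

static uint16_t csum_data(const uint16_t *p, int n)
{
	uint32_t sum = 0;

	while (n--)
		sum += *p++;
	return ~csum_fold(sum);		/* final complement, as on the wire */
}

static uint16_t csum_replace4(uint16_t check, uint32_t from, uint32_t to)
{
	uint32_t sum = (uint16_t)~check;

	sum += (uint16_t)~(from >> 16) + (uint16_t)~(from & 0xffff);
	sum += (to >> 16) + (to & 0xffff);
	return ~csum_fold(sum);
}

int main(void)
{
	uint16_t buf[4] = { 0x1234, 0x5678, 0x9abc, 0xdef0 };
	uint16_t full, incr;

	incr = csum_data(buf, 4);
	/* rewrite the second 32-bit word (buf[2], buf[3]) */
	incr = csum_replace4(incr, 0x9abcdef0u, 0x11223344u);
	buf[2] = 0x1122; buf[3] = 0x3344;
	full = csum_data(buf, 4);
	printf("incremental %#06x == full %#06x\n", incr, full);
	return 0;
}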
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index 396e55d46f90..754536f2c674 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -34,9 +34,7 @@ sctp_manip_pkt(struct sk_buff *skb,
 		  const struct nf_conntrack_tuple *tuple,
 		  enum nf_nat_manip_type maniptype)
 {
-	struct sk_buff *frag;
 	sctp_sctphdr_t *hdr;
-	__u32 crc32;
 
 	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
 		return false;
@@ -51,11 +49,7 @@ sctp_manip_pkt(struct sk_buff *skb,
 		hdr->dest = tuple->dst.u.sctp.port;
 	}
 
-	crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff);
-	skb_walk_frags(skb, frag)
-		crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag),
-					  crc32);
-	hdr->checksum = sctp_end_cksum(crc32);
+	hdr->checksum = sctp_compute_cksum(skb, hdroff);
 
 	return true;
 }
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index 96ccdf78a29f..f9790405b7ff 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -20,6 +20,7 @@
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
 #include <linux/netfilter/nf_conntrack_sip.h>
 
 MODULE_LICENSE("GPL");
@@ -230,9 +231,10 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff,
 					 &ct->tuplehash[!dir].tuple.src.u3,
 					 false);
 		if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
-				   poff, plen, buffer, buflen))
+				   poff, plen, buffer, buflen)) {
 			nf_ct_helper_log(skb, ct, "cannot mangle received");
 			return NF_DROP;
+		}
 	}
 
 	/* The rport= parameter (RFC 3581) contains the port number
@@ -307,7 +309,7 @@ static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff,
 		return;
 
 	th = (struct tcphdr *)(skb->data + protoff);
-	nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
+	nf_ct_seqadj_set(ct, ctinfo, th->seq, off);
 }
 
 /* Handles expected signalling connections and media streams */
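The mangle_packet() hunk above is a missing-braces fix: without the braces only the nf_ct_helper_log() call was governed by the if, so the following return NF_DROP ran unconditionally and every packet reaching this branch was dropped. In miniature (a hypothetical standalone reproduction; GCC's -Wmisleading-indentation now warns about exactly this shape):

#include <stdio.h>

#define NF_DROP   0
#define NF_ACCEPT 1

static int mangle_ok = 1;	/* pretend mangling succeeds */

static unsigned int buggy(void)
{
	if (!mangle_ok)
		printf("cannot mangle received\n");
		return NF_DROP;	/* indented like the body, but unconditional */
	return NF_ACCEPT;	/* unreachable */
}

static unsigned int fixed(void)
{
	if (!mangle_ok) {
		printf("cannot mangle received\n");
		return NF_DROP;
	}
	return NF_ACCEPT;
}

int main(void)
{
	printf("buggy=%u fixed=%u\n", buggy(), fixed());
	return 0;
}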
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
new file mode 100644
index 000000000000..cdf4567ba9b3
--- /dev/null
+++ b/net/netfilter/nf_synproxy_core.c
@@ -0,0 +1,434 @@
1/*
2 * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/module.h>
10#include <linux/skbuff.h>
11#include <asm/unaligned.h>
12#include <net/tcp.h>
13#include <net/netns/generic.h>
14
15#include <linux/netfilter_ipv4/ip_tables.h>
16#include <linux/netfilter/x_tables.h>
17#include <linux/netfilter/xt_tcpudp.h>
18#include <linux/netfilter/xt_SYNPROXY.h>
19#include <net/netfilter/nf_conntrack.h>
20#include <net/netfilter/nf_conntrack_extend.h>
21#include <net/netfilter/nf_conntrack_seqadj.h>
22#include <net/netfilter/nf_conntrack_synproxy.h>
23
24int synproxy_net_id;
25EXPORT_SYMBOL_GPL(synproxy_net_id);
26
27bool
28synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
29 const struct tcphdr *th, struct synproxy_options *opts)
30{
31 int length = (th->doff * 4) - sizeof(*th);
32 u8 buf[40], *ptr;
33
34 ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
35 if (ptr == NULL)
36 return false;
37
38 opts->options = 0;
39 while (length > 0) {
40 int opcode = *ptr++;
41 int opsize;
42
43 switch (opcode) {
44 case TCPOPT_EOL:
45 return true;
46 case TCPOPT_NOP:
47 length--;
48 continue;
49 default:
50 opsize = *ptr++;
51 if (opsize < 2)
52 return true;
53 if (opsize > length)
54 return true;
55
56 switch (opcode) {
57 case TCPOPT_MSS:
58 if (opsize == TCPOLEN_MSS) {
59 opts->mss = get_unaligned_be16(ptr);
60 opts->options |= XT_SYNPROXY_OPT_MSS;
61 }
62 break;
63 case TCPOPT_WINDOW:
64 if (opsize == TCPOLEN_WINDOW) {
65 opts->wscale = *ptr;
66 if (opts->wscale > 14)
67 opts->wscale = 14;
68 opts->options |= XT_SYNPROXY_OPT_WSCALE;
69 }
70 break;
71 case TCPOPT_TIMESTAMP:
72 if (opsize == TCPOLEN_TIMESTAMP) {
73 opts->tsval = get_unaligned_be32(ptr);
74 opts->tsecr = get_unaligned_be32(ptr + 4);
75 opts->options |= XT_SYNPROXY_OPT_TIMESTAMP;
76 }
77 break;
78 case TCPOPT_SACK_PERM:
79 if (opsize == TCPOLEN_SACK_PERM)
80 opts->options |= XT_SYNPROXY_OPT_SACK_PERM;
81 break;
82 }
83
84 ptr += opsize - 2;
85 length -= opsize;
86 }
87 }
88 return true;
89}
90EXPORT_SYMBOL_GPL(synproxy_parse_options);
91
92unsigned int synproxy_options_size(const struct synproxy_options *opts)
93{
94 unsigned int size = 0;
95
96 if (opts->options & XT_SYNPROXY_OPT_MSS)
97 size += TCPOLEN_MSS_ALIGNED;
98 if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
99 size += TCPOLEN_TSTAMP_ALIGNED;
100 else if (opts->options & XT_SYNPROXY_OPT_SACK_PERM)
101 size += TCPOLEN_SACKPERM_ALIGNED;
102 if (opts->options & XT_SYNPROXY_OPT_WSCALE)
103 size += TCPOLEN_WSCALE_ALIGNED;
104
105 return size;
106}
107EXPORT_SYMBOL_GPL(synproxy_options_size);
108
109void
110synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts)
111{
112 __be32 *ptr = (__be32 *)(th + 1);
113 u8 options = opts->options;
114
115 if (options & XT_SYNPROXY_OPT_MSS)
116 *ptr++ = htonl((TCPOPT_MSS << 24) |
117 (TCPOLEN_MSS << 16) |
118 opts->mss);
119
120 if (options & XT_SYNPROXY_OPT_TIMESTAMP) {
121 if (options & XT_SYNPROXY_OPT_SACK_PERM)
122 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
123 (TCPOLEN_SACK_PERM << 16) |
124 (TCPOPT_TIMESTAMP << 8) |
125 TCPOLEN_TIMESTAMP);
126 else
127 *ptr++ = htonl((TCPOPT_NOP << 24) |
128 (TCPOPT_NOP << 16) |
129 (TCPOPT_TIMESTAMP << 8) |
130 TCPOLEN_TIMESTAMP);
131
132 *ptr++ = htonl(opts->tsval);
133 *ptr++ = htonl(opts->tsecr);
134 } else if (options & XT_SYNPROXY_OPT_SACK_PERM)
135 *ptr++ = htonl((TCPOPT_NOP << 24) |
136 (TCPOPT_NOP << 16) |
137 (TCPOPT_SACK_PERM << 8) |
138 TCPOLEN_SACK_PERM);
139
140 if (options & XT_SYNPROXY_OPT_WSCALE)
141 *ptr++ = htonl((TCPOPT_NOP << 24) |
142 (TCPOPT_WINDOW << 16) |
143 (TCPOLEN_WINDOW << 8) |
144 opts->wscale);
145}
146EXPORT_SYMBOL_GPL(synproxy_build_options);
147
148void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info,
149 struct synproxy_options *opts)
150{
151 opts->tsecr = opts->tsval;
152 opts->tsval = tcp_time_stamp & ~0x3f;
153
154 if (opts->options & XT_SYNPROXY_OPT_WSCALE)
155 opts->tsval |= info->wscale;
156 else
157 opts->tsval |= 0xf;
158
159 if (opts->options & XT_SYNPROXY_OPT_SACK_PERM)
160 opts->tsval |= 1 << 4;
161
162 if (opts->options & XT_SYNPROXY_OPT_ECN)
163 opts->tsval |= 1 << 5;
164}
165EXPORT_SYMBOL_GPL(synproxy_init_timestamp_cookie);
166
167void synproxy_check_timestamp_cookie(struct synproxy_options *opts)
168{
169 opts->wscale = opts->tsecr & 0xf;
170 if (opts->wscale != 0xf)
171 opts->options |= XT_SYNPROXY_OPT_WSCALE;
172
173 opts->options |= opts->tsecr & (1 << 4) ? XT_SYNPROXY_OPT_SACK_PERM : 0;
174
175 opts->options |= opts->tsecr & (1 << 5) ? XT_SYNPROXY_OPT_ECN : 0;
176}
177EXPORT_SYMBOL_GPL(synproxy_check_timestamp_cookie);
178
179unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
180 unsigned int protoff,
181 struct tcphdr *th,
182 struct nf_conn *ct,
183 enum ip_conntrack_info ctinfo,
184 const struct nf_conn_synproxy *synproxy)
185{
186 unsigned int optoff, optend;
187 u32 *ptr, old;
188
189 if (synproxy->tsoff == 0)
190 return 1;
191
192 optoff = protoff + sizeof(struct tcphdr);
193 optend = protoff + th->doff * 4;
194
195 if (!skb_make_writable(skb, optend))
196 return 0;
197
198 while (optoff < optend) {
199 unsigned char *op = skb->data + optoff;
200
201 switch (op[0]) {
202 case TCPOPT_EOL:
203 return 1;
204 case TCPOPT_NOP:
205 optoff++;
206 continue;
207 default:
208 if (optoff + 1 == optend ||
209 optoff + op[1] > optend ||
210 op[1] < 2)
211 return 0;
212 if (op[0] == TCPOPT_TIMESTAMP &&
213 op[1] == TCPOLEN_TIMESTAMP) {
214 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
215 ptr = (u32 *)&op[2];
216 old = *ptr;
217 *ptr = htonl(ntohl(*ptr) -
218 synproxy->tsoff);
219 } else {
220 ptr = (u32 *)&op[6];
221 old = *ptr;
222 *ptr = htonl(ntohl(*ptr) +
223 synproxy->tsoff);
224 }
225 inet_proto_csum_replace4(&th->check, skb,
226 old, *ptr, 0);
227 return 1;
228 }
229 optoff += op[1];
230 }
231 }
232 return 1;
233}
234EXPORT_SYMBOL_GPL(synproxy_tstamp_adjust);
235
236static struct nf_ct_ext_type nf_ct_synproxy_extend __read_mostly = {
237 .len = sizeof(struct nf_conn_synproxy),
238 .align = __alignof__(struct nf_conn_synproxy),
239 .id = NF_CT_EXT_SYNPROXY,
240};
241
242#ifdef CONFIG_PROC_FS
243static void *synproxy_cpu_seq_start(struct seq_file *seq, loff_t *pos)
244{
245 struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq));
246 int cpu;
247
248 if (*pos == 0)
249 return SEQ_START_TOKEN;
250
251 for (cpu = *pos - 1; cpu < nr_cpu_ids; cpu++) {
252 if (!cpu_possible(cpu))
253 continue;
254 *pos = cpu + 1;
255 return per_cpu_ptr(snet->stats, cpu);
256 }
257
258 return NULL;
259}
260
261static void *synproxy_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
262{
263 struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq));
264 int cpu;
265
266 for (cpu = *pos; cpu < nr_cpu_ids; cpu++) {
267 if (!cpu_possible(cpu))
268 continue;
269 *pos = cpu + 1;
270 return per_cpu_ptr(snet->stats, cpu);
271 }
272
273 return NULL;
274}
275
276static void synproxy_cpu_seq_stop(struct seq_file *seq, void *v)
277{
278 return;
279}
280
281static int synproxy_cpu_seq_show(struct seq_file *seq, void *v)
282{
283 struct synproxy_stats *stats = v;
284
285 if (v == SEQ_START_TOKEN) {
286 seq_printf(seq, "entries\t\tsyn_received\t"
287 "cookie_invalid\tcookie_valid\t"
288 "cookie_retrans\tconn_reopened\n");
289 return 0;
290 }
291
292 seq_printf(seq, "%08x\t%08x\t%08x\t%08x\t%08x\t%08x\n", 0,
293 stats->syn_received,
294 stats->cookie_invalid,
295 stats->cookie_valid,
296 stats->cookie_retrans,
297 stats->conn_reopened);
298
299 return 0;
300}
301
302static const struct seq_operations synproxy_cpu_seq_ops = {
303 .start = synproxy_cpu_seq_start,
304 .next = synproxy_cpu_seq_next,
305 .stop = synproxy_cpu_seq_stop,
306 .show = synproxy_cpu_seq_show,
307};
308
309static int synproxy_cpu_seq_open(struct inode *inode, struct file *file)
310{
311 return seq_open_net(inode, file, &synproxy_cpu_seq_ops,
312 sizeof(struct seq_net_private));
313}
314
315static const struct file_operations synproxy_cpu_seq_fops = {
316 .owner = THIS_MODULE,
317 .open = synproxy_cpu_seq_open,
318 .read = seq_read,
319 .llseek = seq_lseek,
320 .release = seq_release_net,
321};
322
323static int __net_init synproxy_proc_init(struct net *net)
324{
325 if (!proc_create("synproxy", S_IRUGO, net->proc_net_stat,
326 &synproxy_cpu_seq_fops))
327 return -ENOMEM;
328 return 0;
329}
330
331static void __net_exit synproxy_proc_exit(struct net *net)
332{
333 remove_proc_entry("synproxy", net->proc_net_stat);
334}
335#else
336static int __net_init synproxy_proc_init(struct net *net)
337{
338 return 0;
339}
340
341static void __net_exit synproxy_proc_exit(struct net *net)
342{
343 return;
344}
345#endif /* CONFIG_PROC_FS */
346
347static int __net_init synproxy_net_init(struct net *net)
348{
349 struct synproxy_net *snet = synproxy_pernet(net);
350 struct nf_conntrack_tuple t;
351 struct nf_conn *ct;
352 int err = -ENOMEM;
353
354 memset(&t, 0, sizeof(t));
355 ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL);
356 if (IS_ERR(ct)) {
357 err = PTR_ERR(ct);
358 goto err1;
359 }
360
361 if (!nfct_seqadj_ext_add(ct))
362 goto err2;
363 if (!nfct_synproxy_ext_add(ct))
364 goto err2;
365 __set_bit(IPS_TEMPLATE_BIT, &ct->status);
366 __set_bit(IPS_CONFIRMED_BIT, &ct->status);
367
368 snet->tmpl = ct;
369
370 snet->stats = alloc_percpu(struct synproxy_stats);
371 if (snet->stats == NULL)
372 goto err2;
373
374 err = synproxy_proc_init(net);
375 if (err < 0)
376 goto err3;
377
378 return 0;
379
380err3:
381 free_percpu(snet->stats);
382err2:
383 nf_conntrack_free(ct);
384err1:
385 return err;
386}
387
388static void __net_exit synproxy_net_exit(struct net *net)
389{
390 struct synproxy_net *snet = synproxy_pernet(net);
391
392 nf_conntrack_free(snet->tmpl);
393 synproxy_proc_exit(net);
394 free_percpu(snet->stats);
395}
396
397static struct pernet_operations synproxy_net_ops = {
398 .init = synproxy_net_init,
399 .exit = synproxy_net_exit,
400 .id = &synproxy_net_id,
401 .size = sizeof(struct synproxy_net),
402};
403
404static int __init synproxy_core_init(void)
405{
406 int err;
407
408 err = nf_ct_extend_register(&nf_ct_synproxy_extend);
409 if (err < 0)
410 goto err1;
411
412 err = register_pernet_subsys(&synproxy_net_ops);
413 if (err < 0)
414 goto err2;
415
416 return 0;
417
418err2:
419 nf_ct_extend_unregister(&nf_ct_synproxy_extend);
420err1:
421 return err;
422}
423
424static void __exit synproxy_core_exit(void)
425{
426 unregister_pernet_subsys(&synproxy_net_ops);
427 nf_ct_extend_unregister(&nf_ct_synproxy_extend);
428}
429
430module_init(synproxy_core_init);
431module_exit(synproxy_core_exit);
432
433MODULE_LICENSE("GPL");
434MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
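A note on the timestamp cookie implemented by synproxy_init_timestamp_cookie() and synproxy_check_timestamp_cookie() above: the proxy keeps no per-connection state before the handshake completes, so it hides the peer's negotiated options in the low six bits of the TCP timestamp value, which the client later echoes back in tsecr. Masking those bits costs only 64 ticks of timestamp resolution. A standalone model of the encoding (plain userspace types; illustration, not the kernel code itself):

#include <stdint.h>
#include <stdio.h>

#define OPT_WSCALE (1 << 0)
#define OPT_SACK   (1 << 1)
#define OPT_ECN    (1 << 2)

/* encode: clear the low 6 bits of the timestamp, then pack the window
 * scale (0xf meaning "none") plus one bit each for SACK-perm and ECN */
static uint32_t cookie_encode(uint32_t now, uint8_t wscale, unsigned int opts)
{
	uint32_t tsval = now & ~0x3fU;

	tsval |= (opts & OPT_WSCALE) ? wscale : 0xfU;
	if (opts & OPT_SACK)
		tsval |= 1U << 4;
	if (opts & OPT_ECN)
		tsval |= 1U << 5;
	return tsval;
}

/* decode: recover the options from the echoed tsecr */
static void cookie_decode(uint32_t tsecr, uint8_t *wscale, unsigned int *opts)
{
	*opts = 0;
	*wscale = tsecr & 0xf;
	if (*wscale != 0xf)
		*opts |= OPT_WSCALE;
	if (tsecr & (1U << 4))
		*opts |= OPT_SACK;
	if (tsecr & (1U << 5))
		*opts |= OPT_ECN;
}

int main(void)
{
	uint8_t ws;
	unsigned int opts;

	cookie_decode(cookie_encode(0x12345678, 7, OPT_WSCALE | OPT_SACK),
		      &ws, &opts);
	printf("wscale=%u sack=%d ecn=%d\n", ws,
	       !!(opts & OPT_SACK), !!(opts & OPT_ECN));
	return 0;
}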
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
deleted file mode 100644
index 474d621cbc2e..000000000000
--- a/net/netfilter/nf_tproxy_core.c
+++ /dev/null
@@ -1,62 +0,0 @@
1/*
2 * Transparent proxy support for Linux/iptables
3 *
4 * Copyright (c) 2006-2007 BalaBit IT Ltd.
5 * Author: Balazs Scheidler, Krisztian Kovacs
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 */
12
13#include <linux/module.h>
14
15#include <linux/net.h>
16#include <linux/if.h>
17#include <linux/netdevice.h>
18#include <net/udp.h>
19#include <net/netfilter/nf_tproxy_core.h>
20
21
22static void
23nf_tproxy_destructor(struct sk_buff *skb)
24{
25 struct sock *sk = skb->sk;
26
27 skb->sk = NULL;
28 skb->destructor = NULL;
29
30 if (sk)
31 sock_put(sk);
32}
33
34/* consumes sk */
35void
36nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
37{
38 /* assigning tw sockets complicates things; most
39 * skb->sk->X checks would have to test sk->sk_state first */
40 if (sk->sk_state == TCP_TIME_WAIT) {
41 inet_twsk_put(inet_twsk(sk));
42 return;
43 }
44
45 skb_orphan(skb);
46 skb->sk = sk;
47 skb->destructor = nf_tproxy_destructor;
48}
49EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock);
50
51static int __init nf_tproxy_init(void)
52{
53 pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n");
54 pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n");
55 return 0;
56}
57
58module_init(nf_tproxy_init);
59
60MODULE_LICENSE("GPL");
61MODULE_AUTHOR("Krisztian Kovacs");
62MODULE_DESCRIPTION("Transparent proxy support core routines");
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index dc3fd5d44464..c7b6d466a662 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -149,9 +149,12 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
-		if (last && cur != last)
-			continue;
+		if (last) {
+			if (cur != last)
+				continue;
 
+			last = NULL;
+		}
 		if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid,
 					cb->nlh->nlmsg_seq,
 					NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
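The dump-loop rewrite above fixes netlink dump resumption. A dump that overflows its buffer remembers the last entry and is re-entered later; with the old "if (last && cur != last) continue;" test, last stayed set even after the saved entry was found, so every entry after the resume point was skipped. Clearing last once the saved entry is reached lets the loop re-emit that entry (it was only partially sent) and everything after it. A standalone model of the corrected pattern, with fill() standing in for nfnl_acct_fill_info() and a two-record buffer per pass (the identical fix appears in ctnl_timeout_dump() below):

#include <stdio.h>

struct entry { int id; };

/* pretend the message buffer holds only two records per dump pass */
static int fill(const struct entry *e, int *room)
{
	if (*room == 0)
		return -1;	/* buffer full: stop, resume here next time */
	(*room)--;
	printf("dumped %d\n", e->id);
	return 0;
}

static const struct entry *dump(const struct entry *tbl, int n,
				const struct entry *last)
{
	int room = 2;

	for (int i = 0; i < n; i++) {
		const struct entry *cur = &tbl[i];

		if (last) {
			if (cur != last)
				continue;	/* skip until resume point */
			last = NULL;		/* found it: dump from here on */
		}
		if (fill(cur, &room) < 0)
			return cur;		/* remember where to resume */
	}
	return NULL;
}

int main(void)
{
	struct entry tbl[] = { {1}, {2}, {3}, {4} };
	const struct entry *last = NULL;

	do {
		last = dump(tbl, 4, last);
	} while (last);
	return 0;
}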
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index a191b6db657e..9e287cb56a04 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -67,9 +67,12 @@ static int
 nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
 			  const struct nlattr *attr)
 {
+	int err;
 	struct nlattr *tb[NFCTH_TUPLE_MAX+1];
 
-	nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol);
+	err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol);
+	if (err < 0)
+		return err;
 
 	if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM])
 		return -EINVAL;
@@ -121,9 +124,12 @@ static int
 nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy,
 			    const struct nlattr *attr)
 {
+	int err;
 	struct nlattr *tb[NFCTH_POLICY_MAX+1];
 
-	nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol);
+	err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol);
+	if (err < 0)
+		return err;
 
 	if (!tb[NFCTH_POLICY_NAME] ||
 	    !tb[NFCTH_POLICY_EXPECT_MAX] ||
@@ -153,8 +159,10 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
 	struct nf_conntrack_expect_policy *expect_policy;
 	struct nlattr *tb[NFCTH_POLICY_SET_MAX+1];
 
-	nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
-			 nfnl_cthelper_expect_policy_set);
+	ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
+			       nfnl_cthelper_expect_policy_set);
+	if (ret < 0)
+		return ret;
 
 	if (!tb[NFCTH_POLICY_SET_NUM])
 		return -EINVAL;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 701c88a20fea..50580494148d 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -59,8 +59,10 @@ ctnl_timeout_parse_policy(struct ctnl_timeout *timeout,
 	if (likely(l4proto->ctnl_timeout.nlattr_to_obj)) {
 		struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1];
 
-		nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
-				 attr, l4proto->ctnl_timeout.nla_policy);
+		ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
+				       attr, l4proto->ctnl_timeout.nla_policy);
+		if (ret < 0)
+			return ret;
 
 		ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net,
 							  &timeout->data);
@@ -220,9 +222,12 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(cur, &cttimeout_list, head) {
-		if (last && cur != last)
-			continue;
+		if (last) {
+			if (cur != last)
+				continue;
 
+			last = NULL;
+		}
 		if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq,
 					   NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
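The other recurring change in nfnetlink_cthelper.c and in ctnl_timeout_parse_policy() above: nla_parse_nested() can fail on malformed or oversized attributes, and ignoring its return value meant bad input was silently half-parsed instead of being rejected. The hardened shape, sketched standalone with a stand-in parser:

#include <errno.h>
#include <string.h>

#define NFCTH_TUPLE_MAX 2

/* stand-in for nla_parse_nested(): returns 0 or a negative errno */
static int parse_nested_stub(void *tb[], int maxtype, const void *attr)
{
	memset(tb, 0, sizeof(void *) * (maxtype + 1));
	return attr ? 0 : -EINVAL;
}

static int parse_tuple(const void *attr)
{
	void *tb[NFCTH_TUPLE_MAX + 1];
	int err;

	err = parse_nested_stub(tb, NFCTH_TUPLE_MAX, attr);
	if (err < 0)
		return err;	/* propagate instead of silently continuing */

	/* ... validate the required attributes in tb[] ... */
	return 0;
}

int main(void)
{
	int dummy;

	return parse_tuple(&dummy) < 0;
}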
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 962e9792e317..d92cc317bf8b 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -419,6 +419,7 @@ __build_packet_message(struct nfnl_log_net *log,
 	nfmsg->version = NFNETLINK_V0;
 	nfmsg->res_id = htons(inst->group_num);
 
+	memset(&pmsg, 0, sizeof(pmsg));
 	pmsg.hw_protocol = skb->protocol;
 	pmsg.hook = hooknum;
 
@@ -498,7 +499,10 @@ __build_packet_message(struct nfnl_log_net *log,
 	if (indev && skb->dev &&
 	    skb->mac_header != skb->network_header) {
 		struct nfulnl_msg_packet_hw phw;
-		int len = dev_parse_header(skb, phw.hw_addr);
+		int len;
+
+		memset(&phw, 0, sizeof(phw));
+		len = dev_parse_header(skb, phw.hw_addr);
 		if (len > 0) {
 			phw.hw_addrlen = htons(len);
 			if (nla_put(inst->skb, NFULA_HWADDR, sizeof(phw), &phw))
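Both memset() additions in __build_packet_message() are small information-leak fixes: pmsg and phw are stack variables copied wholesale to userspace via nla_put(), and dev_parse_header() fills only the actual hardware-address length (6 bytes for Ethernet) of the 8-byte hw_addr buffer, so padding and tail bytes otherwise carried stale kernel stack contents. Sketched standalone (the struct layout is illustrative, not the real nfulnl_msg_packet_hw):

#include <stdint.h>
#include <string.h>

struct packet_hw {
	uint16_t hw_addrlen;
	uint16_t _pad;
	uint8_t  hw_addr[8];
};

/* stand-in: fills only the first 6 bytes (an Ethernet MAC) */
static int parse_header_stub(uint8_t *dst)
{
	memset(dst, 0xab, 6);
	return 6;
}

static void build_attr(struct packet_hw *out)
{
	struct packet_hw phw;

	memset(&phw, 0, sizeof(phw));	/* without this, _pad and
					 * hw_addr[6..7] hold stale stack
					 * bytes that would be copied out */
	phw.hw_addrlen = (uint16_t)parse_header_stub(phw.hw_addr);
	*out = phw;			/* models nla_put(..., &phw) */
}

int main(void)
{
	struct packet_hw a;

	build_attr(&a);
	return a.hw_addrlen == 6 ? 0 : 1;
}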
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 4e27fa035814..ae2e5c11d01a 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -41,6 +41,14 @@
 
 #define NFQNL_QMAX_DEFAULT 1024
 
+/* We're using struct nlattr which has 16bit nla_len. Note that nla_len
+ * includes the header length. Thus, the maximum packet length that we
+ * support is 65531 bytes. We send truncated packets if the specified length
+ * is larger than that. Userspace can check for presence of NFQA_CAP_LEN
+ * attribute to detect truncation.
+ */
+#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN)
+
 struct nfqnl_instance {
 	struct hlist_node hlist;	/* global list of queues */
 	struct rcu_head rcu;
@@ -122,7 +130,7 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num,
 	inst->queue_num = queue_num;
 	inst->peer_portid = portid;
 	inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
-	inst->copy_range = 0xffff;
+	inst->copy_range = NFQNL_MAX_COPY_RANGE;
 	inst->copy_mode = NFQNL_COPY_NONE;
 	spin_lock_init(&inst->lock);
 	INIT_LIST_HEAD(&inst->queue_list);
@@ -272,12 +280,17 @@ nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
 	skb_shinfo(to)->nr_frags = j;
 }
 
-static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet)
+static int
+nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet,
+		      bool csum_verify)
 {
 	__u32 flags = 0;
 
 	if (packet->ip_summed == CHECKSUM_PARTIAL)
 		flags = NFQA_SKB_CSUMNOTREADY;
+	else if (csum_verify)
+		flags = NFQA_SKB_CSUM_NOTVERIFIED;
+
 	if (skb_is_gso(packet))
 		flags |= NFQA_SKB_GSO;
 
@@ -302,6 +315,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	struct net_device *outdev;
 	struct nf_conn *ct = NULL;
 	enum ip_conntrack_info uninitialized_var(ctinfo);
+	bool csum_verify;
 
 	size = nlmsg_total_size(sizeof(struct nfgenmsg))
 		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
@@ -319,6 +333,12 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	if (entskb->tstamp.tv64)
 		size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
 
+	if (entry->hook <= NF_INET_FORWARD ||
+	   (entry->hook == NF_INET_POST_ROUTING && entskb->sk == NULL))
+		csum_verify = !skb_csum_unnecessary(entskb);
+	else
+		csum_verify = false;
+
 	outdev = entry->outdev;
 
 	switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
@@ -333,10 +353,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			return NULL;
 
 		data_len = ACCESS_ONCE(queue->copy_range);
-		if (data_len == 0 || data_len > entskb->len)
+		if (data_len > entskb->len)
 			data_len = entskb->len;
 
-
 		if (!entskb->head_frag ||
 		    skb_headlen(entskb) < L1_CACHE_BYTES ||
 		    skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS)
@@ -444,7 +463,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	if (indev && entskb->dev &&
 	    entskb->mac_header != entskb->network_header) {
 		struct nfqnl_msg_packet_hw phw;
-		int len = dev_parse_header(entskb, phw.hw_addr);
+		int len;
+
+		memset(&phw, 0, sizeof(phw));
+		len = dev_parse_header(entskb, phw.hw_addr);
 		if (len) {
 			phw.hw_addrlen = htons(len);
 			if (nla_put(skb, NFQA_HWADDR, sizeof(phw), &phw))
@@ -465,10 +487,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
 		goto nla_put_failure;
 
-	if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
+	if (cap_len > data_len &&
+	    nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
 		goto nla_put_failure;
 
-	if (nfqnl_put_packet_info(skb, entskb))
+	if (nfqnl_put_packet_info(skb, entskb, csum_verify))
 		goto nla_put_failure;
 
 	if (data_len) {
@@ -509,10 +532,6 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
 	}
 	spin_lock_bh(&queue->lock);
 
-	if (!queue->peer_portid) {
-		err = -EINVAL;
-		goto err_out_free_nskb;
-	}
 	if (queue->queue_total >= queue->queue_maxlen) {
 		if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
 			failopen = 1;
@@ -637,9 +656,6 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 	if (queue->copy_mode == NFQNL_COPY_NONE)
 		return -EINVAL;
 
-	if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(entry->skb))
-		return __nfqnl_enqueue_packet(net, queue, entry);
-
 	skb = entry->skb;
 
 	switch (entry->pf) {
@@ -651,6 +667,9 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 		break;
 	}
 
+	if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(skb))
+		return __nfqnl_enqueue_packet(net, queue, entry);
+
 	nf_bridge_adjust_skb_data(skb);
 	segs = skb_gso_segment(skb, 0);
 	/* Does not use PTR_ERR to limit the number of error codes that can be
@@ -731,13 +750,8 @@ nfqnl_set_mode(struct nfqnl_instance *queue,
 
 	case NFQNL_COPY_PACKET:
 		queue->copy_mode = mode;
-		/* We're using struct nlattr which has 16bit nla_len. Note that
-		 * nla_len includes the header length. Thus, the maximum packet
-		 * length that we support is 65531 bytes. We send truncated
-		 * packets if the specified length is larger than that.
-		 */
-		if (range > 0xffff - NLA_HDRLEN)
-			queue->copy_range = 0xffff - NLA_HDRLEN;
+		if (range == 0 || range > NFQNL_MAX_COPY_RANGE)
+			queue->copy_range = NFQNL_MAX_COPY_RANGE;
 		else
 			queue->copy_range = range;
 		break;
@@ -800,7 +814,7 @@ static int
 nfqnl_rcv_dev_event(struct notifier_block *this,
 		    unsigned long event, void *ptr)
 {
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
 	/* Drop any packets associated with the downed device */
 	if (event == NETDEV_DOWN)
@@ -848,6 +862,7 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
 	[NFQA_MARK]	= { .type = NLA_U32 },
 	[NFQA_PAYLOAD]	= { .type = NLA_UNSPEC },
 	[NFQA_CT]	= { .type = NLA_UNSPEC },
+	[NFQA_EXP]	= { .type = NLA_UNSPEC },
 };
 
 static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
@@ -976,9 +991,14 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 	if (entry == NULL)
 		return -ENOENT;
 
-	rcu_read_lock();
-	if (nfqa[NFQA_CT] && (queue->flags & NFQA_CFG_F_CONNTRACK))
+	if (nfqa[NFQA_CT]) {
 		ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo);
+		if (ct && nfqa[NFQA_EXP]) {
+			nfqnl_attach_expect(ct, nfqa[NFQA_EXP],
+					    NETLINK_CB(skb).portid,
+					    nlmsg_report(nlh));
+		}
+	}
 
 	if (nfqa[NFQA_PAYLOAD]) {
 		u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]);
@@ -989,9 +1009,8 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 			verdict = NF_DROP;
 
 		if (ct)
-			nfqnl_ct_seq_adjust(skb, ct, ctinfo, diff);
+			nfqnl_ct_seq_adjust(entry->skb, ct, ctinfo, diff);
 	}
-	rcu_read_unlock();
 
 	if (nfqa[NFQA_MARK])
 		entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
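The new NFQNL_MAX_COPY_RANGE simply names the arithmetic in the comment: nla_len is a 16-bit field that includes the 4-byte attribute header (NLA_HDRLEN), so one attribute can carry at most 0xffff - 4 = 65531 bytes of packet data. Centralizing the constant lets instance_create() and nfqnl_set_mode() share the clamping, sketched here:

#include <stdint.h>
#include <stdio.h>

#define NLA_HDRLEN		4	/* aligned netlink attribute header */
#define NFQNL_MAX_COPY_RANGE	(0xffff - NLA_HDRLEN)

/* range == 0 used to mean "copy everything"; mapping it to the real
 * maximum is what makes the later "data_len > entskb->len" test enough. */
static uint32_t clamp_copy_range(uint32_t range)
{
	if (range == 0 || range > NFQNL_MAX_COPY_RANGE)
		return NFQNL_MAX_COPY_RANGE;
	return range;
}

int main(void)
{
	printf("%u %u %u\n", clamp_copy_range(0),
	       clamp_copy_range(1500), clamp_copy_range(0xffff));
	return 0;	/* prints 65531 1500 65531 */
}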
diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c
index ab61d66bc0b9..96cac50e0d12 100644
--- a/net/netfilter/nfnetlink_queue_ct.c
+++ b/net/netfilter/nfnetlink_queue_ct.c
@@ -87,12 +87,27 @@ nla_put_failure:
 void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
 			 enum ip_conntrack_info ctinfo, int diff)
 {
-	struct nfq_ct_nat_hook *nfq_nat_ct;
+	struct nfq_ct_hook *nfq_ct;
 
-	nfq_nat_ct = rcu_dereference(nfq_ct_nat_hook);
-	if (nfq_nat_ct == NULL)
+	nfq_ct = rcu_dereference(nfq_ct_hook);
+	if (nfq_ct == NULL)
 		return;
 
 	if ((ct->status & IPS_NAT_MASK) && diff)
-		nfq_nat_ct->seq_adjust(skb, ct, ctinfo, diff);
+		nfq_ct->seq_adjust(skb, ct, ctinfo, diff);
+}
+
+int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
+			u32 portid, u32 report)
+{
+	struct nfq_ct_hook *nfq_ct;
+
+	if (nf_ct_is_untracked(ct))
+		return 0;
+
+	nfq_ct = rcu_dereference(nfq_ct_hook);
+	if (nfq_ct == NULL)
+		return -EOPNOTSUPP;
+
+	return nfq_ct->attach_expect(attr, ct, portid, report);
 }
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index a60261cb0e80..da35ac06a975 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -26,6 +26,9 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
 	if (skb->nfct != NULL)
 		return XT_CONTINUE;
 
+	/* special case the untracked ct : we want the percpu object */
+	if (!ct)
+		ct = nf_ct_untracked_get();
 	atomic_inc(&ct->ct_general.use);
 	skb->nfct = &ct->ct_general;
 	skb->nfctinfo = IP_CT_NEW;
@@ -186,8 +189,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
 	int ret = -EOPNOTSUPP;
 
 	if (info->flags & XT_CT_NOTRACK) {
-		ct = nf_ct_untracked_get();
-		atomic_inc(&ct->ct_general.use);
+		ct = NULL;
 		goto out;
 	}
193 195
@@ -311,7 +313,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
 	struct nf_conn *ct = info->ct;
 	struct nf_conn_help *help;
 
-	if (!nf_ct_is_untracked(ct)) {
+	if (ct && !nf_ct_is_untracked(ct)) {
 		help = nfct_help(ct);
 		if (help)
 			module_put(help->helper->me);
@@ -319,8 +321,8 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
 		nf_ct_l3proto_module_put(par->family);
 
 		xt_ct_destroy_timeout(ct);
+		nf_ct_put(info->ct);
 	}
-	nf_ct_put(info->ct);
 }
 
 static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index a75240f0d42b..cd24290f3b2f 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -45,25 +45,34 @@ optlen(const u_int8_t *opt, unsigned int offset)
 
 static int
 tcpmss_mangle_packet(struct sk_buff *skb,
-		     const struct xt_tcpmss_info *info,
+		     const struct xt_action_param *par,
 		     unsigned int in_mtu,
 		     unsigned int tcphoff,
 		     unsigned int minlen)
 {
+	const struct xt_tcpmss_info *info = par->targinfo;
 	struct tcphdr *tcph;
-	unsigned int tcplen, i;
+	int len, tcp_hdrlen;
+	unsigned int i;
 	__be16 oldval;
 	u16 newmss;
 	u8 *opt;
 
+	/* This is a fragment, no TCP header is available */
+	if (par->fragoff != 0)
+		return 0;
+
 	if (!skb_make_writable(skb, skb->len))
 		return -1;
 
-	tcplen = skb->len - tcphoff;
+	len = skb->len - tcphoff;
+	if (len < (int)sizeof(struct tcphdr))
+		return -1;
+
 	tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+	tcp_hdrlen = tcph->doff * 4;
 
-	/* Header cannot be larger than the packet */
-	if (tcplen < tcph->doff*4)
+	if (len < tcp_hdrlen)
 		return -1;
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
@@ -82,9 +91,8 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 		newmss = info->mss;
 
 	opt = (u_int8_t *)tcph;
-	for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) {
-		if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS &&
-		    opt[i+1] == TCPOLEN_MSS) {
+	for (i = sizeof(struct tcphdr); i <= tcp_hdrlen - TCPOLEN_MSS; i += optlen(opt, i)) {
+		if (opt[i] == TCPOPT_MSS && opt[i+1] == TCPOLEN_MSS) {
 			u_int16_t oldmss;
 
 			oldmss = (opt[i+2] << 8) | opt[i+3];
@@ -107,9 +115,10 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 	}
 
 	/* There is data after the header so the option can't be added
-	   without moving it, and doing so may make the SYN packet
-	   itself too large. Accept the packet unmodified instead. */
-	if (tcplen > tcph->doff*4)
+	 * without moving it, and doing so may make the SYN packet
+	 * itself too large. Accept the packet unmodified instead.
+	 */
+	if (len > tcp_hdrlen)
 		return 0;
 
 	/*
@@ -125,11 +134,23 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 
 	skb_put(skb, TCPOLEN_MSS);
 
+	/*
+	 * IPv4: RFC 1122 states "If an MSS option is not received at
+	 * connection setup, TCP MUST assume a default send MSS of 536".
+	 * IPv6: RFC 2460 states IPv6 has a minimum MTU of 1280 and a minimum
+	 * length IPv6 header of 60, ergo the default MSS value is 1220
+	 * Since no MSS was provided, we must use the default values
+	 */
+	if (par->family == NFPROTO_IPV4)
+		newmss = min(newmss, (u16)536);
+	else
+		newmss = min(newmss, (u16)1220);
+
 	opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
-	memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
+	memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr));
 
 	inet_proto_csum_replace2(&tcph->check, skb,
-				 htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
+				 htons(len), htons(len + TCPOLEN_MSS), 1);
 	opt[0] = TCPOPT_MSS;
 	opt[1] = TCPOLEN_MSS;
 	opt[2] = (newmss & 0xff00) >> 8;
@@ -182,7 +203,7 @@ tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	__be16 newlen;
 	int ret;
 
-	ret = tcpmss_mangle_packet(skb, par->targinfo,
+	ret = tcpmss_mangle_packet(skb, par,
 				   tcpmss_reverse_mtu(skb, PF_INET),
 				   iph->ihl * 4,
 				   sizeof(*iph) + sizeof(struct tcphdr));
@@ -211,7 +232,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 	tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr, &frag_off);
 	if (tcphoff < 0)
 		return NF_DROP;
-	ret = tcpmss_mangle_packet(skb, par->targinfo,
+	ret = tcpmss_mangle_packet(skb, par,
 				   tcpmss_reverse_mtu(skb, PF_INET6),
 				   tcphoff,
 				   sizeof(*ipv6h) + sizeof(struct tcphdr));
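Two details of the tcpmss_mangle_packet() rewrite are worth unpacking. The loop bound i <= tcp_hdrlen - TCPOLEN_MSS guarantees that all four bytes of an MSS option (kind, length, two value bytes) lie inside the header before they are read, replacing the old per-iteration tcph->doff*4 - i >= TCPOLEN_MSS check. And when a SYN carries no MSS option at all, the inserted one is now clamped to the protocol default: 536 for IPv4 per RFC 1122, and 1220 for IPv6 (the comment's "minimum length IPv6 header of 60" lumps the 40-byte IPv6 header and 20-byte TCP header together, subtracted from the 1280-byte minimum MTU). A standalone sketch of the bounded option scan:

#include <stdint.h>

#define TCPOPT_MSS	2
#define TCPOLEN_MSS	4

/* option length helper mirroring the module's optlen(): EOL and NOP are
 * one byte, everything else carries its length in the second byte */
static unsigned int optlen(const uint8_t *opt, unsigned int offset)
{
	if (opt[offset] <= 1 || opt[offset + 1] == 0)
		return 1;
	return opt[offset + 1];
}

/* Scan a TCP header of tcp_hdrlen bytes for the MSS option; the bound
 * i <= tcp_hdrlen - TCPOLEN_MSS keeps every 4-byte read in range. */
static int find_mss(const uint8_t *tcph, int tcp_hdrlen)
{
	for (int i = 20; i <= tcp_hdrlen - TCPOLEN_MSS; i += optlen(tcph, i)) {
		if (tcph[i] == TCPOPT_MSS && tcph[i + 1] == TCPOLEN_MSS)
			return (tcph[i + 2] << 8) | tcph[i + 3];
	}
	return -1;	/* no MSS option present */
}

int main(void)
{
	uint8_t tcph[24] = {0};

	tcph[20] = TCPOPT_MSS;
	tcph[21] = TCPOLEN_MSS;
	tcph[22] = 0x05;
	tcph[23] = 0xb4;	/* MSS 1460 */
	return find_mss(tcph, sizeof(tcph)) == 1460 ? 0 : 1;
}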
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index 1eb1a44bfd3d..625fa1d636a0 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -38,7 +38,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
 	struct tcphdr *tcph;
 	u_int16_t n, o;
 	u_int8_t *opt;
-	int len;
+	int len, tcp_hdrlen;
 
 	/* This is a fragment, no TCP header is available */
 	if (par->fragoff != 0)
@@ -48,21 +48,25 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
 		return NF_DROP;
 
 	len = skb->len - tcphoff;
-	if (len < (int)sizeof(struct tcphdr) ||
-	    tcp_hdr(skb)->doff * 4 > len)
+	if (len < (int)sizeof(struct tcphdr))
 		return NF_DROP;
 
 	tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+	tcp_hdrlen = tcph->doff * 4;
+
+	if (len < tcp_hdrlen)
+		return NF_DROP;
+
 	opt = (u_int8_t *)tcph;
 
 	/*
 	 * Walk through all TCP options - if we find some option to remove,
 	 * set all octets to %TCPOPT_NOP and adjust checksum.
 	 */
-	for (i = sizeof(struct tcphdr); i < tcp_hdrlen(skb); i += optl) {
+	for (i = sizeof(struct tcphdr); i < tcp_hdrlen - 1; i += optl) {
 		optl = optlen(opt, i);
 
-		if (i + optl > tcp_hdrlen(skb))
+		if (i + optl > tcp_hdrlen)
 			break;
 
 		if (!tcpoptstrip_test_bit(info->strip_bmap, opt[i]))
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index bd93e51d30ac..292934d23482 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -200,7 +200,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 static int tee_netdev_event(struct notifier_block *this, unsigned long event,
 			    void *ptr)
 {
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct xt_tee_priv *priv;
 
 	priv = container_of(this, struct xt_tee_priv, notifier);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d7f195388f66..5d8a3a3cd5a7 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -15,7 +15,9 @@
 #include <linux/ip.h>
 #include <net/checksum.h>
 #include <net/udp.h>
+#include <net/tcp.h>
 #include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
 #include <linux/inetdevice.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -26,13 +28,18 @@
 #define XT_TPROXY_HAVE_IPV6 1
 #include <net/if_inet6.h>
 #include <net/addrconf.h>
+#include <net/inet6_hashtables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #endif
 
-#include <net/netfilter/nf_tproxy_core.h>
 #include <linux/netfilter/xt_TPROXY.h>
 
+enum nf_tproxy_lookup_t {
+	 NFT_LOOKUP_LISTENER,
+	 NFT_LOOKUP_ESTABLISHED,
+};
+
 static bool tproxy_sk_is_transparent(struct sock *sk)
 {
 	if (sk->sk_state != TCP_TIME_WAIT) {
@@ -68,6 +75,157 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
75 return laddr ? laddr : daddr;
76}
77
78/*
79 * This is used when the user wants to intercept a connection matching
80 * an explicit iptables rule. In this case the sockets are assumed
81 * matching in preference order:
82 *
83 * - match: if there's a fully established connection matching the
84 * _packet_ tuple, it is returned, assuming the redirection
85 * already took place and we process a packet belonging to an
86 * established connection
87 *
88 * - match: if there's a listening socket matching the redirection
89 * (e.g. on-port & on-ip of the connection), it is returned,
90 * regardless if it was bound to 0.0.0.0 or an explicit
91 * address. The reasoning is that if there's an explicit rule, it
92 * does not really matter if the listener is bound to an interface
93 * or to 0. The user already stated that he wants redirection
94 * (since he added the rule).
95 *
96 * Please note that there's an overlap between what a TPROXY target
97 * and a socket match will match. Normally if you have both rules the
98 * "socket" match will be the first one, effectively all packets
99 * belonging to established connections going through that one.
100 */
101static inline struct sock *
102nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
103 const __be32 saddr, const __be32 daddr,
104 const __be16 sport, const __be16 dport,
105 const struct net_device *in,
106 const enum nf_tproxy_lookup_t lookup_type)
107{
108 struct sock *sk;
109
110 switch (protocol) {
111 case IPPROTO_TCP:
112 switch (lookup_type) {
113 case NFT_LOOKUP_LISTENER:
114 sk = inet_lookup_listener(net, &tcp_hashinfo,
115 saddr, sport,
116 daddr, dport,
117 in->ifindex);
118
119 /* NOTE: we return listeners even if bound to
120 * 0.0.0.0, those are filtered out in
121 * xt_socket, since xt_TPROXY needs 0 bound
122 * listeners too
123 */
124 break;
125 case NFT_LOOKUP_ESTABLISHED:
126 sk = inet_lookup_established(net, &tcp_hashinfo,
127 saddr, sport, daddr, dport,
128 in->ifindex);
129 break;
130 default:
131 BUG();
132 }
133 break;
134 case IPPROTO_UDP:
135 sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
136 in->ifindex);
137 if (sk) {
138 int connected = (sk->sk_state == TCP_ESTABLISHED);
139 int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
140
141 /* NOTE: we return listeners even if bound to
142 * 0.0.0.0, those are filtered out in
143 * xt_socket, since xt_TPROXY needs 0 bound
144 * listeners too
145 */
146 if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
147 (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
148 sock_put(sk);
149 sk = NULL;
150 }
151 }
152 break;
153 default:
154 WARN_ON(1);
155 sk = NULL;
156 }
157
158 pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
159 protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
160
161 return sk;
162}
163
164#ifdef XT_TPROXY_HAVE_IPV6
165static inline struct sock *
166nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
167 const struct in6_addr *saddr, const struct in6_addr *daddr,
168 const __be16 sport, const __be16 dport,
169 const struct net_device *in,
170 const enum nf_tproxy_lookup_t lookup_type)
171{
172 struct sock *sk;
173
174 switch (protocol) {
175 case IPPROTO_TCP:
176 switch (lookup_type) {
177 case NFT_LOOKUP_LISTENER:
178 sk = inet6_lookup_listener(net, &tcp_hashinfo,
179 saddr, sport,
180 daddr, ntohs(dport),
181 in->ifindex);
182
183 /* NOTE: we return listeners even if bound to
184 * 0.0.0.0, those are filtered out in
185 * xt_socket, since xt_TPROXY needs 0 bound
186 * listeners too
187 */
188 break;
189 case NFT_LOOKUP_ESTABLISHED:
190 sk = __inet6_lookup_established(net, &tcp_hashinfo,
191 saddr, sport, daddr, ntohs(dport),
192 in->ifindex);
193 break;
194 default:
195 BUG();
196 }
197 break;
198 case IPPROTO_UDP:
199 sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
200 in->ifindex);
201 if (sk) {
202 int connected = (sk->sk_state == TCP_ESTABLISHED);
203 int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr);
204
205 /* NOTE: we return listeners even if bound to
206 * 0.0.0.0, those are filtered out in
207 * xt_socket, since xt_TPROXY needs 0 bound
208 * listeners too
209 */
210 if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
211 (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
212 sock_put(sk);
213 sk = NULL;
214 }
215 }
216 break;
217 default:
218 WARN_ON(1);
219 sk = NULL;
220 }
221
222 pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
223 protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
224
225 return sk;
226}
227#endif
228
229/**
230 * tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections
231 * @skb: The skb being processed.
@@ -117,6 +275,15 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
275 return sk;
276}
277
278/* assign a socket to the skb -- consumes sk */
279static void
280nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
281{
282 skb_orphan(skb);
283 skb->sk = sk;
284 skb->destructor = sock_edemux;
285}
286
287static unsigned int
288tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
289 u_int32_t mark_mask, u_int32_t mark_value)
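The comment block above nf_tproxy_get_sock_v4() describes a two-step preference order, and the split into NFT_LOOKUP_ESTABLISHED and NFT_LOOKUP_LISTENER exists to let callers express it: once a flow has been redirected, its packets must keep hitting the socket the proxy already owns, and only a new flow's first packet should be steered to the listener. A sketch of a caller, with lookup_established()/lookup_listener() as hypothetical stand-ins for the two lookup modes (illustration only, not code from the patch):

#include <stddef.h>

struct sock;	/* opaque here */
struct tuple;	/* packet or redirect address 5-tuple, opaque here */

/* hypothetical stand-ins for nf_tproxy_get_sock_v4() in its two modes */
struct sock *lookup_established(const struct tuple *pkt_tuple);
struct sock *lookup_listener(const struct tuple *redirect_tuple);

/* Packets of an already-redirected flow must keep hitting the socket
 * the proxy owns; only a new flow's first packet goes to the listener. */
static struct sock *tproxy_pick_sock(const struct tuple *pkt_tuple,
				     const struct tuple *redirect_tuple)
{
	struct sock *sk = lookup_established(pkt_tuple);

	if (sk)
		return sk;
	return lookup_listener(redirect_tuple);
}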
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 68ff29f60867..fab6eea1bf38 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -202,7 +202,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
 		return -EINVAL;
 	}
 	if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
-		pr_err("ipv6 PROHIBT (THROW, NAT ..) matching not supported\n");
+		pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n");
 		return -EINVAL;
 	}
 	if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index ed0db15ab00e..7720b036d76a 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -18,7 +18,7 @@ static bool
 xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_rateest_match_info *info = par->matchinfo;
-	struct gnet_stats_rate_est *r;
+	struct gnet_stats_rate_est64 *r;
 	u_int32_t bps1, bps2, pps1, pps2;
 	bool ret = true;
 
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 63b2bdb59e95..06df2b9110f5 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -19,12 +19,12 @@
 #include <net/icmp.h>
 #include <net/sock.h>
 #include <net/inet_sock.h>
-#include <net/netfilter/nf_tproxy_core.h>
 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
 
 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 #define XT_SOCKET_HAVE_IPV6 1
 #include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/inet6_hashtables.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #endif
 
@@ -101,13 +101,50 @@ extract_icmp4_fields(const struct sk_buff *skb,
101 return 0;
102}
103
104/* "socket" match based redirection (no specific rule)
105 * ===================================================
106 *
107 * There are connections with dynamic endpoints (e.g. FTP data
108 * connection) that the user is unable to add explicit rules
109 * for. These are taken care of by a generic "socket" rule. It is
110 * assumed that the proxy application is trusted to open such
111 * connections without explicit iptables rule (except of course the
112 * generic 'socket' rule). In this case the following sockets are
113 * matched in preference order:
114 *
115 * - match: if there's a fully established connection matching the
116 * _packet_ tuple
117 *
118 * - match: if there's a non-zero bound listener (possibly with a
119 * non-local address) We don't accept zero-bound listeners, since
120 * then local services could intercept traffic going through the
121 * box.
122 */
123static struct sock *
124xt_socket_get_sock_v4(struct net *net, const u8 protocol,
125 const __be32 saddr, const __be32 daddr,
126 const __be16 sport, const __be16 dport,
127 const struct net_device *in)
128{
129 switch (protocol) {
130 case IPPROTO_TCP:
131 return __inet_lookup(net, &tcp_hashinfo,
132 saddr, sport, daddr, dport,
133 in->ifindex);
134 case IPPROTO_UDP:
135 return udp4_lib_lookup(net, saddr, sport, daddr, dport,
136 in->ifindex);
137 }
138 return NULL;
139}
140
141static bool
142socket_match(const struct sk_buff *skb, struct xt_action_param *par,
143 const struct xt_socket_mtinfo1 *info)
144{
145 const struct iphdr *iph = ip_hdr(skb);
146 struct udphdr _hdr, *hp = NULL;
110 struct sock *sk;
147 struct sock *sk = skb->sk;
148 __be32 uninitialized_var(daddr), uninitialized_var(saddr);
149 __be16 uninitialized_var(dport), uninitialized_var(sport);
150 u8 uninitialized_var(protocol);
@@ -155,25 +192,31 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 	}
 #endif
 
-	sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
-				   saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
-	if (sk != NULL) {
+	if (!sk)
+		sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol,
+					   saddr, daddr, sport, dport,
+					   par->in);
+	if (sk) {
 		bool wildcard;
 		bool transparent = true;
 
-		/* Ignore sockets listening on INADDR_ANY */
-		wildcard = (sk->sk_state != TCP_TIME_WAIT &&
+		/* Ignore sockets listening on INADDR_ANY,
+		 * unless XT_SOCKET_NOWILDCARD is set
+		 */
+		wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
+			    sk->sk_state != TCP_TIME_WAIT &&
 			    inet_sk(sk)->inet_rcv_saddr == 0);
 
 		/* Ignore non-transparent sockets,
 		   if XT_SOCKET_TRANSPARENT is used */
-		if (info && info->flags & XT_SOCKET_TRANSPARENT)
+		if (info->flags & XT_SOCKET_TRANSPARENT)
 			transparent = ((sk->sk_state != TCP_TIME_WAIT &&
 					inet_sk(sk)->transparent) ||
 				       (sk->sk_state == TCP_TIME_WAIT &&
 					inet_twsk(sk)->tw_transparent));
 
-		xt_socket_put_sk(sk);
+		if (sk != skb->sk)
+			xt_socket_put_sk(sk);
 
 		if (wildcard || !transparent)
 			sk = NULL;
@@ -190,11 +233,15 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 static bool
 socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
-	return socket_match(skb, par, NULL);
+	static struct xt_socket_mtinfo1 xt_info_v0 = {
+		.flags = 0,
+	};
+
+	return socket_match(skb, par, &xt_info_v0);
 }
 
 static bool
-socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par)
+socket_mt4_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	return socket_match(skb, par, par->matchinfo);
 }
@@ -255,12 +302,31 @@ extract_icmp6_fields(const struct sk_buff *skb,
302 return 0;
303}
304
305static struct sock *
306xt_socket_get_sock_v6(struct net *net, const u8 protocol,
307 const struct in6_addr *saddr, const struct in6_addr *daddr,
308 const __be16 sport, const __be16 dport,
309 const struct net_device *in)
310{
311 switch (protocol) {
312 case IPPROTO_TCP:
313 return inet6_lookup(net, &tcp_hashinfo,
314 saddr, sport, daddr, dport,
315 in->ifindex);
316 case IPPROTO_UDP:
317 return udp6_lib_lookup(net, saddr, sport, daddr, dport,
318 in->ifindex);
319 }
320
321 return NULL;
322}
323
324static bool
259socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
325socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
326{
327 struct ipv6hdr *iph = ipv6_hdr(skb);
328 struct udphdr _hdr, *hp = NULL;
263 struct sock *sk;
329 struct sock *sk = skb->sk;
330 struct in6_addr *daddr = NULL, *saddr = NULL;
331 __be16 uninitialized_var(dport), uninitialized_var(sport);
332 int thoff = 0, uninitialized_var(tproto);
@@ -291,25 +357,31 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
 		return false;
 	}
 
-	sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
-				   saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
-	if (sk != NULL) {
+	if (!sk)
+		sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto,
+					   saddr, daddr, sport, dport,
+					   par->in);
+	if (sk) {
 		bool wildcard;
 		bool transparent = true;
 
-		/* Ignore sockets listening on INADDR_ANY */
-		wildcard = (sk->sk_state != TCP_TIME_WAIT &&
+		/* Ignore sockets listening on INADDR_ANY
+		 * unless XT_SOCKET_NOWILDCARD is set
+		 */
+		wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
+			    sk->sk_state != TCP_TIME_WAIT &&
 			    ipv6_addr_any(&inet6_sk(sk)->rcv_saddr));
 
 		/* Ignore non-transparent sockets,
 		   if XT_SOCKET_TRANSPARENT is used */
-		if (info && info->flags & XT_SOCKET_TRANSPARENT)
+		if (info->flags & XT_SOCKET_TRANSPARENT)
 			transparent = ((sk->sk_state != TCP_TIME_WAIT &&
 					inet_sk(sk)->transparent) ||
 				       (sk->sk_state == TCP_TIME_WAIT &&
 					inet_twsk(sk)->tw_transparent));
 
-		xt_socket_put_sk(sk);
+		if (sk != skb->sk)
+			xt_socket_put_sk(sk);
 
 		if (wildcard || !transparent)
 			sk = NULL;
@@ -325,6 +397,28 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
325} 397}
326#endif 398#endif
327 399
400static int socket_mt_v1_check(const struct xt_mtchk_param *par)
401{
402 const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
403
404 if (info->flags & ~XT_SOCKET_FLAGS_V1) {
405 pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1);
406 return -EINVAL;
407 }
408 return 0;
409}
410
411static int socket_mt_v2_check(const struct xt_mtchk_param *par)
412{
413 const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo;
414
415 if (info->flags & ~XT_SOCKET_FLAGS_V2) {
416 pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2);
417 return -EINVAL;
418 }
419 return 0;
420}
421
328static struct xt_match socket_mt_reg[] __read_mostly = { 422static struct xt_match socket_mt_reg[] __read_mostly = {
329 { 423 {
330 .name = "socket", 424 .name = "socket",
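
The two checkentry routines differ only in the flag mask they accept. The masks live in the uapi header rather than in this diff; presumably they read roughly as below, which is what makes a v1 rule carrying the new flag fail with -EINVAL while the identical rule loads as revision 2 (an assumption about include/uapi/linux/netfilter/xt_socket.h, not shown in this patch):

#define XT_SOCKET_TRANSPARENT	(1 << 0)	/* pre-existing */
#define XT_SOCKET_NOWILDCARD	(1 << 1)	/* new with revision 2 */

#define XT_SOCKET_FLAGS_V1	XT_SOCKET_TRANSPARENT
#define XT_SOCKET_FLAGS_V2	(XT_SOCKET_TRANSPARENT | XT_SOCKET_NOWILDCARD)
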
@@ -339,7 +433,8 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
339 .name = "socket", 433 .name = "socket",
340 .revision = 1, 434 .revision = 1,
341 .family = NFPROTO_IPV4, 435 .family = NFPROTO_IPV4,
342 .match = socket_mt4_v1, 436 .match = socket_mt4_v1_v2,
437 .checkentry = socket_mt_v1_check,
343 .matchsize = sizeof(struct xt_socket_mtinfo1), 438 .matchsize = sizeof(struct xt_socket_mtinfo1),
344 .hooks = (1 << NF_INET_PRE_ROUTING) | 439 .hooks = (1 << NF_INET_PRE_ROUTING) |
345 (1 << NF_INET_LOCAL_IN), 440 (1 << NF_INET_LOCAL_IN),
@@ -350,7 +445,32 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
350 .name = "socket", 445 .name = "socket",
351 .revision = 1, 446 .revision = 1,
352 .family = NFPROTO_IPV6, 447 .family = NFPROTO_IPV6,
353 .match = socket_mt6_v1, 448 .match = socket_mt6_v1_v2,
449 .checkentry = socket_mt_v1_check,
450 .matchsize = sizeof(struct xt_socket_mtinfo1),
451 .hooks = (1 << NF_INET_PRE_ROUTING) |
452 (1 << NF_INET_LOCAL_IN),
453 .me = THIS_MODULE,
454 },
455#endif
456 {
457 .name = "socket",
458 .revision = 2,
459 .family = NFPROTO_IPV4,
460 .match = socket_mt4_v1_v2,
461 .checkentry = socket_mt_v2_check,
462 .matchsize = sizeof(struct xt_socket_mtinfo1),
463 .hooks = (1 << NF_INET_PRE_ROUTING) |
464 (1 << NF_INET_LOCAL_IN),
465 .me = THIS_MODULE,
466 },
467#ifdef XT_SOCKET_HAVE_IPV6
468 {
469 .name = "socket",
470 .revision = 2,
471 .family = NFPROTO_IPV6,
472 .match = socket_mt6_v1_v2,
473 .checkentry = socket_mt_v2_check,
354 .matchsize = sizeof(struct xt_socket_mtinfo1), 474 .matchsize = sizeof(struct xt_socket_mtinfo1),
355 .hooks = (1 << NF_INET_PRE_ROUTING) | 475 .hooks = (1 << NF_INET_PRE_ROUTING) |
356 (1 << NF_INET_LOCAL_IN), 476 (1 << NF_INET_LOCAL_IN),
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index c15042f987bd..a1100640495d 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -691,8 +691,8 @@ static int netlbl_cipsov4_remove_cb(struct netlbl_dom_map *entry, void *arg)
691{ 691{
692 struct netlbl_domhsh_walk_arg *cb_arg = arg; 692 struct netlbl_domhsh_walk_arg *cb_arg = arg;
693 693
694 if (entry->type == NETLBL_NLTYPE_CIPSOV4 && 694 if (entry->def.type == NETLBL_NLTYPE_CIPSOV4 &&
695 entry->type_def.cipsov4->doi == cb_arg->doi) 695 entry->def.cipso->doi == cb_arg->doi)
696 return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info); 696 return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info);
697 697
698 return 0; 698 return 0;
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 6bb1d42f0fac..85d842e6e431 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -84,15 +84,15 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry)
84#endif /* IPv6 */ 84#endif /* IPv6 */
85 85
86 ptr = container_of(entry, struct netlbl_dom_map, rcu); 86 ptr = container_of(entry, struct netlbl_dom_map, rcu);
87 if (ptr->type == NETLBL_NLTYPE_ADDRSELECT) { 87 if (ptr->def.type == NETLBL_NLTYPE_ADDRSELECT) {
88 netlbl_af4list_foreach_safe(iter4, tmp4, 88 netlbl_af4list_foreach_safe(iter4, tmp4,
89 &ptr->type_def.addrsel->list4) { 89 &ptr->def.addrsel->list4) {
90 netlbl_af4list_remove_entry(iter4); 90 netlbl_af4list_remove_entry(iter4);
91 kfree(netlbl_domhsh_addr4_entry(iter4)); 91 kfree(netlbl_domhsh_addr4_entry(iter4));
92 } 92 }
93#if IS_ENABLED(CONFIG_IPV6) 93#if IS_ENABLED(CONFIG_IPV6)
94 netlbl_af6list_foreach_safe(iter6, tmp6, 94 netlbl_af6list_foreach_safe(iter6, tmp6,
95 &ptr->type_def.addrsel->list6) { 95 &ptr->def.addrsel->list6) {
96 netlbl_af6list_remove_entry(iter6); 96 netlbl_af6list_remove_entry(iter6);
97 kfree(netlbl_domhsh_addr6_entry(iter6)); 97 kfree(netlbl_domhsh_addr6_entry(iter6));
98 } 98 }
@@ -213,21 +213,21 @@ static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry,
213 if (addr4 != NULL) { 213 if (addr4 != NULL) {
214 struct netlbl_domaddr4_map *map4; 214 struct netlbl_domaddr4_map *map4;
215 map4 = netlbl_domhsh_addr4_entry(addr4); 215 map4 = netlbl_domhsh_addr4_entry(addr4);
216 type = map4->type; 216 type = map4->def.type;
217 cipsov4 = map4->type_def.cipsov4; 217 cipsov4 = map4->def.cipso;
218 netlbl_af4list_audit_addr(audit_buf, 0, NULL, 218 netlbl_af4list_audit_addr(audit_buf, 0, NULL,
219 addr4->addr, addr4->mask); 219 addr4->addr, addr4->mask);
220#if IS_ENABLED(CONFIG_IPV6) 220#if IS_ENABLED(CONFIG_IPV6)
221 } else if (addr6 != NULL) { 221 } else if (addr6 != NULL) {
222 struct netlbl_domaddr6_map *map6; 222 struct netlbl_domaddr6_map *map6;
223 map6 = netlbl_domhsh_addr6_entry(addr6); 223 map6 = netlbl_domhsh_addr6_entry(addr6);
224 type = map6->type; 224 type = map6->def.type;
225 netlbl_af6list_audit_addr(audit_buf, 0, NULL, 225 netlbl_af6list_audit_addr(audit_buf, 0, NULL,
226 &addr6->addr, &addr6->mask); 226 &addr6->addr, &addr6->mask);
227#endif /* IPv6 */ 227#endif /* IPv6 */
228 } else { 228 } else {
229 type = entry->type; 229 type = entry->def.type;
230 cipsov4 = entry->type_def.cipsov4; 230 cipsov4 = entry->def.cipso;
231 } 231 }
232 switch (type) { 232 switch (type) {
233 case NETLBL_NLTYPE_UNLABELED: 233 case NETLBL_NLTYPE_UNLABELED:
@@ -265,26 +265,25 @@ static int netlbl_domhsh_validate(const struct netlbl_dom_map *entry)
265 if (entry == NULL) 265 if (entry == NULL)
266 return -EINVAL; 266 return -EINVAL;
267 267
268 switch (entry->type) { 268 switch (entry->def.type) {
269 case NETLBL_NLTYPE_UNLABELED: 269 case NETLBL_NLTYPE_UNLABELED:
270 if (entry->type_def.cipsov4 != NULL || 270 if (entry->def.cipso != NULL || entry->def.addrsel != NULL)
271 entry->type_def.addrsel != NULL)
272 return -EINVAL; 271 return -EINVAL;
273 break; 272 break;
274 case NETLBL_NLTYPE_CIPSOV4: 273 case NETLBL_NLTYPE_CIPSOV4:
275 if (entry->type_def.cipsov4 == NULL) 274 if (entry->def.cipso == NULL)
276 return -EINVAL; 275 return -EINVAL;
277 break; 276 break;
278 case NETLBL_NLTYPE_ADDRSELECT: 277 case NETLBL_NLTYPE_ADDRSELECT:
279 netlbl_af4list_foreach(iter4, &entry->type_def.addrsel->list4) { 278 netlbl_af4list_foreach(iter4, &entry->def.addrsel->list4) {
280 map4 = netlbl_domhsh_addr4_entry(iter4); 279 map4 = netlbl_domhsh_addr4_entry(iter4);
281 switch (map4->type) { 280 switch (map4->def.type) {
282 case NETLBL_NLTYPE_UNLABELED: 281 case NETLBL_NLTYPE_UNLABELED:
283 if (map4->type_def.cipsov4 != NULL) 282 if (map4->def.cipso != NULL)
284 return -EINVAL; 283 return -EINVAL;
285 break; 284 break;
286 case NETLBL_NLTYPE_CIPSOV4: 285 case NETLBL_NLTYPE_CIPSOV4:
287 if (map4->type_def.cipsov4 == NULL) 286 if (map4->def.cipso == NULL)
288 return -EINVAL; 287 return -EINVAL;
289 break; 288 break;
290 default: 289 default:
@@ -292,9 +291,9 @@ static int netlbl_domhsh_validate(const struct netlbl_dom_map *entry)
292 } 291 }
293 } 292 }
294#if IS_ENABLED(CONFIG_IPV6) 293#if IS_ENABLED(CONFIG_IPV6)
295 netlbl_af6list_foreach(iter6, &entry->type_def.addrsel->list6) { 294 netlbl_af6list_foreach(iter6, &entry->def.addrsel->list6) {
296 map6 = netlbl_domhsh_addr6_entry(iter6); 295 map6 = netlbl_domhsh_addr6_entry(iter6);
297 switch (map6->type) { 296 switch (map6->def.type) {
298 case NETLBL_NLTYPE_UNLABELED: 297 case NETLBL_NLTYPE_UNLABELED:
299 break; 298 break;
300 default: 299 default:
@@ -402,32 +401,31 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
402 rcu_assign_pointer(netlbl_domhsh_def, entry); 401 rcu_assign_pointer(netlbl_domhsh_def, entry);
403 } 402 }
404 403
405 if (entry->type == NETLBL_NLTYPE_ADDRSELECT) { 404 if (entry->def.type == NETLBL_NLTYPE_ADDRSELECT) {
406 netlbl_af4list_foreach_rcu(iter4, 405 netlbl_af4list_foreach_rcu(iter4,
407 &entry->type_def.addrsel->list4) 406 &entry->def.addrsel->list4)
408 netlbl_domhsh_audit_add(entry, iter4, NULL, 407 netlbl_domhsh_audit_add(entry, iter4, NULL,
409 ret_val, audit_info); 408 ret_val, audit_info);
410#if IS_ENABLED(CONFIG_IPV6) 409#if IS_ENABLED(CONFIG_IPV6)
411 netlbl_af6list_foreach_rcu(iter6, 410 netlbl_af6list_foreach_rcu(iter6,
412 &entry->type_def.addrsel->list6) 411 &entry->def.addrsel->list6)
413 netlbl_domhsh_audit_add(entry, NULL, iter6, 412 netlbl_domhsh_audit_add(entry, NULL, iter6,
414 ret_val, audit_info); 413 ret_val, audit_info);
415#endif /* IPv6 */ 414#endif /* IPv6 */
416 } else 415 } else
417 netlbl_domhsh_audit_add(entry, NULL, NULL, 416 netlbl_domhsh_audit_add(entry, NULL, NULL,
418 ret_val, audit_info); 417 ret_val, audit_info);
419 } else if (entry_old->type == NETLBL_NLTYPE_ADDRSELECT && 418 } else if (entry_old->def.type == NETLBL_NLTYPE_ADDRSELECT &&
420 entry->type == NETLBL_NLTYPE_ADDRSELECT) { 419 entry->def.type == NETLBL_NLTYPE_ADDRSELECT) {
421 struct list_head *old_list4; 420 struct list_head *old_list4;
422 struct list_head *old_list6; 421 struct list_head *old_list6;
423 422
424 old_list4 = &entry_old->type_def.addrsel->list4; 423 old_list4 = &entry_old->def.addrsel->list4;
425 old_list6 = &entry_old->type_def.addrsel->list6; 424 old_list6 = &entry_old->def.addrsel->list6;
426 425
 427 /* we only allow the addition of address selectors if none of 426 /* we only allow the addition of address selectors if none of
 428 * the selectors already exist in the existing domain map */ 427 * the selectors already exist in the existing domain map */
429 netlbl_af4list_foreach_rcu(iter4, 428 netlbl_af4list_foreach_rcu(iter4, &entry->def.addrsel->list4)
430 &entry->type_def.addrsel->list4)
431 if (netlbl_af4list_search_exact(iter4->addr, 429 if (netlbl_af4list_search_exact(iter4->addr,
432 iter4->mask, 430 iter4->mask,
433 old_list4)) { 431 old_list4)) {
@@ -435,8 +433,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
435 goto add_return; 433 goto add_return;
436 } 434 }
437#if IS_ENABLED(CONFIG_IPV6) 435#if IS_ENABLED(CONFIG_IPV6)
438 netlbl_af6list_foreach_rcu(iter6, 436 netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6)
439 &entry->type_def.addrsel->list6)
440 if (netlbl_af6list_search_exact(&iter6->addr, 437 if (netlbl_af6list_search_exact(&iter6->addr,
441 &iter6->mask, 438 &iter6->mask,
442 old_list6)) { 439 old_list6)) {
@@ -446,7 +443,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
446#endif /* IPv6 */ 443#endif /* IPv6 */
447 444
448 netlbl_af4list_foreach_safe(iter4, tmp4, 445 netlbl_af4list_foreach_safe(iter4, tmp4,
449 &entry->type_def.addrsel->list4) { 446 &entry->def.addrsel->list4) {
450 netlbl_af4list_remove_entry(iter4); 447 netlbl_af4list_remove_entry(iter4);
451 iter4->valid = 1; 448 iter4->valid = 1;
452 ret_val = netlbl_af4list_add(iter4, old_list4); 449 ret_val = netlbl_af4list_add(iter4, old_list4);
@@ -457,7 +454,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
457 } 454 }
458#if IS_ENABLED(CONFIG_IPV6) 455#if IS_ENABLED(CONFIG_IPV6)
459 netlbl_af6list_foreach_safe(iter6, tmp6, 456 netlbl_af6list_foreach_safe(iter6, tmp6,
460 &entry->type_def.addrsel->list6) { 457 &entry->def.addrsel->list6) {
461 netlbl_af6list_remove_entry(iter6); 458 netlbl_af6list_remove_entry(iter6);
462 iter6->valid = 1; 459 iter6->valid = 1;
463 ret_val = netlbl_af6list_add(iter6, old_list6); 460 ret_val = netlbl_af6list_add(iter6, old_list6);
@@ -538,18 +535,18 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry,
538 struct netlbl_af4list *iter4; 535 struct netlbl_af4list *iter4;
539 struct netlbl_domaddr4_map *map4; 536 struct netlbl_domaddr4_map *map4;
540 537
541 switch (entry->type) { 538 switch (entry->def.type) {
542 case NETLBL_NLTYPE_ADDRSELECT: 539 case NETLBL_NLTYPE_ADDRSELECT:
543 netlbl_af4list_foreach_rcu(iter4, 540 netlbl_af4list_foreach_rcu(iter4,
544 &entry->type_def.addrsel->list4) { 541 &entry->def.addrsel->list4) {
545 map4 = netlbl_domhsh_addr4_entry(iter4); 542 map4 = netlbl_domhsh_addr4_entry(iter4);
546 cipso_v4_doi_putdef(map4->type_def.cipsov4); 543 cipso_v4_doi_putdef(map4->def.cipso);
547 } 544 }
548 /* no need to check the IPv6 list since we currently 545 /* no need to check the IPv6 list since we currently
549 * support only unlabeled protocols for IPv6 */ 546 * support only unlabeled protocols for IPv6 */
550 break; 547 break;
551 case NETLBL_NLTYPE_CIPSOV4: 548 case NETLBL_NLTYPE_CIPSOV4:
552 cipso_v4_doi_putdef(entry->type_def.cipsov4); 549 cipso_v4_doi_putdef(entry->def.cipso);
553 break; 550 break;
554 } 551 }
555 call_rcu(&entry->rcu, netlbl_domhsh_free_entry); 552 call_rcu(&entry->rcu, netlbl_domhsh_free_entry);
@@ -590,20 +587,21 @@ int netlbl_domhsh_remove_af4(const char *domain,
590 entry_map = netlbl_domhsh_search(domain); 587 entry_map = netlbl_domhsh_search(domain);
591 else 588 else
592 entry_map = netlbl_domhsh_search_def(domain); 589 entry_map = netlbl_domhsh_search_def(domain);
593 if (entry_map == NULL || entry_map->type != NETLBL_NLTYPE_ADDRSELECT) 590 if (entry_map == NULL ||
591 entry_map->def.type != NETLBL_NLTYPE_ADDRSELECT)
594 goto remove_af4_failure; 592 goto remove_af4_failure;
595 593
596 spin_lock(&netlbl_domhsh_lock); 594 spin_lock(&netlbl_domhsh_lock);
597 entry_addr = netlbl_af4list_remove(addr->s_addr, mask->s_addr, 595 entry_addr = netlbl_af4list_remove(addr->s_addr, mask->s_addr,
598 &entry_map->type_def.addrsel->list4); 596 &entry_map->def.addrsel->list4);
599 spin_unlock(&netlbl_domhsh_lock); 597 spin_unlock(&netlbl_domhsh_lock);
600 598
601 if (entry_addr == NULL) 599 if (entry_addr == NULL)
602 goto remove_af4_failure; 600 goto remove_af4_failure;
603 netlbl_af4list_foreach_rcu(iter4, &entry_map->type_def.addrsel->list4) 601 netlbl_af4list_foreach_rcu(iter4, &entry_map->def.addrsel->list4)
604 goto remove_af4_single_addr; 602 goto remove_af4_single_addr;
605#if IS_ENABLED(CONFIG_IPV6) 603#if IS_ENABLED(CONFIG_IPV6)
606 netlbl_af6list_foreach_rcu(iter6, &entry_map->type_def.addrsel->list6) 604 netlbl_af6list_foreach_rcu(iter6, &entry_map->def.addrsel->list6)
607 goto remove_af4_single_addr; 605 goto remove_af4_single_addr;
608#endif /* IPv6 */ 606#endif /* IPv6 */
609 /* the domain mapping is empty so remove it from the mapping table */ 607 /* the domain mapping is empty so remove it from the mapping table */
@@ -616,7 +614,7 @@ remove_af4_single_addr:
616 * shouldn't be a problem */ 614 * shouldn't be a problem */
617 synchronize_rcu(); 615 synchronize_rcu();
618 entry = netlbl_domhsh_addr4_entry(entry_addr); 616 entry = netlbl_domhsh_addr4_entry(entry_addr);
619 cipso_v4_doi_putdef(entry->type_def.cipsov4); 617 cipso_v4_doi_putdef(entry->def.cipso);
620 kfree(entry); 618 kfree(entry);
621 return 0; 619 return 0;
622 620
@@ -693,8 +691,8 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain)
693 * responsible for ensuring that rcu_read_[un]lock() is called. 691 * responsible for ensuring that rcu_read_[un]lock() is called.
694 * 692 *
695 */ 693 */
696struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, 694struct netlbl_dommap_def *netlbl_domhsh_getentry_af4(const char *domain,
697 __be32 addr) 695 __be32 addr)
698{ 696{
699 struct netlbl_dom_map *dom_iter; 697 struct netlbl_dom_map *dom_iter;
700 struct netlbl_af4list *addr_iter; 698 struct netlbl_af4list *addr_iter;
@@ -702,15 +700,13 @@ struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain,
702 dom_iter = netlbl_domhsh_search_def(domain); 700 dom_iter = netlbl_domhsh_search_def(domain);
703 if (dom_iter == NULL) 701 if (dom_iter == NULL)
704 return NULL; 702 return NULL;
705 if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT)
706 return NULL;
707 703
708 addr_iter = netlbl_af4list_search(addr, 704 if (dom_iter->def.type != NETLBL_NLTYPE_ADDRSELECT)
709 &dom_iter->type_def.addrsel->list4); 705 return &dom_iter->def;
706 addr_iter = netlbl_af4list_search(addr, &dom_iter->def.addrsel->list4);
710 if (addr_iter == NULL) 707 if (addr_iter == NULL)
711 return NULL; 708 return NULL;
712 709 return &(netlbl_domhsh_addr4_entry(addr_iter)->def);
713 return netlbl_domhsh_addr4_entry(addr_iter);
714} 710}
715 711
716#if IS_ENABLED(CONFIG_IPV6) 712#if IS_ENABLED(CONFIG_IPV6)
@@ -725,7 +721,7 @@ struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain,
725 * responsible for ensuring that rcu_read_[un]lock() is called. 721 * responsible for ensuring that rcu_read_[un]lock() is called.
726 * 722 *
727 */ 723 */
728struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, 724struct netlbl_dommap_def *netlbl_domhsh_getentry_af6(const char *domain,
729 const struct in6_addr *addr) 725 const struct in6_addr *addr)
730{ 726{
731 struct netlbl_dom_map *dom_iter; 727 struct netlbl_dom_map *dom_iter;
@@ -734,15 +730,13 @@ struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain,
734 dom_iter = netlbl_domhsh_search_def(domain); 730 dom_iter = netlbl_domhsh_search_def(domain);
735 if (dom_iter == NULL) 731 if (dom_iter == NULL)
736 return NULL; 732 return NULL;
737 if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT)
738 return NULL;
739 733
740 addr_iter = netlbl_af6list_search(addr, 734 if (dom_iter->def.type != NETLBL_NLTYPE_ADDRSELECT)
741 &dom_iter->type_def.addrsel->list6); 735 return &dom_iter->def;
736 addr_iter = netlbl_af6list_search(addr, &dom_iter->def.addrsel->list6);
742 if (addr_iter == NULL) 737 if (addr_iter == NULL)
743 return NULL; 738 return NULL;
744 739 return &(netlbl_domhsh_addr6_entry(addr_iter)->def);
745 return netlbl_domhsh_addr6_entry(addr_iter);
746} 740}
747#endif /* IPv6 */ 741#endif /* IPv6 */
748 742
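
The net effect of the two getter changes: callers now receive a struct netlbl_dommap_def in every case, the domain-level one when no address selector is configured and the per-address one otherwise, with NULL reserved for "no mapping at all". A caller sketch under the new contract (label_outgoing is a hypothetical name; cipso_v4_sock_setattr() is the existing CIPSO API):

static int label_outgoing(struct sock *sk, __be32 daddr,
			  const struct netlbl_lsm_secattr *secattr)
{
	struct netlbl_dommap_def *entry;
	int ret_val = 0;

	rcu_read_lock();
	entry = netlbl_domhsh_getentry_af4(secattr->domain, daddr);
	if (entry == NULL)
		ret_val = -ENOENT;
	else if (entry->type == NETLBL_NLTYPE_CIPSOV4)
		ret_val = cipso_v4_sock_setattr(sk, entry->cipso, secattr);
	rcu_read_unlock();
	return ret_val;
}
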
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 90872c4ca30f..b9be0eed8980 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -43,37 +43,35 @@
43#define NETLBL_DOMHSH_BITSIZE 7 43#define NETLBL_DOMHSH_BITSIZE 7
44 44
45/* Domain mapping definition structures */ 45/* Domain mapping definition structures */
46struct netlbl_domaddr_map {
47 struct list_head list4;
48 struct list_head list6;
49};
50struct netlbl_dommap_def {
51 u32 type;
52 union {
53 struct netlbl_domaddr_map *addrsel;
54 struct cipso_v4_doi *cipso;
55 };
56};
46#define netlbl_domhsh_addr4_entry(iter) \ 57#define netlbl_domhsh_addr4_entry(iter) \
47 container_of(iter, struct netlbl_domaddr4_map, list) 58 container_of(iter, struct netlbl_domaddr4_map, list)
48struct netlbl_domaddr4_map { 59struct netlbl_domaddr4_map {
49 u32 type; 60 struct netlbl_dommap_def def;
50 union {
51 struct cipso_v4_doi *cipsov4;
52 } type_def;
53 61
54 struct netlbl_af4list list; 62 struct netlbl_af4list list;
55}; 63};
56#define netlbl_domhsh_addr6_entry(iter) \ 64#define netlbl_domhsh_addr6_entry(iter) \
57 container_of(iter, struct netlbl_domaddr6_map, list) 65 container_of(iter, struct netlbl_domaddr6_map, list)
58struct netlbl_domaddr6_map { 66struct netlbl_domaddr6_map {
59 u32 type; 67 struct netlbl_dommap_def def;
60
61 /* NOTE: no 'type_def' union needed at present since we don't currently
62 * support any IPv6 labeling protocols */
63 68
64 struct netlbl_af6list list; 69 struct netlbl_af6list list;
65}; 70};
66struct netlbl_domaddr_map { 71
67 struct list_head list4;
68 struct list_head list6;
69};
70struct netlbl_dom_map { 72struct netlbl_dom_map {
71 char *domain; 73 char *domain;
72 u32 type; 74 struct netlbl_dommap_def def;
73 union {
74 struct cipso_v4_doi *cipsov4;
75 struct netlbl_domaddr_map *addrsel;
76 } type_def;
77 75
78 u32 valid; 76 u32 valid;
79 struct list_head list; 77 struct list_head list;
@@ -97,16 +95,16 @@ int netlbl_domhsh_remove_af4(const char *domain,
97int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); 95int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info);
98int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); 96int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info);
99struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); 97struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
100struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, 98struct netlbl_dommap_def *netlbl_domhsh_getentry_af4(const char *domain,
101 __be32 addr); 99 __be32 addr);
100#if IS_ENABLED(CONFIG_IPV6)
101struct netlbl_dommap_def *netlbl_domhsh_getentry_af6(const char *domain,
102 const struct in6_addr *addr);
103#endif /* IPv6 */
104
102int netlbl_domhsh_walk(u32 *skip_bkt, 105int netlbl_domhsh_walk(u32 *skip_bkt,
103 u32 *skip_chain, 106 u32 *skip_chain,
104 int (*callback) (struct netlbl_dom_map *entry, void *arg), 107 int (*callback) (struct netlbl_dom_map *entry, void *arg),
105 void *cb_arg); 108 void *cb_arg);
106 109
107#if IS_ENABLED(CONFIG_IPV6)
108struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain,
109 const struct in6_addr *addr);
110#endif /* IPv6 */
111
112#endif 110#endif
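
This header change is the heart of the netlabel series: three separate type/type_def pairs collapse into one shared struct netlbl_dommap_def embedded in each map structure, so protocol dispatch can be written once against a def pointer regardless of which level it came from. A sketch of such a shared consumer (apply_def is hypothetical; the switch mirrors the callers updated elsewhere in this patch):

static int apply_def(struct sock *sk,
		     const struct netlbl_dommap_def *def,
		     const struct netlbl_lsm_secattr *secattr)
{
	switch (def->type) {
	case NETLBL_NLTYPE_CIPSOV4:
		return cipso_v4_sock_setattr(sk, def->cipso, secattr);
	case NETLBL_NLTYPE_UNLABELED:
		return 0;		/* nothing to attach */
	default:
		return -EINVAL;
	}
}
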
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 7c94aedd0912..96a458e12f60 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -122,7 +122,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
122 } 122 }
123 123
124 if (addr == NULL && mask == NULL) 124 if (addr == NULL && mask == NULL)
125 entry->type = NETLBL_NLTYPE_UNLABELED; 125 entry->def.type = NETLBL_NLTYPE_UNLABELED;
126 else if (addr != NULL && mask != NULL) { 126 else if (addr != NULL && mask != NULL) {
127 addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC); 127 addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC);
128 if (addrmap == NULL) 128 if (addrmap == NULL)
@@ -137,7 +137,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
137 map4 = kzalloc(sizeof(*map4), GFP_ATOMIC); 137 map4 = kzalloc(sizeof(*map4), GFP_ATOMIC);
138 if (map4 == NULL) 138 if (map4 == NULL)
139 goto cfg_unlbl_map_add_failure; 139 goto cfg_unlbl_map_add_failure;
140 map4->type = NETLBL_NLTYPE_UNLABELED; 140 map4->def.type = NETLBL_NLTYPE_UNLABELED;
141 map4->list.addr = addr4->s_addr & mask4->s_addr; 141 map4->list.addr = addr4->s_addr & mask4->s_addr;
142 map4->list.mask = mask4->s_addr; 142 map4->list.mask = mask4->s_addr;
143 map4->list.valid = 1; 143 map4->list.valid = 1;
@@ -154,7 +154,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
154 map6 = kzalloc(sizeof(*map6), GFP_ATOMIC); 154 map6 = kzalloc(sizeof(*map6), GFP_ATOMIC);
155 if (map6 == NULL) 155 if (map6 == NULL)
156 goto cfg_unlbl_map_add_failure; 156 goto cfg_unlbl_map_add_failure;
157 map6->type = NETLBL_NLTYPE_UNLABELED; 157 map6->def.type = NETLBL_NLTYPE_UNLABELED;
158 map6->list.addr = *addr6; 158 map6->list.addr = *addr6;
159 map6->list.addr.s6_addr32[0] &= mask6->s6_addr32[0]; 159 map6->list.addr.s6_addr32[0] &= mask6->s6_addr32[0];
160 map6->list.addr.s6_addr32[1] &= mask6->s6_addr32[1]; 160 map6->list.addr.s6_addr32[1] &= mask6->s6_addr32[1];
@@ -174,8 +174,8 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
174 break; 174 break;
175 } 175 }
176 176
177 entry->type_def.addrsel = addrmap; 177 entry->def.addrsel = addrmap;
178 entry->type = NETLBL_NLTYPE_ADDRSELECT; 178 entry->def.type = NETLBL_NLTYPE_ADDRSELECT;
179 } else { 179 } else {
180 ret_val = -EINVAL; 180 ret_val = -EINVAL;
181 goto cfg_unlbl_map_add_failure; 181 goto cfg_unlbl_map_add_failure;
@@ -355,8 +355,8 @@ int netlbl_cfg_cipsov4_map_add(u32 doi,
355 } 355 }
356 356
357 if (addr == NULL && mask == NULL) { 357 if (addr == NULL && mask == NULL) {
358 entry->type_def.cipsov4 = doi_def; 358 entry->def.cipso = doi_def;
359 entry->type = NETLBL_NLTYPE_CIPSOV4; 359 entry->def.type = NETLBL_NLTYPE_CIPSOV4;
360 } else if (addr != NULL && mask != NULL) { 360 } else if (addr != NULL && mask != NULL) {
361 addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC); 361 addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC);
362 if (addrmap == NULL) 362 if (addrmap == NULL)
@@ -367,8 +367,8 @@ int netlbl_cfg_cipsov4_map_add(u32 doi,
367 addrinfo = kzalloc(sizeof(*addrinfo), GFP_ATOMIC); 367 addrinfo = kzalloc(sizeof(*addrinfo), GFP_ATOMIC);
368 if (addrinfo == NULL) 368 if (addrinfo == NULL)
369 goto out_addrinfo; 369 goto out_addrinfo;
370 addrinfo->type_def.cipsov4 = doi_def; 370 addrinfo->def.cipso = doi_def;
371 addrinfo->type = NETLBL_NLTYPE_CIPSOV4; 371 addrinfo->def.type = NETLBL_NLTYPE_CIPSOV4;
372 addrinfo->list.addr = addr->s_addr & mask->s_addr; 372 addrinfo->list.addr = addr->s_addr & mask->s_addr;
373 addrinfo->list.mask = mask->s_addr; 373 addrinfo->list.mask = mask->s_addr;
374 addrinfo->list.valid = 1; 374 addrinfo->list.valid = 1;
@@ -376,8 +376,8 @@ int netlbl_cfg_cipsov4_map_add(u32 doi,
376 if (ret_val != 0) 376 if (ret_val != 0)
377 goto cfg_cipsov4_map_add_failure; 377 goto cfg_cipsov4_map_add_failure;
378 378
379 entry->type_def.addrsel = addrmap; 379 entry->def.addrsel = addrmap;
380 entry->type = NETLBL_NLTYPE_ADDRSELECT; 380 entry->def.type = NETLBL_NLTYPE_ADDRSELECT;
381 } else { 381 } else {
382 ret_val = -EINVAL; 382 ret_val = -EINVAL;
383 goto out_addrmap; 383 goto out_addrmap;
@@ -657,14 +657,14 @@ int netlbl_sock_setattr(struct sock *sk,
657 } 657 }
658 switch (family) { 658 switch (family) {
659 case AF_INET: 659 case AF_INET:
660 switch (dom_entry->type) { 660 switch (dom_entry->def.type) {
661 case NETLBL_NLTYPE_ADDRSELECT: 661 case NETLBL_NLTYPE_ADDRSELECT:
662 ret_val = -EDESTADDRREQ; 662 ret_val = -EDESTADDRREQ;
663 break; 663 break;
664 case NETLBL_NLTYPE_CIPSOV4: 664 case NETLBL_NLTYPE_CIPSOV4:
665 ret_val = cipso_v4_sock_setattr(sk, 665 ret_val = cipso_v4_sock_setattr(sk,
666 dom_entry->type_def.cipsov4, 666 dom_entry->def.cipso,
667 secattr); 667 secattr);
668 break; 668 break;
669 case NETLBL_NLTYPE_UNLABELED: 669 case NETLBL_NLTYPE_UNLABELED:
670 ret_val = 0; 670 ret_val = 0;
@@ -754,23 +754,22 @@ int netlbl_conn_setattr(struct sock *sk,
754{ 754{
755 int ret_val; 755 int ret_val;
756 struct sockaddr_in *addr4; 756 struct sockaddr_in *addr4;
757 struct netlbl_domaddr4_map *af4_entry; 757 struct netlbl_dommap_def *entry;
758 758
759 rcu_read_lock(); 759 rcu_read_lock();
760 switch (addr->sa_family) { 760 switch (addr->sa_family) {
761 case AF_INET: 761 case AF_INET:
762 addr4 = (struct sockaddr_in *)addr; 762 addr4 = (struct sockaddr_in *)addr;
763 af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, 763 entry = netlbl_domhsh_getentry_af4(secattr->domain,
764 addr4->sin_addr.s_addr); 764 addr4->sin_addr.s_addr);
765 if (af4_entry == NULL) { 765 if (entry == NULL) {
766 ret_val = -ENOENT; 766 ret_val = -ENOENT;
767 goto conn_setattr_return; 767 goto conn_setattr_return;
768 } 768 }
769 switch (af4_entry->type) { 769 switch (entry->type) {
770 case NETLBL_NLTYPE_CIPSOV4: 770 case NETLBL_NLTYPE_CIPSOV4:
771 ret_val = cipso_v4_sock_setattr(sk, 771 ret_val = cipso_v4_sock_setattr(sk,
772 af4_entry->type_def.cipsov4, 772 entry->cipso, secattr);
773 secattr);
774 break; 773 break;
775 case NETLBL_NLTYPE_UNLABELED: 774 case NETLBL_NLTYPE_UNLABELED:
776 /* just delete the protocols we support for right now 775 /* just delete the protocols we support for right now
@@ -812,36 +811,21 @@ int netlbl_req_setattr(struct request_sock *req,
812 const struct netlbl_lsm_secattr *secattr) 811 const struct netlbl_lsm_secattr *secattr)
813{ 812{
814 int ret_val; 813 int ret_val;
815 struct netlbl_dom_map *dom_entry; 814 struct netlbl_dommap_def *entry;
816 struct netlbl_domaddr4_map *af4_entry;
817 u32 proto_type;
818 struct cipso_v4_doi *proto_cv4;
819 815
820 rcu_read_lock(); 816 rcu_read_lock();
821 dom_entry = netlbl_domhsh_getentry(secattr->domain);
822 if (dom_entry == NULL) {
823 ret_val = -ENOENT;
824 goto req_setattr_return;
825 }
826 switch (req->rsk_ops->family) { 817 switch (req->rsk_ops->family) {
827 case AF_INET: 818 case AF_INET:
828 if (dom_entry->type == NETLBL_NLTYPE_ADDRSELECT) { 819 entry = netlbl_domhsh_getentry_af4(secattr->domain,
829 struct inet_request_sock *req_inet = inet_rsk(req); 820 inet_rsk(req)->rmt_addr);
830 af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, 821 if (entry == NULL) {
831 req_inet->rmt_addr); 822 ret_val = -ENOENT;
832 if (af4_entry == NULL) { 823 goto req_setattr_return;
833 ret_val = -ENOENT;
834 goto req_setattr_return;
835 }
836 proto_type = af4_entry->type;
837 proto_cv4 = af4_entry->type_def.cipsov4;
838 } else {
839 proto_type = dom_entry->type;
840 proto_cv4 = dom_entry->type_def.cipsov4;
841 } 824 }
842 switch (proto_type) { 825 switch (entry->type) {
843 case NETLBL_NLTYPE_CIPSOV4: 826 case NETLBL_NLTYPE_CIPSOV4:
844 ret_val = cipso_v4_req_setattr(req, proto_cv4, secattr); 827 ret_val = cipso_v4_req_setattr(req,
828 entry->cipso, secattr);
845 break; 829 break;
846 case NETLBL_NLTYPE_UNLABELED: 830 case NETLBL_NLTYPE_UNLABELED:
847 /* just delete the protocols we support for right now 831 /* just delete the protocols we support for right now
@@ -899,23 +883,21 @@ int netlbl_skbuff_setattr(struct sk_buff *skb,
899{ 883{
900 int ret_val; 884 int ret_val;
901 struct iphdr *hdr4; 885 struct iphdr *hdr4;
902 struct netlbl_domaddr4_map *af4_entry; 886 struct netlbl_dommap_def *entry;
903 887
904 rcu_read_lock(); 888 rcu_read_lock();
905 switch (family) { 889 switch (family) {
906 case AF_INET: 890 case AF_INET:
907 hdr4 = ip_hdr(skb); 891 hdr4 = ip_hdr(skb);
 908 af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, 892 entry = netlbl_domhsh_getentry_af4(secattr->domain, hdr4->daddr);
909 hdr4->daddr); 893 if (entry == NULL) {
910 if (af4_entry == NULL) {
911 ret_val = -ENOENT; 894 ret_val = -ENOENT;
912 goto skbuff_setattr_return; 895 goto skbuff_setattr_return;
913 } 896 }
914 switch (af4_entry->type) { 897 switch (entry->type) {
915 case NETLBL_NLTYPE_CIPSOV4: 898 case NETLBL_NLTYPE_CIPSOV4:
916 ret_val = cipso_v4_skbuff_setattr(skb, 899 ret_val = cipso_v4_skbuff_setattr(skb, entry->cipso,
917 af4_entry->type_def.cipsov4, 900 secattr);
918 secattr);
919 break; 901 break;
920 case NETLBL_NLTYPE_UNLABELED: 902 case NETLBL_NLTYPE_UNLABELED:
921 /* just delete the protocols we support for right now 903 /* just delete the protocols we support for right now
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index c5384ffc6146..dd1c37d7acbc 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -104,7 +104,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
104 ret_val = -ENOMEM; 104 ret_val = -ENOMEM;
105 goto add_failure; 105 goto add_failure;
106 } 106 }
107 entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); 107 entry->def.type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
108 if (info->attrs[NLBL_MGMT_A_DOMAIN]) { 108 if (info->attrs[NLBL_MGMT_A_DOMAIN]) {
109 size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); 109 size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]);
110 entry->domain = kmalloc(tmp_size, GFP_KERNEL); 110 entry->domain = kmalloc(tmp_size, GFP_KERNEL);
@@ -116,12 +116,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
116 info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); 116 info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size);
117 } 117 }
118 118
 119 /* NOTE: internally we allow/use an entry->type value of 119 /* NOTE: internally we allow/use an entry->def.type value of
120 * NETLBL_NLTYPE_ADDRSELECT but we don't currently allow users 120 * NETLBL_NLTYPE_ADDRSELECT but we don't currently allow users
121 * to pass that as a protocol value because we need to know the 121 * to pass that as a protocol value because we need to know the
122 * "real" protocol */ 122 * "real" protocol */
123 123
124 switch (entry->type) { 124 switch (entry->def.type) {
125 case NETLBL_NLTYPE_UNLABELED: 125 case NETLBL_NLTYPE_UNLABELED:
126 break; 126 break;
127 case NETLBL_NLTYPE_CIPSOV4: 127 case NETLBL_NLTYPE_CIPSOV4:
@@ -132,7 +132,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
132 cipsov4 = cipso_v4_doi_getdef(tmp_val); 132 cipsov4 = cipso_v4_doi_getdef(tmp_val);
133 if (cipsov4 == NULL) 133 if (cipsov4 == NULL)
134 goto add_failure; 134 goto add_failure;
135 entry->type_def.cipsov4 = cipsov4; 135 entry->def.cipso = cipsov4;
136 break; 136 break;
137 default: 137 default:
138 goto add_failure; 138 goto add_failure;
@@ -172,9 +172,9 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
172 map->list.addr = addr->s_addr & mask->s_addr; 172 map->list.addr = addr->s_addr & mask->s_addr;
173 map->list.mask = mask->s_addr; 173 map->list.mask = mask->s_addr;
174 map->list.valid = 1; 174 map->list.valid = 1;
175 map->type = entry->type; 175 map->def.type = entry->def.type;
176 if (cipsov4) 176 if (cipsov4)
177 map->type_def.cipsov4 = cipsov4; 177 map->def.cipso = cipsov4;
178 178
179 ret_val = netlbl_af4list_add(&map->list, &addrmap->list4); 179 ret_val = netlbl_af4list_add(&map->list, &addrmap->list4);
180 if (ret_val != 0) { 180 if (ret_val != 0) {
@@ -182,8 +182,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
182 goto add_failure; 182 goto add_failure;
183 } 183 }
184 184
185 entry->type = NETLBL_NLTYPE_ADDRSELECT; 185 entry->def.type = NETLBL_NLTYPE_ADDRSELECT;
186 entry->type_def.addrsel = addrmap; 186 entry->def.addrsel = addrmap;
187#if IS_ENABLED(CONFIG_IPV6) 187#if IS_ENABLED(CONFIG_IPV6)
188 } else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) { 188 } else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) {
189 struct in6_addr *addr; 189 struct in6_addr *addr;
@@ -223,7 +223,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
223 map->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; 223 map->list.addr.s6_addr32[3] &= mask->s6_addr32[3];
224 map->list.mask = *mask; 224 map->list.mask = *mask;
225 map->list.valid = 1; 225 map->list.valid = 1;
226 map->type = entry->type; 226 map->def.type = entry->def.type;
227 227
228 ret_val = netlbl_af6list_add(&map->list, &addrmap->list6); 228 ret_val = netlbl_af6list_add(&map->list, &addrmap->list6);
229 if (ret_val != 0) { 229 if (ret_val != 0) {
@@ -231,8 +231,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
231 goto add_failure; 231 goto add_failure;
232 } 232 }
233 233
234 entry->type = NETLBL_NLTYPE_ADDRSELECT; 234 entry->def.type = NETLBL_NLTYPE_ADDRSELECT;
235 entry->type_def.addrsel = addrmap; 235 entry->def.addrsel = addrmap;
236#endif /* IPv6 */ 236#endif /* IPv6 */
237 } 237 }
238 238
@@ -281,14 +281,13 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb,
281 return ret_val; 281 return ret_val;
282 } 282 }
283 283
284 switch (entry->type) { 284 switch (entry->def.type) {
285 case NETLBL_NLTYPE_ADDRSELECT: 285 case NETLBL_NLTYPE_ADDRSELECT:
286 nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST); 286 nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST);
287 if (nla_a == NULL) 287 if (nla_a == NULL)
288 return -ENOMEM; 288 return -ENOMEM;
289 289
290 netlbl_af4list_foreach_rcu(iter4, 290 netlbl_af4list_foreach_rcu(iter4, &entry->def.addrsel->list4) {
291 &entry->type_def.addrsel->list4) {
292 struct netlbl_domaddr4_map *map4; 291 struct netlbl_domaddr4_map *map4;
293 struct in_addr addr_struct; 292 struct in_addr addr_struct;
294 293
@@ -310,13 +309,13 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb,
310 return ret_val; 309 return ret_val;
311 map4 = netlbl_domhsh_addr4_entry(iter4); 310 map4 = netlbl_domhsh_addr4_entry(iter4);
312 ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, 311 ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL,
313 map4->type); 312 map4->def.type);
314 if (ret_val != 0) 313 if (ret_val != 0)
315 return ret_val; 314 return ret_val;
316 switch (map4->type) { 315 switch (map4->def.type) {
317 case NETLBL_NLTYPE_CIPSOV4: 316 case NETLBL_NLTYPE_CIPSOV4:
318 ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, 317 ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI,
319 map4->type_def.cipsov4->doi); 318 map4->def.cipso->doi);
320 if (ret_val != 0) 319 if (ret_val != 0)
321 return ret_val; 320 return ret_val;
322 break; 321 break;
@@ -325,8 +324,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb,
325 nla_nest_end(skb, nla_b); 324 nla_nest_end(skb, nla_b);
326 } 325 }
327#if IS_ENABLED(CONFIG_IPV6) 326#if IS_ENABLED(CONFIG_IPV6)
328 netlbl_af6list_foreach_rcu(iter6, 327 netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6) {
329 &entry->type_def.addrsel->list6) {
330 struct netlbl_domaddr6_map *map6; 328 struct netlbl_domaddr6_map *map6;
331 329
332 nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); 330 nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR);
@@ -345,7 +343,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb,
345 return ret_val; 343 return ret_val;
346 map6 = netlbl_domhsh_addr6_entry(iter6); 344 map6 = netlbl_domhsh_addr6_entry(iter6);
347 ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, 345 ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL,
348 map6->type); 346 map6->def.type);
349 if (ret_val != 0) 347 if (ret_val != 0)
350 return ret_val; 348 return ret_val;
351 349
@@ -356,14 +354,14 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb,
356 nla_nest_end(skb, nla_a); 354 nla_nest_end(skb, nla_a);
357 break; 355 break;
358 case NETLBL_NLTYPE_UNLABELED: 356 case NETLBL_NLTYPE_UNLABELED:
359 ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); 357 ret_val = nla_put_u32(skb,NLBL_MGMT_A_PROTOCOL,entry->def.type);
360 break; 358 break;
361 case NETLBL_NLTYPE_CIPSOV4: 359 case NETLBL_NLTYPE_CIPSOV4:
362 ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); 360 ret_val = nla_put_u32(skb,NLBL_MGMT_A_PROTOCOL,entry->def.type);
363 if (ret_val != 0) 361 if (ret_val != 0)
364 return ret_val; 362 return ret_val;
365 ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, 363 ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI,
366 entry->type_def.cipsov4->doi); 364 entry->def.cipso->doi);
367 break; 365 break;
368 } 366 }
369 367
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 8a6c6ea466d8..8f0897407a2c 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -708,7 +708,7 @@ unlhsh_remove_return:
708 * netlbl_unlhsh_netdev_handler - Network device notification handler 708 * netlbl_unlhsh_netdev_handler - Network device notification handler
709 * @this: notifier block 709 * @this: notifier block
710 * @event: the event 710 * @event: the event
711 * @ptr: the network device (cast to void) 711 * @ptr: the netdevice notifier info (cast to void)
712 * 712 *
713 * Description: 713 * Description:
714 * Handle network device events, although at present all we care about is a 714 * Handle network device events, although at present all we care about is a
@@ -717,10 +717,9 @@ unlhsh_remove_return:
717 * 717 *
718 */ 718 */
719static int netlbl_unlhsh_netdev_handler(struct notifier_block *this, 719static int netlbl_unlhsh_netdev_handler(struct notifier_block *this,
720 unsigned long event, 720 unsigned long event, void *ptr)
721 void *ptr)
722{ 721{
723 struct net_device *dev = ptr; 722 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
724 struct netlbl_unlhsh_iface *iface = NULL; 723 struct netlbl_unlhsh_iface *iface = NULL;
725 724
726 if (!net_eq(dev_net(dev), &init_net)) 725 if (!net_eq(dev_net(dev), &init_net))
@@ -1542,7 +1541,7 @@ int __init netlbl_unlabel_defconf(void)
1542 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 1541 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1543 if (entry == NULL) 1542 if (entry == NULL)
1544 return -ENOMEM; 1543 return -ENOMEM;
1545 entry->type = NETLBL_NLTYPE_UNLABELED; 1544 entry->def.type = NETLBL_NLTYPE_UNLABELED;
1546 ret_val = netlbl_domhsh_add_default(entry, &audit_info); 1545 ret_val = netlbl_domhsh_add_default(entry, &audit_info);
1547 if (ret_val != 0) 1546 if (ret_val != 0)
1548 return ret_val; 1547 return ret_val;
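
The handler change follows the tree-wide 3.11 conversion in which netdev notifiers receive a struct netdev_notifier_info pointer rather than the net_device itself, so every handler now goes through netdev_notifier_info_to_dev(). The generic post-conversion shape, as a sketch (my_netdev_event and my_netdev_notifier are hypothetical names):

static int my_netdev_event(struct notifier_block *nb,
			   unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (event == NETDEV_DOWN)
		pr_debug("%s is going down\n", dev->name);
	return NOTIFY_DONE;
}

static struct notifier_block my_netdev_notifier = {
	.notifier_call = my_netdev_event,
};
/* registered via register_netdevice_notifier(&my_netdev_notifier) */
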
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index d0b3dd60d386..8df7f64c6db3 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -57,6 +57,7 @@
57#include <linux/audit.h> 57#include <linux/audit.h>
58#include <linux/mutex.h> 58#include <linux/mutex.h>
59#include <linux/vmalloc.h> 59#include <linux/vmalloc.h>
60#include <linux/if_arp.h>
60#include <asm/cacheflush.h> 61#include <asm/cacheflush.h>
61 62
62#include <net/net_namespace.h> 63#include <net/net_namespace.h>
@@ -101,6 +102,9 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
101 102
102static ATOMIC_NOTIFIER_HEAD(netlink_chain); 103static ATOMIC_NOTIFIER_HEAD(netlink_chain);
103 104
105static DEFINE_SPINLOCK(netlink_tap_lock);
106static struct list_head netlink_tap_all __read_mostly;
107
104static inline u32 netlink_group_mask(u32 group) 108static inline u32 netlink_group_mask(u32 group)
105{ 109{
106 return group ? 1 << (group - 1) : 0; 110 return group ? 1 << (group - 1) : 0;
@@ -111,6 +115,130 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u
111 return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; 115 return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
112} 116}
113 117
118int netlink_add_tap(struct netlink_tap *nt)
119{
120 if (unlikely(nt->dev->type != ARPHRD_NETLINK))
121 return -EINVAL;
122
123 spin_lock(&netlink_tap_lock);
124 list_add_rcu(&nt->list, &netlink_tap_all);
125 spin_unlock(&netlink_tap_lock);
126
127 if (nt->module)
128 __module_get(nt->module);
129
130 return 0;
131}
132EXPORT_SYMBOL_GPL(netlink_add_tap);
133
134int __netlink_remove_tap(struct netlink_tap *nt)
135{
136 bool found = false;
137 struct netlink_tap *tmp;
138
139 spin_lock(&netlink_tap_lock);
140
141 list_for_each_entry(tmp, &netlink_tap_all, list) {
142 if (nt == tmp) {
143 list_del_rcu(&nt->list);
144 found = true;
145 goto out;
146 }
147 }
148
149 pr_warn("__netlink_remove_tap: %p not found\n", nt);
150out:
151 spin_unlock(&netlink_tap_lock);
152
153 if (found && nt->module)
154 module_put(nt->module);
155
156 return found ? 0 : -ENODEV;
157}
158EXPORT_SYMBOL_GPL(__netlink_remove_tap);
159
160int netlink_remove_tap(struct netlink_tap *nt)
161{
162 int ret;
163
164 ret = __netlink_remove_tap(nt);
165 synchronize_net();
166
167 return ret;
168}
169EXPORT_SYMBOL_GPL(netlink_remove_tap);
170
171static bool netlink_filter_tap(const struct sk_buff *skb)
172{
173 struct sock *sk = skb->sk;
174 bool pass = false;
175
176 /* We take the more conservative approach and
177 * whitelist socket protocols that may pass.
178 */
179 switch (sk->sk_protocol) {
180 case NETLINK_ROUTE:
181 case NETLINK_USERSOCK:
182 case NETLINK_SOCK_DIAG:
183 case NETLINK_NFLOG:
184 case NETLINK_XFRM:
185 case NETLINK_FIB_LOOKUP:
186 case NETLINK_NETFILTER:
187 case NETLINK_GENERIC:
188 pass = true;
189 break;
190 }
191
192 return pass;
193}
194
195static int __netlink_deliver_tap_skb(struct sk_buff *skb,
196 struct net_device *dev)
197{
198 struct sk_buff *nskb;
199 struct sock *sk = skb->sk;
200 int ret = -ENOMEM;
201
202 dev_hold(dev);
203 nskb = skb_clone(skb, GFP_ATOMIC);
204 if (nskb) {
205 nskb->dev = dev;
206 nskb->protocol = htons((u16) sk->sk_protocol);
207
208 ret = dev_queue_xmit(nskb);
209 if (unlikely(ret > 0))
210 ret = net_xmit_errno(ret);
211 }
212
213 dev_put(dev);
214 return ret;
215}
216
217static void __netlink_deliver_tap(struct sk_buff *skb)
218{
219 int ret;
220 struct netlink_tap *tmp;
221
222 if (!netlink_filter_tap(skb))
223 return;
224
225 list_for_each_entry_rcu(tmp, &netlink_tap_all, list) {
226 ret = __netlink_deliver_tap_skb(skb, tmp->dev);
227 if (unlikely(ret))
228 break;
229 }
230}
231
232static void netlink_deliver_tap(struct sk_buff *skb)
233{
234 rcu_read_lock();
235
236 if (unlikely(!list_empty(&netlink_tap_all)))
237 __netlink_deliver_tap(skb);
238
239 rcu_read_unlock();
240}
241
114static void netlink_overrun(struct sock *sk) 242static void netlink_overrun(struct sock *sk)
115{ 243{
116 struct netlink_sock *nlk = nlk_sk(sk); 244 struct netlink_sock *nlk = nlk_sk(sk);
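
The tap API added above lets a monitoring device mirror every netlink message onto a packet-capturable netdevice; the nlmon driver introduced alongside this code is the intended consumer. A registration sketch (my_tap and the attach/detach helpers are hypothetical; the device must be of type ARPHRD_NETLINK or netlink_add_tap() returns -EINVAL):

static struct netlink_tap my_tap = {
	.module = THIS_MODULE,
};

static int my_tap_attach(struct net_device *nlmon_dev)
{
	my_tap.dev = nlmon_dev;		/* nlmon_dev->type == ARPHRD_NETLINK */
	return netlink_add_tap(&my_tap);
}

static void my_tap_detach(void)
{
	netlink_remove_tap(&my_tap);	/* waits out RCU readers via synchronize_net() */
}
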
@@ -196,14 +324,14 @@ static void **alloc_pg_vec(struct netlink_sock *nlk,
196{ 324{
197 unsigned int block_nr = req->nm_block_nr; 325 unsigned int block_nr = req->nm_block_nr;
198 unsigned int i; 326 unsigned int i;
199 void **pg_vec, *ptr; 327 void **pg_vec;
200 328
201 pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL); 329 pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
202 if (pg_vec == NULL) 330 if (pg_vec == NULL)
203 return NULL; 331 return NULL;
204 332
205 for (i = 0; i < block_nr; i++) { 333 for (i = 0; i < block_nr; i++) {
206 pg_vec[i] = ptr = alloc_one_pg_vec_page(order); 334 pg_vec[i] = alloc_one_pg_vec_page(order);
207 if (pg_vec[i] == NULL) 335 if (pg_vec[i] == NULL)
208 goto err1; 336 goto err1;
209 } 337 }
@@ -371,7 +499,7 @@ static int netlink_mmap(struct file *file, struct socket *sock,
371 err = 0; 499 err = 0;
372out: 500out:
373 mutex_unlock(&nlk->pg_vec_lock); 501 mutex_unlock(&nlk->pg_vec_lock);
374 return 0; 502 return err;
375} 503}
376 504
377static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr) 505static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
@@ -497,7 +625,7 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock,
497 * for dumps is performed here. A dump is allowed to continue 625 * for dumps is performed here. A dump is allowed to continue
498 * if at least half the ring is unused. 626 * if at least half the ring is unused.
499 */ 627 */
500 while (nlk->cb != NULL && netlink_dump_space(nlk)) { 628 while (nlk->cb_running && netlink_dump_space(nlk)) {
501 err = netlink_dump(sk); 629 err = netlink_dump(sk);
502 if (err < 0) { 630 if (err < 0) {
503 sk->sk_err = err; 631 sk->sk_err = err;
@@ -704,18 +832,6 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
704#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0 832#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0
705#endif /* CONFIG_NETLINK_MMAP */ 833#endif /* CONFIG_NETLINK_MMAP */
706 834
707static void netlink_destroy_callback(struct netlink_callback *cb)
708{
709 kfree_skb(cb->skb);
710 kfree(cb);
711}
712
713static void netlink_consume_callback(struct netlink_callback *cb)
714{
715 consume_skb(cb->skb);
716 kfree(cb);
717}
718
719static void netlink_skb_destructor(struct sk_buff *skb) 835static void netlink_skb_destructor(struct sk_buff *skb)
720{ 836{
721#ifdef CONFIG_NETLINK_MMAP 837#ifdef CONFIG_NETLINK_MMAP
@@ -750,6 +866,13 @@ static void netlink_skb_destructor(struct sk_buff *skb)
750 skb->head = NULL; 866 skb->head = NULL;
751 } 867 }
752#endif 868#endif
869 if (is_vmalloc_addr(skb->head)) {
870 if (!skb->cloned ||
871 !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
872 vfree(skb->head);
873
874 skb->head = NULL;
875 }
753 if (skb->sk != NULL) 876 if (skb->sk != NULL)
754 sock_rfree(skb); 877 sock_rfree(skb);
755} 878}
@@ -767,12 +890,12 @@ static void netlink_sock_destruct(struct sock *sk)
767{ 890{
768 struct netlink_sock *nlk = nlk_sk(sk); 891 struct netlink_sock *nlk = nlk_sk(sk);
769 892
770 if (nlk->cb) { 893 if (nlk->cb_running) {
771 if (nlk->cb->done) 894 if (nlk->cb.done)
772 nlk->cb->done(nlk->cb); 895 nlk->cb.done(&nlk->cb);
773 896
774 module_put(nlk->cb->module); 897 module_put(nlk->cb.module);
775 netlink_destroy_callback(nlk->cb); 898 kfree_skb(nlk->cb.skb);
776 } 899 }
777 900
778 skb_queue_purge(&sk->sk_receive_queue); 901 skb_queue_purge(&sk->sk_receive_queue);
@@ -854,16 +977,23 @@ netlink_unlock_table(void)
854 wake_up(&nl_table_wait); 977 wake_up(&nl_table_wait);
855} 978}
856 979
980static bool netlink_compare(struct net *net, struct sock *sk)
981{
982 return net_eq(sock_net(sk), net);
983}
984
857static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) 985static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
858{ 986{
859 struct nl_portid_hash *hash = &nl_table[protocol].hash; 987 struct netlink_table *table = &nl_table[protocol];
988 struct nl_portid_hash *hash = &table->hash;
860 struct hlist_head *head; 989 struct hlist_head *head;
861 struct sock *sk; 990 struct sock *sk;
862 991
863 read_lock(&nl_table_lock); 992 read_lock(&nl_table_lock);
864 head = nl_portid_hashfn(hash, portid); 993 head = nl_portid_hashfn(hash, portid);
865 sk_for_each(sk, head) { 994 sk_for_each(sk, head) {
866 if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) { 995 if (table->compare(net, sk) &&
996 (nlk_sk(sk)->portid == portid)) {
867 sock_hold(sk); 997 sock_hold(sk);
868 goto found; 998 goto found;
869 } 999 }
@@ -976,7 +1106,8 @@ netlink_update_listeners(struct sock *sk)
976 1106
977static int netlink_insert(struct sock *sk, struct net *net, u32 portid) 1107static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
978{ 1108{
979 struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; 1109 struct netlink_table *table = &nl_table[sk->sk_protocol];
1110 struct nl_portid_hash *hash = &table->hash;
980 struct hlist_head *head; 1111 struct hlist_head *head;
981 int err = -EADDRINUSE; 1112 int err = -EADDRINUSE;
982 struct sock *osk; 1113 struct sock *osk;
@@ -986,7 +1117,8 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
986 head = nl_portid_hashfn(hash, portid); 1117 head = nl_portid_hashfn(hash, portid);
987 len = 0; 1118 len = 0;
988 sk_for_each(osk, head) { 1119 sk_for_each(osk, head) {
989 if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid)) 1120 if (table->compare(net, osk) &&
1121 (nlk_sk(osk)->portid == portid))
990 break; 1122 break;
991 len++; 1123 len++;
992 } 1124 }
@@ -1183,7 +1315,8 @@ static int netlink_autobind(struct socket *sock)
1183{ 1315{
1184 struct sock *sk = sock->sk; 1316 struct sock *sk = sock->sk;
1185 struct net *net = sock_net(sk); 1317 struct net *net = sock_net(sk);
1186 struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; 1318 struct netlink_table *table = &nl_table[sk->sk_protocol];
1319 struct nl_portid_hash *hash = &table->hash;
1187 struct hlist_head *head; 1320 struct hlist_head *head;
1188 struct sock *osk; 1321 struct sock *osk;
1189 s32 portid = task_tgid_vnr(current); 1322 s32 portid = task_tgid_vnr(current);
@@ -1195,7 +1328,7 @@ retry:
1195 netlink_table_grab(); 1328 netlink_table_grab();
1196 head = nl_portid_hashfn(hash, portid); 1329 head = nl_portid_hashfn(hash, portid);
1197 sk_for_each(osk, head) { 1330 sk_for_each(osk, head) {
1198 if (!net_eq(sock_net(osk), net)) 1331 if (!table->compare(net, osk))
1199 continue; 1332 continue;
1200 if (nlk_sk(osk)->portid == portid) { 1333 if (nlk_sk(osk)->portid == portid) {
1201 /* Bind collision, search negative portid values. */ 1334 /* Bind collision, search negative portid values. */
@@ -1420,6 +1553,33 @@ struct sock *netlink_getsockbyfilp(struct file *filp)
1420 return sock; 1553 return sock;
1421} 1554}
1422 1555
1556static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
1557 int broadcast)
1558{
1559 struct sk_buff *skb;
1560 void *data;
1561
1562 if (size <= NLMSG_GOODSIZE || broadcast)
1563 return alloc_skb(size, GFP_KERNEL);
1564
1565 size = SKB_DATA_ALIGN(size) +
1566 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1567
1568 data = vmalloc(size);
1569 if (data == NULL)
1570 return NULL;
1571
1572 skb = build_skb(data, size);
1573 if (skb == NULL)
1574 vfree(data);
1575 else {
1576 skb->head_frag = 0;
1577 skb->destructor = netlink_skb_destructor;
1578 }
1579
1580 return skb;
1581}
1582
1423/* 1583/*
1424 * Attach a skb to a netlink socket. 1584 * Attach a skb to a netlink socket.
1425 * The caller must hold a reference to the destination socket. On error, the 1585 * The caller must hold a reference to the destination socket. On error, the
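
netlink_alloc_large_skb() pairs with the destructor hunk earlier in this file: a vmalloc()ed head must never reach the regular kfree()-based skb free path, which is why skb->destructor is forced to netlink_skb_destructor and the free side keys off is_vmalloc_addr(). The invariant, condensed into a sketch (free_large_head is a hypothetical name):

static void free_large_head(struct sk_buff *skb)
{
	if (is_vmalloc_addr(skb->head)) {
		vfree(skb->head);	/* kfree()/put_page() would corrupt here */
		skb->head = NULL;
	}
}
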
@@ -1475,6 +1635,8 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
1475{ 1635{
1476 int len = skb->len; 1636 int len = skb->len;
1477 1637
1638 netlink_deliver_tap(skb);
1639
1478#ifdef CONFIG_NETLINK_MMAP 1640#ifdef CONFIG_NETLINK_MMAP
1479 if (netlink_skb_is_mmaped(skb)) 1641 if (netlink_skb_is_mmaped(skb))
1480 netlink_queue_mmaped_skb(sk, skb); 1642 netlink_queue_mmaped_skb(sk, skb);
@@ -1510,7 +1672,7 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
1510 return skb; 1672 return skb;
1511 1673
1512 delta = skb->end - skb->tail; 1674 delta = skb->end - skb->tail;
1513 if (delta * 2 < skb->truesize) 1675 if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
1514 return skb; 1676 return skb;
1515 1677
1516 if (skb_shared(skb)) { 1678 if (skb_shared(skb)) {
@@ -1535,6 +1697,11 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
1535 1697
1536 ret = -ECONNREFUSED; 1698 ret = -ECONNREFUSED;
1537 if (nlk->netlink_rcv != NULL) { 1699 if (nlk->netlink_rcv != NULL) {
1700 /* We could do a netlink_deliver_tap(skb) here as well
1701 * but since this is intended for the kernel only, we
1702 * should rather let it stay under the hood.
1703 */
1704
1538 ret = skb->len; 1705 ret = skb->len;
1539 netlink_skb_set_owner_r(skb, sk); 1706 netlink_skb_set_owner_r(skb, sk);
1540 NETLINK_CB(skb).sk = ssk; 1707 NETLINK_CB(skb).sk = ssk;
@@ -2096,7 +2263,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
2096 if (len > sk->sk_sndbuf - 32) 2263 if (len > sk->sk_sndbuf - 32)
2097 goto out; 2264 goto out;
2098 err = -ENOBUFS; 2265 err = -ENOBUFS;
2099 skb = alloc_skb(len, GFP_KERNEL); 2266 skb = netlink_alloc_large_skb(len, dst_group);
2100 if (skb == NULL) 2267 if (skb == NULL)
2101 goto out; 2268 goto out;
2102 2269
@@ -2201,7 +2368,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
2201 2368
2202 skb_free_datagram(sk, skb); 2369 skb_free_datagram(sk, skb);
2203 2370
2204 if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { 2371 if (nlk->cb_running &&
2372 atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
2205 ret = netlink_dump(sk); 2373 ret = netlink_dump(sk);
2206 if (ret) { 2374 if (ret) {
2207 sk->sk_err = ret; 2375 sk->sk_err = ret;
@@ -2285,6 +2453,8 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
2285 if (cfg) { 2453 if (cfg) {
2286 nl_table[unit].bind = cfg->bind; 2454 nl_table[unit].bind = cfg->bind;
2287 nl_table[unit].flags = cfg->flags; 2455 nl_table[unit].flags = cfg->flags;
2456 if (cfg->compare)
2457 nl_table[unit].compare = cfg->compare;
2288 } 2458 }
2289 nl_table[unit].registered = 1; 2459 nl_table[unit].registered = 1;
2290 } else { 2460 } else {
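
The new cfg->compare hook lets a protocol override the default same-namespace test that netlink_lookup(), netlink_insert() and netlink_autobind() now all route through table->compare. A registration sketch (my_input and my_compare are hypothetical; omitting .compare keeps the stock netlink_compare() behaviour):

static void my_input(struct sk_buff *skb);	/* hypothetical rx handler */

static bool my_compare(struct net *net, struct sock *sk)
{
	return net_eq(sock_net(sk), net);	/* same as the default */
}

static struct sock *my_kernel_sock(void)
{
	struct netlink_kernel_cfg cfg = {
		.input   = my_input,
		.compare = my_compare,
	};

	return netlink_kernel_create(&init_net, NETLINK_USERSOCK, &cfg);
}
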
@@ -2415,13 +2585,12 @@ static int netlink_dump(struct sock *sk)
2415 int alloc_size; 2585 int alloc_size;
2416 2586
2417 mutex_lock(nlk->cb_mutex); 2587 mutex_lock(nlk->cb_mutex);
2418 2588 if (!nlk->cb_running) {
2419 cb = nlk->cb;
2420 if (cb == NULL) {
2421 err = -EINVAL; 2589 err = -EINVAL;
2422 goto errout_skb; 2590 goto errout_skb;
2423 } 2591 }
2424 2592
2593 cb = &nlk->cb;
2425 alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); 2594 alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
2426 2595
2427 if (!netlink_rx_is_mmaped(sk) && 2596 if (!netlink_rx_is_mmaped(sk) &&
@@ -2459,11 +2628,11 @@ static int netlink_dump(struct sock *sk)
2459 2628
2460 if (cb->done) 2629 if (cb->done)
2461 cb->done(cb); 2630 cb->done(cb);
2462 nlk->cb = NULL;
2463 mutex_unlock(nlk->cb_mutex);
2464 2631
2632 nlk->cb_running = false;
2633 mutex_unlock(nlk->cb_mutex);
2465 module_put(cb->module); 2634 module_put(cb->module);
2466 netlink_consume_callback(cb); 2635 consume_skb(cb->skb);
2467 return 0; 2636 return 0;
2468 2637
2469errout_skb: 2638errout_skb:
@@ -2481,59 +2650,51 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
2481 struct netlink_sock *nlk; 2650 struct netlink_sock *nlk;
2482 int ret; 2651 int ret;
2483 2652
2484 cb = kzalloc(sizeof(*cb), GFP_KERNEL);
2485 if (cb == NULL)
2486 return -ENOBUFS;
2487
2488 /* Memory mapped dump requests need to be copied to avoid looping 2653 /* Memory mapped dump requests need to be copied to avoid looping
 2489 * on the pending state in netlink_mmap_sendmsg() while the CB holds 2654 * on the pending state in netlink_mmap_sendmsg() while the CB holds
2490 * a reference to the skb. 2655 * a reference to the skb.
2491 */ 2656 */
2492 if (netlink_skb_is_mmaped(skb)) { 2657 if (netlink_skb_is_mmaped(skb)) {
2493 skb = skb_copy(skb, GFP_KERNEL); 2658 skb = skb_copy(skb, GFP_KERNEL);
2494 if (skb == NULL) { 2659 if (skb == NULL)
2495 kfree(cb);
2496 return -ENOBUFS; 2660 return -ENOBUFS;
2497 }
2498 } else 2661 } else
2499 atomic_inc(&skb->users); 2662 atomic_inc(&skb->users);
2500 2663
2501 cb->dump = control->dump;
2502 cb->done = control->done;
2503 cb->nlh = nlh;
2504 cb->data = control->data;
2505 cb->module = control->module;
2506 cb->min_dump_alloc = control->min_dump_alloc;
2507 cb->skb = skb;
2508
2509 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); 2664 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
2510 if (sk == NULL) { 2665 if (sk == NULL) {
2511 netlink_destroy_callback(cb); 2666 ret = -ECONNREFUSED;
2512 return -ECONNREFUSED; 2667 goto error_free;
2513 } 2668 }
2514 nlk = nlk_sk(sk);
2515 2669
2670 nlk = nlk_sk(sk);
2516 mutex_lock(nlk->cb_mutex); 2671 mutex_lock(nlk->cb_mutex);
2517 /* A dump is in progress... */ 2672 /* A dump is in progress... */
2518 if (nlk->cb) { 2673 if (nlk->cb_running) {
2519 mutex_unlock(nlk->cb_mutex);
2520 netlink_destroy_callback(cb);
2521 ret = -EBUSY; 2674 ret = -EBUSY;
2522 goto out; 2675 goto error_unlock;
2523 } 2676 }
2524 /* add reference of module which cb->dump belongs to */ 2677 /* add reference of module which cb->dump belongs to */
2525 if (!try_module_get(cb->module)) { 2678 if (!try_module_get(control->module)) {
2526 mutex_unlock(nlk->cb_mutex);
2527 netlink_destroy_callback(cb);
2528 ret = -EPROTONOSUPPORT; 2679 ret = -EPROTONOSUPPORT;
2529 goto out; 2680 goto error_unlock;
2530 } 2681 }
2531 2682
2532 nlk->cb = cb; 2683 cb = &nlk->cb;
2684 memset(cb, 0, sizeof(*cb));
2685 cb->dump = control->dump;
2686 cb->done = control->done;
2687 cb->nlh = nlh;
2688 cb->data = control->data;
2689 cb->module = control->module;
2690 cb->min_dump_alloc = control->min_dump_alloc;
2691 cb->skb = skb;
2692
2693 nlk->cb_running = true;
2694
2533 mutex_unlock(nlk->cb_mutex); 2695 mutex_unlock(nlk->cb_mutex);
2534 2696
2535 ret = netlink_dump(sk); 2697 ret = netlink_dump(sk);
2536out:
2537 sock_put(sk); 2698 sock_put(sk);
2538 2699
2539 if (ret) 2700 if (ret)
@@ -2543,6 +2704,13 @@ out:
 	 * signal not to send ACK even if it was requested.
 	 */
 	return -EINTR;
+
+error_unlock:
+	sock_put(sk);
+	mutex_unlock(nlk->cb_mutex);
+error_free:
+	kfree_skb(skb);
+	return ret;
 }
 EXPORT_SYMBOL(__netlink_dump_start);
 
@@ -2707,6 +2875,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct sock *s;
 	struct nl_seq_iter *iter;
+	struct net *net;
 	int i, j;
 
 	++*pos;
@@ -2714,11 +2883,12 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (v == SEQ_START_TOKEN)
 		return netlink_seq_socket_idx(seq, 0);
 
+	net = seq_file_net(seq);
 	iter = seq->private;
 	s = v;
 	do {
 		s = sk_next(s);
-	} while (s && sock_net(s) != seq_file_net(seq));
+	} while (s && !nl_table[s->sk_protocol].compare(net, s));
 	if (s)
 		return s;
 
@@ -2730,7 +2900,8 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 	for (; j <= hash->mask; j++) {
 		s = sk_head(&hash->table[j]);
-		while (s && sock_net(s) != seq_file_net(seq))
+
+		while (s && !nl_table[s->sk_protocol].compare(net, s))
 			s = sk_next(s);
 		if (s) {
 			iter->link = i;
@@ -2762,14 +2933,14 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
 	struct sock *s = v;
 	struct netlink_sock *nlk = nlk_sk(s);
 
-	seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
+	seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %d %-8d %-8d %-8lu\n",
 		   s,
 		   s->sk_protocol,
 		   nlk->portid,
 		   nlk->groups ? (u32)nlk->groups[0] : 0,
 		   sk_rmem_alloc_get(s),
 		   sk_wmem_alloc_get(s),
-		   nlk->cb,
+		   nlk->cb_running,
 		   atomic_read(&s->sk_refcnt),
 		   atomic_read(&s->sk_drops),
 		   sock_i_ino(s)
@@ -2923,8 +3094,12 @@ static int __init netlink_proto_init(void)
 		hash->shift = 0;
 		hash->mask = 0;
 		hash->rehash_time = jiffies;
+
+		nl_table[i].compare = netlink_compare;
 	}
 
+	INIT_LIST_HEAD(&netlink_tap_all);
+
 	netlink_add_usersock_entry();
 
 	sock_register(&netlink_family_ops);
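
Note: with this series the netlink_callback no longer has a lifetime of its own; it is embedded in netlink_sock and cb_running says whether a dump is in flight, which is what removes the kzalloc/kfree churn and the destroy-callback error paths above. A minimal sketch of how a protocol hands a dump to the core after this change; my_dump and my_sock are hypothetical names, the dump-control fields are the ones the hunks above actually fill in:

	static int my_dump(struct sk_buff *skb, struct netlink_callback *cb)
	{
		/* cb is now &nlk->cb, zeroed and filled by __netlink_dump_start();
		 * cb->args[] carries private cursor state between invocations
		 */
		return 0;	/* 0: dump finished, >0: call me again */
	}

	static void my_input(struct sk_buff *skb, struct nlmsghdr *nlh)
	{
		struct netlink_dump_control c = {
			.dump = my_dump,
		};

		/* the netlink_dump_start() wrapper pins THIS_MODULE; on failure
		 * __netlink_dump_start() now cleans up the skb itself via the
		 * error_unlock/error_free labels added above
		 */
		netlink_dump_start(my_sock, skb, nlh, &c);
	}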
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index ed8522265f4e..acbd774eeb7c 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -32,7 +32,8 @@ struct netlink_sock {
 	unsigned long *groups;
 	unsigned long state;
 	wait_queue_head_t wait;
-	struct netlink_callback *cb;
+	bool cb_running;
+	struct netlink_callback cb;
 	struct mutex *cb_mutex;
 	struct mutex cb_def_mutex;
 	void (*netlink_rcv)(struct sk_buff *skb);
@@ -73,6 +74,7 @@ struct netlink_table {
 	struct mutex *cb_mutex;
 	struct module *module;
 	void (*bind)(int group);
+	bool (*compare)(struct net *net, struct sock *sock);
 	int registered;
 };
 
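
Note: the new compare hook lets a netlink protocol override how a socket is matched against a network namespace during lookup and /proc iteration; netlink_proto_init() installs a default (netlink_compare) so the old behaviour is kept everywhere else. A sketch of what such a default looks like, assuming the pre-existing one-netns-per-socket rule (not itself shown in this diff):

	static bool netlink_compare(struct net *net, struct sock *sk)
	{
		/* default: the socket belongs to exactly one netns */
		return net_eq(sock_net(sk), net);
	}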
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 2fd6dbea327a..0c741cec4d0d 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -364,7 +364,7 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops)
 EXPORT_SYMBOL(genl_unregister_ops);
 
 /**
- * genl_register_family - register a generic netlink family
+ * __genl_register_family - register a generic netlink family
  * @family: generic netlink family
  *
  * Registers the specified family after validating it first. Only one
@@ -374,7 +374,7 @@ EXPORT_SYMBOL(genl_unregister_ops);
  *
  * Return 0 on success or a negative error code.
  */
-int genl_register_family(struct genl_family *family)
+int __genl_register_family(struct genl_family *family)
 {
 	int err = -EINVAL;
 
@@ -430,10 +430,10 @@ errout_locked:
 errout:
 	return err;
 }
-EXPORT_SYMBOL(genl_register_family);
+EXPORT_SYMBOL(__genl_register_family);
 
 /**
- * genl_register_family_with_ops - register a generic netlink family
+ * __genl_register_family_with_ops - register a generic netlink family
  * @family: generic netlink family
  * @ops: operations to be registered
  * @n_ops: number of elements to register
@@ -457,12 +457,12 @@ EXPORT_SYMBOL(genl_register_family);
  *
  * Return 0 on success or a negative error code.
  */
-int genl_register_family_with_ops(struct genl_family *family,
+int __genl_register_family_with_ops(struct genl_family *family,
 		struct genl_ops *ops, size_t n_ops)
 {
 	int err, i;
 
-	err = genl_register_family(family);
+	err = __genl_register_family(family);
 	if (err)
 		return err;
 
@@ -476,7 +476,7 @@ err_out:
 	genl_unregister_family(family);
 	return err;
 }
-EXPORT_SYMBOL(genl_register_family_with_ops);
+EXPORT_SYMBOL(__genl_register_family_with_ops);
 
 /**
  * genl_unregister_family - unregister generic netlink family
@@ -544,6 +544,30 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
 }
 EXPORT_SYMBOL(genlmsg_put);
 
+static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct genl_ops *ops = cb->data;
+	int rc;
+
+	genl_lock();
+	rc = ops->dumpit(skb, cb);
+	genl_unlock();
+	return rc;
+}
+
+static int genl_lock_done(struct netlink_callback *cb)
+{
+	struct genl_ops *ops = cb->data;
+	int rc = 0;
+
+	if (ops->done) {
+		genl_lock();
+		rc = ops->done(cb);
+		genl_unlock();
+	}
+	return rc;
+}
+
 static int genl_family_rcv_msg(struct genl_family *family,
 			       struct sk_buff *skb,
 			       struct nlmsghdr *nlh)
@@ -571,16 +595,35 @@ static int genl_family_rcv_msg(struct genl_family *family,
 	    !capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-		struct netlink_dump_control c = {
-			.dump = ops->dumpit,
-			.done = ops->done,
-		};
+	if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
+		int rc;
 
 		if (ops->dumpit == NULL)
 			return -EOPNOTSUPP;
 
-		return netlink_dump_start(net->genl_sock, skb, nlh, &c);
+		if (!family->parallel_ops) {
+			struct netlink_dump_control c = {
+				.module = family->module,
+				.data = ops,
+				.dump = genl_lock_dumpit,
+				.done = genl_lock_done,
+			};
+
+			genl_unlock();
+			rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
+			genl_lock();
+
+		} else {
+			struct netlink_dump_control c = {
+				.module = family->module,
+				.dump = ops->dumpit,
+				.done = ops->done,
+			};
+
+			rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
+		}
+
+		return rc;
 	}
 
 	if (ops->doit == NULL)
@@ -877,8 +920,10 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
 #ifdef CONFIG_MODULES
 	if (res == NULL) {
 		genl_unlock();
+		up_read(&cb_lock);
 		request_module("net-pf-%d-proto-%d-family-%s",
 			       PF_NETLINK, NETLINK_GENERIC, name);
+		down_read(&cb_lock);
 		genl_lock();
 		res = genl_family_find_byname(name);
 	}
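
Note: the dump path now calls __netlink_dump_start() directly so the owning family module gets pinned, and families that have not been converted to concurrent processing keep their historical locking: genl_lock_dumpit()/genl_lock_done() re-take genl_lock around dumpit/done, because netlink_dump() runs from the recvmsg path without it. A sketch of how a family chooses between the two flavours (field list trimmed, name hypothetical):

	static struct genl_family my_family = {
		.id = GENL_ID_GENERATE,
		.name = "my_family",
		.version = 1,
		.maxattr = 0,
		/* false: dumps are funneled through genl_lock_dumpit();
		 * true: ops do their own locking and dumps run unwrapped
		 */
		.parallel_ops = false,
	};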
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index ec0c80fde69f..698814bfa7ad 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -117,7 +117,7 @@ static void nr_kill_by_device(struct net_device *dev)
  */
 static int nr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
-	struct net_device *dev = (struct net_device *)ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
 	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c
index 42f630b9a698..ba1c368b3f18 100644
--- a/net/netrom/sysctl_net_netrom.c
+++ b/net/netrom/sysctl_net_netrom.c
@@ -34,7 +34,7 @@ static int min_reset[] = {0}, max_reset[] = {1};
 
 static struct ctl_table_header *nr_table_header;
 
-static ctl_table nr_table[] = {
+static struct ctl_table nr_table[] = {
 	{
 		.procname = "default_path_quality",
 		.data = &sysctl_netrom_default_path_quality,
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 40d2527693da..e92923cf3e03 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -44,6 +44,55 @@ DEFINE_MUTEX(nfc_devlist_mutex);
 /* NFC device ID bitmap */
 static DEFINE_IDA(nfc_index_ida);
 
+int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name)
+{
+	int rc = 0;
+
+	pr_debug("%s do firmware %s\n", dev_name(&dev->dev), firmware_name);
+
+	device_lock(&dev->dev);
+
+	if (!device_is_registered(&dev->dev)) {
+		rc = -ENODEV;
+		goto error;
+	}
+
+	if (dev->dev_up) {
+		rc = -EBUSY;
+		goto error;
+	}
+
+	if (!dev->ops->fw_download) {
+		rc = -EOPNOTSUPP;
+		goto error;
+	}
+
+	dev->fw_download_in_progress = true;
+	rc = dev->ops->fw_download(dev, firmware_name);
+	if (rc)
+		dev->fw_download_in_progress = false;
+
+error:
+	device_unlock(&dev->dev);
+	return rc;
+}
+
+/**
+ * nfc_fw_download_done - inform that a firmware download was completed
+ *
+ * @dev: The nfc device to which firmware was downloaded
+ * @firmware_name: The firmware filename
+ * @result: The positive value of a standard errno value
+ */
+int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
+			 u32 result)
+{
+	dev->fw_download_in_progress = false;
+
+	return nfc_genl_fw_download_done(dev, firmware_name, result);
+}
+EXPORT_SYMBOL(nfc_fw_download_done);
+
 /**
  * nfc_dev_up - turn on the NFC device
  *
@@ -69,6 +118,11 @@ int nfc_dev_up(struct nfc_dev *dev)
 		goto error;
 	}
 
+	if (dev->fw_download_in_progress) {
+		rc = -EBUSY;
+		goto error;
+	}
+
 	if (dev->dev_up) {
 		rc = -EALREADY;
 		goto error;
@@ -80,6 +134,13 @@ int nfc_dev_up(struct nfc_dev *dev)
 	if (!rc)
 		dev->dev_up = true;
 
+	/* We have to enable the device before discovering SEs */
+	if (dev->ops->discover_se) {
+		rc = dev->ops->discover_se(dev);
+		if (rc)
+			pr_warn("SE discovery failed\n");
+	}
+
 error:
 	device_unlock(&dev->dev);
 	return rc;
@@ -475,6 +536,112 @@ error:
 	return rc;
 }
 
539static struct nfc_se *find_se(struct nfc_dev *dev, u32 se_idx)
540{
541 struct nfc_se *se, *n;
542
543 list_for_each_entry_safe(se, n, &dev->secure_elements, list)
544 if (se->idx == se_idx)
545 return se;
546
547 return NULL;
548}
549
550int nfc_enable_se(struct nfc_dev *dev, u32 se_idx)
551{
552
553 struct nfc_se *se;
554 int rc;
555
556 pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx);
557
558 device_lock(&dev->dev);
559
560 if (!device_is_registered(&dev->dev)) {
561 rc = -ENODEV;
562 goto error;
563 }
564
565 if (!dev->dev_up) {
566 rc = -ENODEV;
567 goto error;
568 }
569
570 if (dev->polling) {
571 rc = -EBUSY;
572 goto error;
573 }
574
575 if (!dev->ops->enable_se || !dev->ops->disable_se) {
576 rc = -EOPNOTSUPP;
577 goto error;
578 }
579
580 se = find_se(dev, se_idx);
581 if (!se) {
582 rc = -EINVAL;
583 goto error;
584 }
585
586 if (se->state == NFC_SE_ENABLED) {
587 rc = -EALREADY;
588 goto error;
589 }
590
591 rc = dev->ops->enable_se(dev, se_idx);
592 if (rc >= 0)
593 se->state = NFC_SE_ENABLED;
594
595error:
596 device_unlock(&dev->dev);
597 return rc;
598}
599
600int nfc_disable_se(struct nfc_dev *dev, u32 se_idx)
601{
602
603 struct nfc_se *se;
604 int rc;
605
606 pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx);
607
608 device_lock(&dev->dev);
609
610 if (!device_is_registered(&dev->dev)) {
611 rc = -ENODEV;
612 goto error;
613 }
614
615 if (!dev->dev_up) {
616 rc = -ENODEV;
617 goto error;
618 }
619
620 if (!dev->ops->enable_se || !dev->ops->disable_se) {
621 rc = -EOPNOTSUPP;
622 goto error;
623 }
624
625 se = find_se(dev, se_idx);
626 if (!se) {
627 rc = -EINVAL;
628 goto error;
629 }
630
631 if (se->state == NFC_SE_DISABLED) {
632 rc = -EALREADY;
633 goto error;
634 }
635
636 rc = dev->ops->disable_se(dev, se_idx);
637 if (rc >= 0)
638 se->state = NFC_SE_DISABLED;
639
640error:
641 device_unlock(&dev->dev);
642 return rc;
643}
644
 int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len)
 {
 	pr_debug("dev_name=%s gb_len=%d\n", dev_name(&dev->dev), gb_len);
@@ -707,14 +874,79 @@ inline void nfc_driver_failure(struct nfc_dev *dev, int err)
 }
 EXPORT_SYMBOL(nfc_driver_failure);
 
877int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type)
878{
879 struct nfc_se *se;
880 int rc;
881
882 pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx);
883
884 se = find_se(dev, se_idx);
885 if (se)
886 return -EALREADY;
887
888 se = kzalloc(sizeof(struct nfc_se), GFP_KERNEL);
889 if (!se)
890 return -ENOMEM;
891
892 se->idx = se_idx;
893 se->type = type;
894 se->state = NFC_SE_DISABLED;
895 INIT_LIST_HEAD(&se->list);
896
897 list_add(&se->list, &dev->secure_elements);
898
899 rc = nfc_genl_se_added(dev, se_idx, type);
900 if (rc < 0) {
901 list_del(&se->list);
902 kfree(se);
903
904 return rc;
905 }
906
907 return 0;
908}
909EXPORT_SYMBOL(nfc_add_se);
910
911int nfc_remove_se(struct nfc_dev *dev, u32 se_idx)
912{
913 struct nfc_se *se, *n;
914 int rc;
915
916 pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx);
917
918 list_for_each_entry_safe(se, n, &dev->secure_elements, list)
919 if (se->idx == se_idx) {
920 rc = nfc_genl_se_removed(dev, se_idx);
921 if (rc < 0)
922 return rc;
923
924 list_del(&se->list);
925 kfree(se);
926
927 return 0;
928 }
929
930 return -EINVAL;
931}
932EXPORT_SYMBOL(nfc_remove_se);
933
 static void nfc_release(struct device *d)
 {
 	struct nfc_dev *dev = to_nfc_dev(d);
+	struct nfc_se *se, *n;
 
 	pr_debug("dev_name=%s\n", dev_name(&dev->dev));
 
 	nfc_genl_data_exit(&dev->genl_data);
 	kfree(dev->targets);
+
+	list_for_each_entry_safe(se, n, &dev->secure_elements, list) {
+		nfc_genl_se_removed(dev, se->idx);
+		list_del(&se->list);
+		kfree(se);
+	}
+
 	kfree(dev);
 }
 
@@ -786,7 +1018,6 @@ struct nfc_dev *nfc_get_device(unsigned int idx)
  */
 struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
 				    u32 supported_protocols,
-				    u32 supported_se,
 				    int tx_headroom, int tx_tailroom)
 {
 	struct nfc_dev *dev;
@@ -804,10 +1035,9 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
 
 	dev->ops = ops;
 	dev->supported_protocols = supported_protocols;
-	dev->supported_se = supported_se;
-	dev->active_se = NFC_SE_NONE;
 	dev->tx_headroom = tx_headroom;
 	dev->tx_tailroom = tx_tailroom;
+	INIT_LIST_HEAD(&dev->secure_elements);
 
 	nfc_genl_data_init(&dev->genl_data);
 
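
Note: secure elements are now discovered dynamically and kept on dev->secure_elements instead of being advertised through a static supported_se mask, which is why the allocator loses that argument. A sketch of the driver side under the new model; the driver names are hypothetical, and NFC_SE_EMBEDDED is assumed to be one of the SE type constants introduced by this series:

	/* reached via dev->ops->discover_se() right after nfc_dev_up() */
	static int mydrv_discover_se(struct nfc_dev *dev)
	{
		/* report one embedded SE at index 0: the core allocates the
		 * struct nfc_se, links it and emits NFC_EVENT_SE_ADDED
		 */
		return nfc_add_se(dev, 0, NFC_SE_EMBEDDED);
	}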
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 91020b210d87..d07ca4c5cf8c 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -570,21 +570,21 @@ static int hci_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target,
 {
 	struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
 
-	if (hdev->ops->dep_link_up)
-		return hdev->ops->dep_link_up(hdev, target, comm_mode,
-					      gb, gb_len);
+	if (!hdev->ops->dep_link_up)
+		return 0;
 
-	return 0;
+	return hdev->ops->dep_link_up(hdev, target, comm_mode,
+				      gb, gb_len);
 }
 
 static int hci_dep_link_down(struct nfc_dev *nfc_dev)
 {
 	struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
 
-	if (hdev->ops->dep_link_down)
-		return hdev->ops->dep_link_down(hdev);
+	if (!hdev->ops->dep_link_down)
+		return 0;
 
-	return 0;
+	return hdev->ops->dep_link_down(hdev);
 }
 
 static int hci_activate_target(struct nfc_dev *nfc_dev,
@@ -673,12 +673,12 @@ static int hci_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb)
 {
 	struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
 
-	if (hdev->ops->tm_send)
-		return hdev->ops->tm_send(hdev, skb);
-
-	kfree_skb(skb);
+	if (!hdev->ops->tm_send) {
+		kfree_skb(skb);
+		return -ENOTSUPP;
+	}
 
-	return -ENOTSUPP;
+	return hdev->ops->tm_send(hdev, skb);
 }
 
 static int hci_check_presence(struct nfc_dev *nfc_dev,
@@ -686,8 +686,38 @@ static int hci_check_presence(struct nfc_dev *nfc_dev,
 {
 	struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
 
-	if (hdev->ops->check_presence)
-		return hdev->ops->check_presence(hdev, target);
+	if (!hdev->ops->check_presence)
+		return 0;
+
+	return hdev->ops->check_presence(hdev, target);
+}
+
+static int hci_discover_se(struct nfc_dev *nfc_dev)
+{
+	struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
+
+	if (hdev->ops->discover_se)
+		return hdev->ops->discover_se(hdev);
+
+	return 0;
+}
+
+static int hci_enable_se(struct nfc_dev *nfc_dev, u32 se_idx)
+{
+	struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
+
+	if (hdev->ops->enable_se)
+		return hdev->ops->enable_se(hdev, se_idx);
+
+	return 0;
+}
+
+static int hci_disable_se(struct nfc_dev *nfc_dev, u32 se_idx)
+{
+	struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
+
+	if (hdev->ops->disable_se)
+		return hdev->ops->disable_se(hdev, se_idx);
 
 	return 0;
 }
@@ -779,6 +809,16 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb)
 	}
 }
 
+static int hci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name)
+{
+	struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
+
+	if (!hdev->ops->fw_download)
+		return -ENOTSUPP;
+
+	return hdev->ops->fw_download(hdev, firmware_name);
+}
+
 static struct nfc_ops hci_nfc_ops = {
 	.dev_up = hci_dev_up,
 	.dev_down = hci_dev_down,
@@ -791,13 +831,16 @@ static struct nfc_ops hci_nfc_ops = {
 	.im_transceive = hci_transceive,
 	.tm_send = hci_tm_send,
 	.check_presence = hci_check_presence,
+	.fw_download = hci_fw_download,
+	.discover_se = hci_discover_se,
+	.enable_se = hci_enable_se,
+	.disable_se = hci_disable_se,
 };
 
 struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops,
 					    struct nfc_hci_init_data *init_data,
 					    unsigned long quirks,
 					    u32 protocols,
-					    u32 supported_se,
 					    const char *llc_name,
 					    int tx_headroom,
 					    int tx_tailroom,
@@ -823,7 +866,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops,
 		return NULL;
 	}
 
-	hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, supported_se,
+	hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols,
 					 tx_headroom + HCI_CMDS_HEADROOM,
 					 tx_tailroom);
 	if (!hdev->ndev) {
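
Note: the HCI glue now simply forwards the new ops, so a chip driver only has to fill in the matching nfc_hci_ops entries; everything it leaves NULL degrades to the defaults above. For firmware download the signature a driver implements is the one hci_fw_download() calls; the body below is a hypothetical sketch:

	static int mydrv_fw_download(struct nfc_hci_dev *hdev,
				     const char *firmware_name)
	{
		/* push the named firmware image to the controller, then
		 * report the outcome later via nfc_fw_download_done()
		 */
		return 0;
	}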
diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h
index ff8c434f7df8..f4d48b57ea11 100644
--- a/net/nfc/llcp.h
+++ b/net/nfc/llcp.h
@@ -19,6 +19,8 @@
 
 enum llcp_state {
 	LLCP_CONNECTED = 1, /* wait_for_packet() wants that */
+	LLCP_CONNECTING,
+	LLCP_DISCONNECTING,
 	LLCP_CLOSED,
 	LLCP_BOUND,
 	LLCP_LISTEN,
@@ -246,7 +248,6 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
 void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp);
 void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head);
 void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
-int nfc_llcp_disconnect(struct nfc_llcp_sock *sock);
 int nfc_llcp_send_symm(struct nfc_dev *dev);
 int nfc_llcp_send_connect(struct nfc_llcp_sock *sock);
 int nfc_llcp_send_cc(struct nfc_llcp_sock *sock);
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index c1b23eef83ca..1017894807c0 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -339,7 +339,7 @@ static struct sk_buff *llcp_allocate_pdu(struct nfc_llcp_sock *sock,
 	return skb;
 }
 
-int nfc_llcp_disconnect(struct nfc_llcp_sock *sock)
+int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock)
 {
 	struct sk_buff *skb;
 	struct nfc_dev *dev;
@@ -630,26 +630,6 @@ int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason)
 	return 0;
 }
 
-int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock)
-{
-	struct sk_buff *skb;
-	struct nfc_llcp_local *local;
-
-	pr_debug("Send DISC\n");
-
-	local = sock->local;
-	if (local == NULL)
-		return -ENODEV;
-
-	skb = llcp_allocate_pdu(sock, LLCP_PDU_DISC, 0);
-	if (skb == NULL)
-		return -ENOMEM;
-
-	skb_queue_head(&local->tx_queue, skb);
-
-	return 0;
-}
-
 int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
 			  struct msghdr *msg, size_t len)
 {
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 158bdbf668cc..81cd3416c7d4 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -537,6 +537,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local)
 	u8 *lto_tlv, lto_length;
 	u8 *wks_tlv, wks_length;
 	u8 *miux_tlv, miux_length;
+	__be16 wks = cpu_to_be16(local->local_wks);
 	u8 gb_len = 0;
 	int ret = 0;
 
@@ -549,8 +550,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local)
 	gb_len += lto_length;
 
 	pr_debug("Local wks 0x%lx\n", local->local_wks);
-	wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&local->local_wks, 2,
-				     &wks_length);
+	wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&wks, 2, &wks_length);
 	gb_len += wks_length;
 
 	miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0,
@@ -719,6 +719,10 @@ static void nfc_llcp_tx_work(struct work_struct *work)
 		llcp_sock = nfc_llcp_sock(sk);
 
 		if (llcp_sock == NULL && nfc_llcp_ptype(skb) == LLCP_PDU_I) {
+			kfree_skb(skb);
+			nfc_llcp_send_symm(local->dev);
+		} else if (llcp_sock && !llcp_sock->remote_ready) {
+			skb_queue_head(&local->tx_queue, skb);
 			nfc_llcp_send_symm(local->dev);
 		} else {
 			struct sk_buff *copy_skb = NULL;
@@ -730,6 +734,13 @@ static void nfc_llcp_tx_work(struct work_struct *work)
 				       DUMP_PREFIX_OFFSET, 16, 1,
 				       skb->data, skb->len, true);
 
+			if (ptype == LLCP_PDU_DISC && sk != NULL &&
+			    sk->sk_state == LLCP_DISCONNECTING) {
+				nfc_llcp_sock_unlink(&local->sockets, sk);
+				sock_orphan(sk);
+				sock_put(sk);
+			}
+
 			if (ptype == LLCP_PDU_I)
 				copy_skb = skb_copy(skb, GFP_ATOMIC);
 
@@ -1579,6 +1590,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
 	local->lto = 150; /* 1500 ms */
 	local->rw = LLCP_MAX_RW;
 	local->miux = cpu_to_be16(LLCP_MAX_MIUX);
+	local->local_wks = 0x1; /* LLC Link Management */
 
 	nfc_llcp_build_gb(local);
 
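
Note: the WKS fix matters because local_wks is a host-order unsigned long used as a bitmask, while the WKS TLV payload on the wire is a 16-bit big-endian word. A condensed before/after view, restating the code above (variable names as in the function):

	unsigned long local_wks = 0x1;	/* bit 0 = LLC Link Management, now set by default */
	__be16 wks = cpu_to_be16(local_wks);

	/* old: (u8 *)&local_wks copied two raw bytes of a long, leaking host
	 * byte order (and, on 64-bit big endian, the wrong half of the long);
	 * new: a properly converted 16-bit quantity
	 */
	wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&wks, 2, &wks_length);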
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 380253eccb74..d308402b67d8 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -571,7 +571,7 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
 		mask |= POLLHUP;
 
-	if (sock_writeable(sk))
+	if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED)
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
@@ -603,7 +603,7 @@ static int llcp_sock_release(struct socket *sock)
 
 	/* Send a DISC */
 	if (sk->sk_state == LLCP_CONNECTED)
-		nfc_llcp_disconnect(llcp_sock);
+		nfc_llcp_send_disconnect(llcp_sock);
 
 	if (sk->sk_state == LLCP_LISTEN) {
 		struct nfc_llcp_sock *lsk, *n;
@@ -614,7 +614,7 @@ static int llcp_sock_release(struct socket *sock)
 			accept_sk = &lsk->sk;
 			lock_sock(accept_sk);
 
-			nfc_llcp_disconnect(lsk);
+			nfc_llcp_send_disconnect(lsk);
 			nfc_llcp_accept_unlink(accept_sk);
 
 			release_sock(accept_sk);
@@ -626,6 +626,13 @@ static int llcp_sock_release(struct socket *sock)
 
 	release_sock(sk);
 
+	/* Keep this sock alive and therefore do not remove it from the sockets
+	 * list until the DISC PDU has been actually sent. Otherwise we would
+	 * reply with DM PDUs before sending the DISC one.
+	 */
+	if (sk->sk_state == LLCP_DISCONNECTING)
+		return err;
+
 	if (sock->type == SOCK_RAW)
 		nfc_llcp_sock_unlink(&local->raw_sockets, sk);
 	else
@@ -722,14 +729,16 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
 	if (ret)
 		goto sock_unlink;
 
+	sk->sk_state = LLCP_CONNECTING;
+
 	ret = sock_wait_state(sk, LLCP_CONNECTED,
 			      sock_sndtimeo(sk, flags & O_NONBLOCK));
-	if (ret)
+	if (ret && ret != -EINPROGRESS)
 		goto sock_unlink;
 
 	release_sock(sk);
 
-	return 0;
+	return ret;
 
 sock_unlink:
 	nfc_llcp_put_ssap(local, llcp_sock->ssap);
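
Note: connect() can now leave the socket in LLCP_CONNECTING and propagate -EINPROGRESS for non-blocking sockets, and poll() only reports writability once LLCP_CONNECTED is reached. A userspace sketch of that flow, with the address setup elided and the helper name hypothetical:

	#include <errno.h>
	#include <fcntl.h>
	#include <poll.h>
	#include <sys/socket.h>
	#include <linux/nfc.h>

	static int llcp_connect_nonblock(int fd, struct sockaddr_nfc_llcp *addr)
	{
		struct pollfd pfd = { .fd = fd, .events = POLLOUT };

		fcntl(fd, F_SETFL, O_NONBLOCK);
		if (connect(fd, (struct sockaddr *)addr, sizeof(*addr)) == 0)
			return 0;
		if (errno != EINPROGRESS)
			return -1;
		/* POLLOUT now implies the LLCP connection is established */
		return poll(&pfd, 1, -1) == 1 ? 0 : -1;
	}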
diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig
index 6d69b5f0f19b..a4f1e42e3481 100644
--- a/net/nfc/nci/Kconfig
+++ b/net/nfc/nci/Kconfig
@@ -8,3 +8,14 @@ config NFC_NCI
 
 	  Say Y here to compile NCI support into the kernel or say M to
 	  compile it as module (nci).
+
+config NFC_NCI_SPI
+	depends on NFC_NCI && SPI
+	select CRC_CCITT
+	bool "NCI over SPI protocol support"
+	default n
+	help
+	  NCI (NFC Controller Interface) is a communication protocol between
+	  an NFC Controller (NFCC) and a Device Host (DH).
+
+	  Say yes if you use an NCI driver that requires SPI link layer.
diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile
index cdb3a2e44471..7aeedc43187d 100644
--- a/net/nfc/nci/Makefile
+++ b/net/nfc/nci/Makefile
@@ -4,4 +4,6 @@
 
 obj-$(CONFIG_NFC_NCI) += nci.o
 
-nci-objs := core.o data.o lib.o ntf.o rsp.o
\ No newline at end of file
+nci-objs := core.o data.o lib.o ntf.o rsp.o
+
+nci-$(CONFIG_NFC_NCI_SPI) += spi.o
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 48ada0ec749e..b943d46a1644 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -636,6 +636,21 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target,
 	return rc;
 }
 
+static int nci_enable_se(struct nfc_dev *nfc_dev, u32 se_idx)
+{
+	return 0;
+}
+
+static int nci_disable_se(struct nfc_dev *nfc_dev, u32 se_idx)
+{
+	return 0;
+}
+
+static int nci_discover_se(struct nfc_dev *nfc_dev)
+{
+	return 0;
+}
+
 static struct nfc_ops nci_nfc_ops = {
 	.dev_up = nci_dev_up,
 	.dev_down = nci_dev_down,
@@ -646,6 +661,9 @@ static struct nfc_ops nci_nfc_ops = {
 	.activate_target = nci_activate_target,
 	.deactivate_target = nci_deactivate_target,
 	.im_transceive = nci_transceive,
+	.enable_se = nci_enable_se,
+	.disable_se = nci_disable_se,
+	.discover_se = nci_discover_se,
 };
 
 /* ---- Interface to NCI drivers ---- */
@@ -658,7 +676,6 @@ static struct nfc_ops nci_nfc_ops = {
  */
 struct nci_dev *nci_allocate_device(struct nci_ops *ops,
 				    __u32 supported_protocols,
-				    __u32 supported_se,
 				    int tx_headroom, int tx_tailroom)
 {
 	struct nci_dev *ndev;
@@ -681,7 +698,6 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops,
 
 	ndev->nfc_dev = nfc_allocate_device(&nci_nfc_ops,
 					    supported_protocols,
-					    supported_se,
 					    tx_headroom + NCI_DATA_HDR_SIZE,
 					    tx_tailroom);
 	if (!ndev->nfc_dev)
@@ -797,12 +813,11 @@ EXPORT_SYMBOL(nci_unregister_device);
 /**
  * nci_recv_frame - receive frame from NCI drivers
  *
+ * @ndev: The nci device
  * @skb: The sk_buff to receive
  */
-int nci_recv_frame(struct sk_buff *skb)
+int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb)
 {
-	struct nci_dev *ndev = (struct nci_dev *) skb->dev;
-
 	pr_debug("len %d\n", skb->len);
 
 	if (!ndev || (!test_bit(NCI_UP, &ndev->flags) &&
@@ -819,10 +834,8 @@ int nci_recv_frame(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(nci_recv_frame);
 
-static int nci_send_frame(struct sk_buff *skb)
+static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb)
 {
-	struct nci_dev *ndev = (struct nci_dev *) skb->dev;
-
 	pr_debug("len %d\n", skb->len);
 
 	if (!ndev) {
@@ -833,7 +846,7 @@ static int nci_send_frame(struct sk_buff *skb)
 	/* Get rid of skb owner, prior to sending to the driver. */
 	skb_orphan(skb);
 
-	return ndev->ops->send(skb);
+	return ndev->ops->send(ndev, skb);
 }
 
 /* Send NCI command */
@@ -861,8 +874,6 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload)
 	if (plen)
 		memcpy(skb_put(skb, plen), payload, plen);
 
-	skb->dev = (void *) ndev;
-
 	skb_queue_tail(&ndev->cmd_q, skb);
 	queue_work(ndev->cmd_wq, &ndev->cmd_work);
 
@@ -894,7 +905,7 @@ static void nci_tx_work(struct work_struct *work)
 		 nci_conn_id(skb->data),
 		 nci_plen(skb->data));
 
-	nci_send_frame(skb);
+	nci_send_frame(ndev, skb);
 
 	mod_timer(&ndev->data_timer,
 		  jiffies + msecs_to_jiffies(NCI_DATA_TIMEOUT));
@@ -963,7 +974,7 @@ static void nci_cmd_work(struct work_struct *work)
 		 nci_opcode_oid(nci_opcode(skb->data)),
 		 nci_plen(skb->data));
 
-	nci_send_frame(skb);
+	nci_send_frame(ndev, skb);
 
 	mod_timer(&ndev->cmd_timer,
 		  jiffies + msecs_to_jiffies(NCI_CMD_TIMEOUT));
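
Note: the driver interface change is mechanical but worth spelling out: the nci_dev used to be smuggled through skb->dev (a void * abuse of a net_device field) and is now an explicit parameter in both directions. A sketch of a driver's send callback under the new signature; mydrv and mydrv_xmit are hypothetical:

	static int mydrv_send(struct nci_dev *ndev, struct sk_buff *skb)
	{
		struct mydrv *drv = nci_get_drvdata(ndev);

		/* hand the frame to the transport (hypothetical helper);
		 * skb->dev is left untouched
		 */
		return mydrv_xmit(drv, skb);
	}

On the receive side the driver now passes its device along with the frame, i.e. nci_recv_frame(drv->ndev, skb).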
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index 76c48c5324f8..2a9399dd6c68 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -80,8 +80,6 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev,
 
 	nci_mt_set((__u8 *)hdr, NCI_MT_DATA_PKT);
 	nci_pbf_set((__u8 *)hdr, pbf);
-
-	skb->dev = (void *) ndev;
 }
 
 static int nci_queue_tx_data_frags(struct nci_dev *ndev,
diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c
new file mode 100644
index 000000000000..c7cf37ba7298
--- /dev/null
+++ b/net/nfc/nci/spi.c
@@ -0,0 +1,378 @@
1/*
2 * Copyright (C) 2013 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
16 *
17 */
18
19#define pr_fmt(fmt) "nci_spi: %s: " fmt, __func__
20
21#include <linux/export.h>
22#include <linux/spi/spi.h>
23#include <linux/crc-ccitt.h>
24#include <linux/nfc.h>
25#include <net/nfc/nci_core.h>
26
27#define NCI_SPI_HDR_LEN 4
28#define NCI_SPI_CRC_LEN 2
29#define NCI_SPI_ACK_SHIFT 6
30#define NCI_SPI_MSB_PAYLOAD_MASK 0x3F
31
32#define NCI_SPI_SEND_TIMEOUT (NCI_CMD_TIMEOUT > NCI_DATA_TIMEOUT ? \
33 NCI_CMD_TIMEOUT : NCI_DATA_TIMEOUT)
34
35#define NCI_SPI_DIRECT_WRITE 0x01
36#define NCI_SPI_DIRECT_READ 0x02
37
38#define ACKNOWLEDGE_NONE 0
39#define ACKNOWLEDGE_ACK 1
40#define ACKNOWLEDGE_NACK 2
41
42#define CRC_INIT 0xFFFF
43
44static int nci_spi_open(struct nci_dev *nci_dev)
45{
46 struct nci_spi_dev *ndev = nci_get_drvdata(nci_dev);
47
48 return ndev->ops->open(ndev);
49}
50
51static int nci_spi_close(struct nci_dev *nci_dev)
52{
53 struct nci_spi_dev *ndev = nci_get_drvdata(nci_dev);
54
55 return ndev->ops->close(ndev);
56}
57
58static int __nci_spi_send(struct nci_spi_dev *ndev, struct sk_buff *skb)
59{
60 struct spi_message m;
61 struct spi_transfer t;
62
63 t.tx_buf = skb->data;
64 t.len = skb->len;
65 t.cs_change = 0;
66 t.delay_usecs = ndev->xfer_udelay;
67
68 spi_message_init(&m);
69 spi_message_add_tail(&t, &m);
70
71 return spi_sync(ndev->spi, &m);
72}
73
74static int nci_spi_send(struct nci_dev *nci_dev, struct sk_buff *skb)
75{
76 struct nci_spi_dev *ndev = nci_get_drvdata(nci_dev);
77 unsigned int payload_len = skb->len;
78 unsigned char *hdr;
79 int ret;
80 long completion_rc;
81
82 ndev->ops->deassert_int(ndev);
83
84 /* add the NCI SPI header to the start of the buffer */
85 hdr = skb_push(skb, NCI_SPI_HDR_LEN);
86 hdr[0] = NCI_SPI_DIRECT_WRITE;
87 hdr[1] = ndev->acknowledge_mode;
88 hdr[2] = payload_len >> 8;
89 hdr[3] = payload_len & 0xFF;
90
91 if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) {
92 u16 crc;
93
94 crc = crc_ccitt(CRC_INIT, skb->data, skb->len);
95 *skb_put(skb, 1) = crc >> 8;
96 *skb_put(skb, 1) = crc & 0xFF;
97 }
98
99 ret = __nci_spi_send(ndev, skb);
100
101 kfree_skb(skb);
102 ndev->ops->assert_int(ndev);
103
104 if (ret != 0 || ndev->acknowledge_mode == NCI_SPI_CRC_DISABLED)
105 goto done;
106
107 init_completion(&ndev->req_completion);
108 completion_rc =
109 wait_for_completion_interruptible_timeout(&ndev->req_completion,
110 NCI_SPI_SEND_TIMEOUT);
111
112 if (completion_rc <= 0 || ndev->req_result == ACKNOWLEDGE_NACK)
113 ret = -EIO;
114
115done:
116 return ret;
117}
118
119static struct nci_ops nci_spi_ops = {
120 .open = nci_spi_open,
121 .close = nci_spi_close,
122 .send = nci_spi_send,
123};
124
125/* ---- Interface to NCI SPI drivers ---- */
126
127/**
128 * nci_spi_allocate_device - allocate a new nci spi device
129 *
130 * @spi: SPI device
131 * @ops: device operations
132 * @supported_protocols: NFC protocols supported by the device
133 * @supported_se: NFC Secure Elements supported by the device
134 * @acknowledge_mode: Acknowledge mode used by the device
135 * @delay: delay between transactions in us
136 */
137struct nci_spi_dev *nci_spi_allocate_device(struct spi_device *spi,
138 struct nci_spi_ops *ops,
139 u32 supported_protocols,
140 u32 supported_se,
141 u8 acknowledge_mode,
142 unsigned int delay)
143{
144 struct nci_spi_dev *ndev;
145 int tailroom = 0;
146
147 if (!ops->open || !ops->close || !ops->assert_int || !ops->deassert_int)
148 return NULL;
149
150 if (!supported_protocols)
151 return NULL;
152
153	ndev = devm_kzalloc(&spi->dev, sizeof(struct nci_spi_dev), GFP_KERNEL);
154 if (!ndev)
155 return NULL;
156
157 ndev->ops = ops;
158 ndev->acknowledge_mode = acknowledge_mode;
159 ndev->xfer_udelay = delay;
160
161 if (acknowledge_mode == NCI_SPI_CRC_ENABLED)
162 tailroom += NCI_SPI_CRC_LEN;
163
164 ndev->nci_dev = nci_allocate_device(&nci_spi_ops, supported_protocols,
165 NCI_SPI_HDR_LEN, tailroom);
166 if (!ndev->nci_dev)
167 return NULL;
168
169 nci_set_drvdata(ndev->nci_dev, ndev);
170
171 return ndev;
172}
173EXPORT_SYMBOL_GPL(nci_spi_allocate_device);
174
175/**
176 * nci_spi_free_device - deallocate nci spi device
177 *
178 * @ndev: The nci spi device to deallocate
179 */
180void nci_spi_free_device(struct nci_spi_dev *ndev)
181{
182 nci_free_device(ndev->nci_dev);
183}
184EXPORT_SYMBOL_GPL(nci_spi_free_device);
185
186/**
187 * nci_spi_register_device - register a nci spi device in the nfc subsystem
188 *
189 * @ndev: The nci spi device to register
190 */
191int nci_spi_register_device(struct nci_spi_dev *ndev)
192{
193 return nci_register_device(ndev->nci_dev);
194}
195EXPORT_SYMBOL_GPL(nci_spi_register_device);
196
197/**
198 * nci_spi_unregister_device - unregister a nci spi device in the nfc subsystem
199 *
200 * @ndev: The nci spi device to unregister
201 */
202void nci_spi_unregister_device(struct nci_spi_dev *ndev)
203{
204 nci_unregister_device(ndev->nci_dev);
205}
206EXPORT_SYMBOL_GPL(nci_spi_unregister_device);
207
208static int send_acknowledge(struct nci_spi_dev *ndev, u8 acknowledge)
209{
210 struct sk_buff *skb;
211 unsigned char *hdr;
212 u16 crc;
213 int ret;
214
215 skb = nci_skb_alloc(ndev->nci_dev, 0, GFP_KERNEL);
216
217 /* add the NCI SPI header to the start of the buffer */
218 hdr = skb_push(skb, NCI_SPI_HDR_LEN);
219 hdr[0] = NCI_SPI_DIRECT_WRITE;
220 hdr[1] = NCI_SPI_CRC_ENABLED;
221 hdr[2] = acknowledge << NCI_SPI_ACK_SHIFT;
222 hdr[3] = 0;
223
224 crc = crc_ccitt(CRC_INIT, skb->data, skb->len);
225 *skb_put(skb, 1) = crc >> 8;
226 *skb_put(skb, 1) = crc & 0xFF;
227
228 ret = __nci_spi_send(ndev, skb);
229
230 kfree_skb(skb);
231
232 return ret;
233}
234
235static struct sk_buff *__nci_spi_recv_frame(struct nci_spi_dev *ndev)
236{
237 struct sk_buff *skb;
238 struct spi_message m;
239 unsigned char req[2], resp_hdr[2];
240 struct spi_transfer tx, rx;
241 unsigned short rx_len = 0;
242 int ret;
243
244 spi_message_init(&m);
245 req[0] = NCI_SPI_DIRECT_READ;
246 req[1] = ndev->acknowledge_mode;
247 tx.tx_buf = req;
248 tx.len = 2;
249 tx.cs_change = 0;
250 spi_message_add_tail(&tx, &m);
251 rx.rx_buf = resp_hdr;
252 rx.len = 2;
253 rx.cs_change = 1;
254 spi_message_add_tail(&rx, &m);
255 ret = spi_sync(ndev->spi, &m);
256
257 if (ret)
258 return NULL;
259
260 if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED)
261 rx_len = ((resp_hdr[0] & NCI_SPI_MSB_PAYLOAD_MASK) << 8) +
262 resp_hdr[1] + NCI_SPI_CRC_LEN;
263 else
264 rx_len = (resp_hdr[0] << 8) | resp_hdr[1];
265
266 skb = nci_skb_alloc(ndev->nci_dev, rx_len, GFP_KERNEL);
267 if (!skb)
268 return NULL;
269
270 spi_message_init(&m);
271 rx.rx_buf = skb_put(skb, rx_len);
272 rx.len = rx_len;
273 rx.cs_change = 0;
274 rx.delay_usecs = ndev->xfer_udelay;
275 spi_message_add_tail(&rx, &m);
276 ret = spi_sync(ndev->spi, &m);
277
278 if (ret)
279 goto receive_error;
280
281 if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) {
282 *skb_push(skb, 1) = resp_hdr[1];
283 *skb_push(skb, 1) = resp_hdr[0];
284 }
285
286 return skb;
287
288receive_error:
289 kfree_skb(skb);
290
291 return NULL;
292}
293
294static int nci_spi_check_crc(struct sk_buff *skb)
295{
296 u16 crc_data = (skb->data[skb->len - 2] << 8) |
297 skb->data[skb->len - 1];
298 int ret;
299
300 ret = (crc_ccitt(CRC_INIT, skb->data, skb->len - NCI_SPI_CRC_LEN)
301 == crc_data);
302
303 skb_trim(skb, skb->len - NCI_SPI_CRC_LEN);
304
305 return ret;
306}
307
308static u8 nci_spi_get_ack(struct sk_buff *skb)
309{
310 u8 ret;
311
312 ret = skb->data[0] >> NCI_SPI_ACK_SHIFT;
313
314 /* Remove NFCC part of the header: ACK, NACK and MSB payload len */
315 skb_pull(skb, 2);
316
317 return ret;
318}
319
320/**
321 * nci_spi_recv_frame - receive frame from NCI SPI drivers
322 *
323 * @ndev: The nci spi device
324 * Context: can sleep
325 *
326 * This call may only be used from a context that may sleep. The sleep
327 * is non-interruptible, and has no timeout.
328 *
329 * It returns zero on success, else a negative error code.
330 */
331int nci_spi_recv_frame(struct nci_spi_dev *ndev)
332{
333 struct sk_buff *skb;
334 int ret = 0;
335
336 ndev->ops->deassert_int(ndev);
337
338 /* Retrieve frame from SPI */
339 skb = __nci_spi_recv_frame(ndev);
340 if (!skb) {
341 ret = -EIO;
342 goto done;
343 }
344
345 if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) {
346 if (!nci_spi_check_crc(skb)) {
347 send_acknowledge(ndev, ACKNOWLEDGE_NACK);
348 goto done;
349 }
350
351 /* In case of acknowledged mode: if ACK or NACK received,
352 * unblock completion of latest frame sent.
353 */
354 ndev->req_result = nci_spi_get_ack(skb);
355 if (ndev->req_result)
356 complete(&ndev->req_completion);
357 }
358
359 /* If there is no payload (ACK/NACK only frame),
360 * free the socket buffer
361 */
362 if (skb->len == 0) {
363 kfree_skb(skb);
364 goto done;
365 }
366
367 if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED)
368 send_acknowledge(ndev, ACKNOWLEDGE_ACK);
369
370 /* Forward skb to NCI core layer */
371 ret = nci_recv_frame(ndev->nci_dev, skb);
372
373done:
374 ndev->ops->assert_int(ndev);
375
376 return ret;
377}
378EXPORT_SYMBOL_GPL(nci_spi_recv_frame);
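
Note: the framing implemented by nci_spi_send() and send_acknowledge() above is compact enough to summarize in one diagram; this restates the code, it is not a separate specification:

	/*
	 * NCI-over-SPI frame, CRC (acknowledged) mode:
	 *
	 *  byte 0       byte 1     bytes 2..3      payload       last 2 bytes
	 *  +----------+----------+---------------+-------------+-----------+
	 *  | 0x01     | ack mode | payload len   | NCI payload | CRC-CCITT |
	 *  | (write)  |          | (big endian)  |             | MSB, LSB  |
	 *  +----------+----------+---------------+-------------+-----------+
	 *
	 * ACK/NACK frames carry no payload; the acknowledge code is shifted
	 * into the top bits of byte 2 (NCI_SPI_ACK_SHIFT), which is why the
	 * receive path masks the length MSB with NCI_SPI_MSB_PAYLOAD_MASK
	 * (0x3F). The CRC is seeded with 0xFFFF (CRC_INIT) and computed over
	 * header plus payload.
	 */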
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index f0c4d61f37c0..68063b2025da 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -56,6 +56,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
 	[NFC_ATTR_LLC_PARAM_RW] = { .type = NLA_U8 },
 	[NFC_ATTR_LLC_PARAM_MIUX] = { .type = NLA_U16 },
 	[NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED },
+	[NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING,
+				     .len = NFC_FIRMWARE_NAME_MAXSIZE },
 };
 
 static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = {
@@ -424,6 +426,69 @@ free_msg:
 	return rc;
 }
 
429int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type)
430{
431 struct sk_buff *msg;
432 void *hdr;
433
434 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
435 if (!msg)
436 return -ENOMEM;
437
438 hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
439 NFC_EVENT_SE_ADDED);
440 if (!hdr)
441 goto free_msg;
442
443 if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
444 nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) ||
445 nla_put_u8(msg, NFC_ATTR_SE_TYPE, type))
446 goto nla_put_failure;
447
448 genlmsg_end(msg, hdr);
449
450 genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL);
451
452 return 0;
453
454nla_put_failure:
455 genlmsg_cancel(msg, hdr);
456free_msg:
457 nlmsg_free(msg);
458 return -EMSGSIZE;
459}
460
461int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx)
462{
463 struct sk_buff *msg;
464 void *hdr;
465
466 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
467 if (!msg)
468 return -ENOMEM;
469
470 hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
471 NFC_EVENT_SE_REMOVED);
472 if (!hdr)
473 goto free_msg;
474
475 if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
476 nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx))
477 goto nla_put_failure;
478
479 genlmsg_end(msg, hdr);
480
481 genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL);
482
483 return 0;
484
485nla_put_failure:
486 genlmsg_cancel(msg, hdr);
487free_msg:
488 nlmsg_free(msg);
489 return -EMSGSIZE;
490}
491
 static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
 				u32 portid, u32 seq,
 				struct netlink_callback *cb,
@@ -442,7 +507,6 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
 	if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) ||
 	    nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
 	    nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) ||
-	    nla_put_u32(msg, NFC_ATTR_SE, dev->supported_se) ||
 	    nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) ||
 	    nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode))
 		goto nla_put_failure;
@@ -1025,6 +1089,195 @@ exit:
 	return rc;
 }
 
1092static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info)
1093{
1094 struct nfc_dev *dev;
1095 int rc;
1096 u32 idx;
1097 char firmware_name[NFC_FIRMWARE_NAME_MAXSIZE + 1];
1098
1099 if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
1100 return -EINVAL;
1101
1102 idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
1103
1104 dev = nfc_get_device(idx);
1105 if (!dev)
1106 return -ENODEV;
1107
1108 nla_strlcpy(firmware_name, info->attrs[NFC_ATTR_FIRMWARE_NAME],
1109 sizeof(firmware_name));
1110
1111 rc = nfc_fw_download(dev, firmware_name);
1112
1113 nfc_put_device(dev);
1114 return rc;
1115}
1116
1117int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
1118 u32 result)
1119{
1120 struct sk_buff *msg;
1121 void *hdr;
1122
1123 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1124 if (!msg)
1125 return -ENOMEM;
1126
1127 hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
1128 NFC_CMD_FW_DOWNLOAD);
1129 if (!hdr)
1130 goto free_msg;
1131
1132 if (nla_put_string(msg, NFC_ATTR_FIRMWARE_NAME, firmware_name) ||
1133 nla_put_u32(msg, NFC_ATTR_FIRMWARE_DOWNLOAD_STATUS, result) ||
1134 nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx))
1135 goto nla_put_failure;
1136
1137 genlmsg_end(msg, hdr);
1138
1139 genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL);
1140
1141 return 0;
1142
1143nla_put_failure:
1144 genlmsg_cancel(msg, hdr);
1145free_msg:
1146 nlmsg_free(msg);
1147 return -EMSGSIZE;
1148}
1149
1150static int nfc_genl_enable_se(struct sk_buff *skb, struct genl_info *info)
1151{
1152 struct nfc_dev *dev;
1153 int rc;
1154 u32 idx, se_idx;
1155
1156 if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
1157 !info->attrs[NFC_ATTR_SE_INDEX])
1158 return -EINVAL;
1159
1160 idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
1161 se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]);
1162
1163 dev = nfc_get_device(idx);
1164 if (!dev)
1165 return -ENODEV;
1166
1167 rc = nfc_enable_se(dev, se_idx);
1168
1169 nfc_put_device(dev);
1170 return rc;
1171}
1172
1173static int nfc_genl_disable_se(struct sk_buff *skb, struct genl_info *info)
1174{
1175 struct nfc_dev *dev;
1176 int rc;
1177 u32 idx, se_idx;
1178
1179 if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
1180 !info->attrs[NFC_ATTR_SE_INDEX])
1181 return -EINVAL;
1182
1183 idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
1184 se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]);
1185
1186 dev = nfc_get_device(idx);
1187 if (!dev)
1188 return -ENODEV;
1189
1190 rc = nfc_disable_se(dev, se_idx);
1191
1192 nfc_put_device(dev);
1193 return rc;
1194}
1195
1196static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev,
1197 u32 portid, u32 seq,
1198 struct netlink_callback *cb,
1199 int flags)
1200{
1201 void *hdr;
1202 struct nfc_se *se, *n;
1203
1204 list_for_each_entry_safe(se, n, &dev->secure_elements, list) {
1205 hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, flags,
1206 NFC_CMD_GET_SE);
1207 if (!hdr)
1208 goto nla_put_failure;
1209
1210 if (cb)
1211 genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
1212
1213 if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
1214 nla_put_u32(msg, NFC_ATTR_SE_INDEX, se->idx) ||
1215 nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type))
1216 goto nla_put_failure;
1217
1218 if (genlmsg_end(msg, hdr) < 0)
1219 goto nla_put_failure;
1220 }
1221
1222 return 0;
1223
1224nla_put_failure:
1225 genlmsg_cancel(msg, hdr);
1226 return -EMSGSIZE;
1227}
1228
1229static int nfc_genl_dump_ses(struct sk_buff *skb,
1230 struct netlink_callback *cb)
1231{
1232 struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0];
1233 struct nfc_dev *dev = (struct nfc_dev *) cb->args[1];
1234 bool first_call = false;
1235
1236 if (!iter) {
1237 first_call = true;
1238 iter = kmalloc(sizeof(struct class_dev_iter), GFP_KERNEL);
1239 if (!iter)
1240 return -ENOMEM;
1241 cb->args[0] = (long) iter;
1242 }
1243
1244 mutex_lock(&nfc_devlist_mutex);
1245
1246 cb->seq = nfc_devlist_generation;
1247
1248 if (first_call) {
1249 nfc_device_iter_init(iter);
1250 dev = nfc_device_iter_next(iter);
1251 }
1252
1253 while (dev) {
1254 int rc;
1255
1256 rc = nfc_genl_send_se(skb, dev, NETLINK_CB(cb->skb).portid,
1257 cb->nlh->nlmsg_seq, cb, NLM_F_MULTI);
1258 if (rc < 0)
1259 break;
1260
1261 dev = nfc_device_iter_next(iter);
1262 }
1263
1264 mutex_unlock(&nfc_devlist_mutex);
1265
1266 cb->args[1] = (long) dev;
1267
1268 return skb->len;
1269}
1270
1271static int nfc_genl_dump_ses_done(struct netlink_callback *cb)
1272{
1273 struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0];
1274
1275 nfc_device_iter_exit(iter);
1276 kfree(iter);
1277
1278 return 0;
1279}
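
A remark on the two callbacks above: they follow the standard pattern for resumable genetlink dumps. The kmalloc'ed class iterator lives in cb->args[0] and the next device in cb->args[1], so a dump that fills one skb resumes where it stopped on the next dumpit call, and the .done callback is the single place where the iterator is torn down and freed.
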
1280
1028static struct genl_ops nfc_genl_ops[] = { 1281static struct genl_ops nfc_genl_ops[] = {
1029 { 1282 {
1030 .cmd = NFC_CMD_GET_DEVICE, 1283 .cmd = NFC_CMD_GET_DEVICE,
@@ -1084,6 +1337,27 @@ static struct genl_ops nfc_genl_ops[] = {
1084 .doit = nfc_genl_llc_sdreq, 1337 .doit = nfc_genl_llc_sdreq,
1085 .policy = nfc_genl_policy, 1338 .policy = nfc_genl_policy,
1086 }, 1339 },
1340 {
1341 .cmd = NFC_CMD_FW_DOWNLOAD,
1342 .doit = nfc_genl_fw_download,
1343 .policy = nfc_genl_policy,
1344 },
1345 {
1346 .cmd = NFC_CMD_ENABLE_SE,
1347 .doit = nfc_genl_enable_se,
1348 .policy = nfc_genl_policy,
1349 },
1350 {
1351 .cmd = NFC_CMD_DISABLE_SE,
1352 .doit = nfc_genl_disable_se,
1353 .policy = nfc_genl_policy,
1354 },
1355 {
1356 .cmd = NFC_CMD_GET_SE,
1357 .dumpit = nfc_genl_dump_ses,
1358 .done = nfc_genl_dump_ses_done,
1359 .policy = nfc_genl_policy,
1360 },
1087}; 1361};
1088 1362
1089 1363
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index afa1f84ba040..aaf606fc1faa 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -94,6 +94,9 @@ int nfc_genl_tm_deactivated(struct nfc_dev *dev);
94 94
95int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list); 95int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list);
96 96
97int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type);
98int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx);
99
97struct nfc_dev *nfc_get_device(unsigned int idx); 100struct nfc_dev *nfc_get_device(unsigned int idx);
98 101
99static inline void nfc_put_device(struct nfc_dev *dev) 102static inline void nfc_put_device(struct nfc_dev *dev)
@@ -120,6 +123,10 @@ static inline void nfc_device_iter_exit(struct class_dev_iter *iter)
120 class_dev_iter_exit(iter); 123 class_dev_iter_exit(iter);
121} 124}
122 125
126int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name);
127int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
128 u32 result);
129
123int nfc_dev_up(struct nfc_dev *dev); 130int nfc_dev_up(struct nfc_dev *dev);
124 131
125int nfc_dev_down(struct nfc_dev *dev); 132int nfc_dev_down(struct nfc_dev *dev);
@@ -139,4 +146,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx);
139int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, 146int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb,
140 data_exchange_cb_t cb, void *cb_context); 147 data_exchange_cb_t cb, void *cb_context);
141 148
149int nfc_enable_se(struct nfc_dev *dev, u32 se_idx);
150int nfc_disable_se(struct nfc_dev *dev, u32 se_idx);
151
142#endif /* __LOCAL_NFC_H */ 152#endif /* __LOCAL_NFC_H */
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index d9ea33c361be..6ecf491ad509 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -4,6 +4,7 @@
4 4
5config OPENVSWITCH 5config OPENVSWITCH
6 tristate "Open vSwitch" 6 tristate "Open vSwitch"
7 select LIBCRC32C
7 ---help--- 8 ---help---
8 Open vSwitch is a multilayer Ethernet switch targeted at virtualized 9 Open vSwitch is a multilayer Ethernet switch targeted at virtualized
9 environments. In addition to supporting a variety of features 10 environments. In addition to supporting a variety of features
@@ -26,3 +27,30 @@ config OPENVSWITCH
26 called openvswitch. 27 called openvswitch.
27 28
28 If unsure, say N. 29 If unsure, say N.
30
31config OPENVSWITCH_GRE
32 bool "Open vSwitch GRE tunneling support"
33 depends on INET
34 depends on OPENVSWITCH
35 depends on NET_IPGRE_DEMUX && !(OPENVSWITCH=y && NET_IPGRE_DEMUX=m)
36 default y
37 ---help---
38 If you say Y here, then Open vSwitch will be able to create
39 GRE vports.
40
41 Say N to exclude this support and reduce the binary size.
42
43 If unsure, say Y.
44
45config OPENVSWITCH_VXLAN
46 bool "Open vSwitch VXLAN tunneling support"
47 depends on INET
48 depends on OPENVSWITCH
49 depends on VXLAN && !(OPENVSWITCH=y && VXLAN=m)
50 default y
51 ---help---
52 If you say Y here, then Open vSwitch will be able to create VXLAN vports.
53
54 Say N to exclude this support and reduce the binary size.
55
56 If unsure, say Y.
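
A note on the dependency lines above: `NET_IPGRE_DEMUX && !(OPENVSWITCH=y && NET_IPGRE_DEMUX=m)` (and the analogous VXLAN expression) is the usual Kconfig idiom for a bool feature embedded in a tristate driver. It rules out the one illegal combination, a built-in openvswitch calling into a modular dependency, whose symbols could not be resolved at link time.
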
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 15e7384745c1..ea36e99089af 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -11,4 +11,12 @@ openvswitch-y := \
11 flow.o \ 11 flow.o \
12 vport.o \ 12 vport.o \
13 vport-internal_dev.o \ 13 vport-internal_dev.o \
14 vport-netdev.o \ 14 vport-netdev.o
15
16ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
17openvswitch-y += vport-vxlan.o
18endif
19
20ifneq ($(CONFIG_OPENVSWITCH_GRE),)
21openvswitch-y += vport-gre.o
22endif
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 894b6cbdd929..65cfaa816075 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2007-2012 Nicira, Inc. 2 * Copyright (c) 2007-2013 Nicira, Inc.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public 5 * modify it under the terms of version 2 of the GNU General Public
@@ -22,6 +22,7 @@
22#include <linux/in.h> 22#include <linux/in.h>
23#include <linux/ip.h> 23#include <linux/ip.h>
24#include <linux/openvswitch.h> 24#include <linux/openvswitch.h>
25#include <linux/sctp.h>
25#include <linux/tcp.h> 26#include <linux/tcp.h>
26#include <linux/udp.h> 27#include <linux/udp.h>
27#include <linux/in6.h> 28#include <linux/in6.h>
@@ -31,6 +32,7 @@
31#include <net/ipv6.h> 32#include <net/ipv6.h>
32#include <net/checksum.h> 33#include <net/checksum.h>
33#include <net/dsfield.h> 34#include <net/dsfield.h>
35#include <net/sctp/checksum.h>
34 36
35#include "datapath.h" 37#include "datapath.h"
36#include "vport.h" 38#include "vport.h"
@@ -130,9 +132,13 @@ static int set_eth_addr(struct sk_buff *skb,
130 if (unlikely(err)) 132 if (unlikely(err))
131 return err; 133 return err;
132 134
135 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
136
133 memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN); 137 memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN);
134 memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN); 138 memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN);
135 139
140 ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
141
136 return 0; 142 return 0;
137} 143}
138 144
@@ -348,6 +354,39 @@ static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
348 return 0; 354 return 0;
349} 355}
350 356
357static int set_sctp(struct sk_buff *skb,
358 const struct ovs_key_sctp *sctp_port_key)
359{
360 struct sctphdr *sh;
361 int err;
362 unsigned int sctphoff = skb_transport_offset(skb);
363
364 err = make_writable(skb, sctphoff + sizeof(struct sctphdr));
365 if (unlikely(err))
366 return err;
367
368 sh = sctp_hdr(skb);
369 if (sctp_port_key->sctp_src != sh->source ||
370 sctp_port_key->sctp_dst != sh->dest) {
371 __le32 old_correct_csum, new_csum, old_csum;
372
373 old_csum = sh->checksum;
374 old_correct_csum = sctp_compute_cksum(skb, sctphoff);
375
376 sh->source = sctp_port_key->sctp_src;
377 sh->dest = sctp_port_key->sctp_dst;
378
379 new_csum = sctp_compute_cksum(skb, sctphoff);
380
381 /* Carry any checksum errors through. */
382 sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
383
384 skb->rxhash = 0;
385 }
386
387 return 0;
388}
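
The "carry errors through" line is worth unpacking: instead of unconditionally installing the freshly computed CRC32c, set_sctp() stores old_csum ^ old_correct_csum ^ new_csum, so a packet that arrived with a corrupt checksum still fails verification after the port rewrite. A small standalone sketch (plain unsigned values in place of __le32, made-up checksum values) demonstrates the property:

/* Demonstrates the error-carrying update used by set_sctp():
 *   stored' = stored ^ old_correct ^ new_correct
 * An intact packet ends up with the new correct checksum; a corrupted
 * one keeps exactly its original XOR delta from correct. */
#include <stdio.h>

static unsigned int rewrite(unsigned int stored, unsigned int old_correct,
			    unsigned int new_correct)
{
	return stored ^ old_correct ^ new_correct;
}

int main(void)
{
	unsigned int old_correct = 0xdeadbeef, new_correct = 0xcafef00d;

	/* Packet arrived intact: result equals the new correct checksum. */
	printf("intact: %#x (expect %#x)\n",
	       rewrite(old_correct, old_correct, new_correct), new_correct);

	/* Packet arrived with a one-bit error: the error survives. */
	printf("broken: %#x (expect %#x)\n",
	       rewrite(old_correct ^ 1, old_correct, new_correct),
	       new_correct ^ 1);
	return 0;
}
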
389
351static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port) 390static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
352{ 391{
353 struct vport *vport; 392 struct vport *vport;
@@ -372,8 +411,10 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
372 const struct nlattr *a; 411 const struct nlattr *a;
373 int rem; 412 int rem;
374 413
414 BUG_ON(!OVS_CB(skb)->pkt_key);
415
375 upcall.cmd = OVS_PACKET_CMD_ACTION; 416 upcall.cmd = OVS_PACKET_CMD_ACTION;
376 upcall.key = &OVS_CB(skb)->flow->key; 417 upcall.key = OVS_CB(skb)->pkt_key;
377 upcall.userdata = NULL; 418 upcall.userdata = NULL;
378 upcall.portid = 0; 419 upcall.portid = 0;
379 420
@@ -432,6 +473,10 @@ static int execute_set_action(struct sk_buff *skb,
432 skb->mark = nla_get_u32(nested_attr); 473 skb->mark = nla_get_u32(nested_attr);
433 break; 474 break;
434 475
476 case OVS_KEY_ATTR_IPV4_TUNNEL:
477 OVS_CB(skb)->tun_key = nla_data(nested_attr);
478 break;
479
435 case OVS_KEY_ATTR_ETHERNET: 480 case OVS_KEY_ATTR_ETHERNET:
436 err = set_eth_addr(skb, nla_data(nested_attr)); 481 err = set_eth_addr(skb, nla_data(nested_attr));
437 break; 482 break;
@@ -451,6 +496,10 @@ static int execute_set_action(struct sk_buff *skb,
451 case OVS_KEY_ATTR_UDP: 496 case OVS_KEY_ATTR_UDP:
452 err = set_udp(skb, nla_data(nested_attr)); 497 err = set_udp(skb, nla_data(nested_attr));
453 break; 498 break;
499
500 case OVS_KEY_ATTR_SCTP:
501 err = set_sctp(skb, nla_data(nested_attr));
502 break;
454 } 503 }
455 504
456 return err; 505 return err;
@@ -527,6 +576,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
527{ 576{
528 struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts); 577 struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
529 578
579 OVS_CB(skb)->tun_key = NULL;
530 return do_execute_actions(dp, skb, acts->actions, 580 return do_execute_actions(dp, skb, acts->actions,
531 acts->actions_len, false); 581 acts->actions_len, false);
532} 582}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d12d6b8b5e8b..2aa13bd7f2b2 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2007-2012 Nicira, Inc. 2 * Copyright (c) 2007-2013 Nicira, Inc.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public 5 * modify it under the terms of version 2 of the GNU General Public
@@ -165,7 +165,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
165{ 165{
166 struct datapath *dp = container_of(rcu, struct datapath, rcu); 166 struct datapath *dp = container_of(rcu, struct datapath, rcu);
167 167
168 ovs_flow_tbl_destroy((__force struct flow_table *)dp->table); 168 ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false);
169 free_percpu(dp->stats_percpu); 169 free_percpu(dp->stats_percpu);
170 release_net(ovs_dp_get_net(dp)); 170 release_net(ovs_dp_get_net(dp));
171 kfree(dp->ports); 171 kfree(dp->ports);
@@ -226,19 +226,18 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
226 struct sw_flow_key key; 226 struct sw_flow_key key;
227 u64 *stats_counter; 227 u64 *stats_counter;
228 int error; 228 int error;
229 int key_len;
230 229
231 stats = this_cpu_ptr(dp->stats_percpu); 230 stats = this_cpu_ptr(dp->stats_percpu);
232 231
233 /* Extract flow from 'skb' into 'key'. */ 232 /* Extract flow from 'skb' into 'key'. */
234 error = ovs_flow_extract(skb, p->port_no, &key, &key_len); 233 error = ovs_flow_extract(skb, p->port_no, &key);
235 if (unlikely(error)) { 234 if (unlikely(error)) {
236 kfree_skb(skb); 235 kfree_skb(skb);
237 return; 236 return;
238 } 237 }
239 238
240 /* Look up flow. */ 239 /* Look up flow. */
241 flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len); 240 flow = ovs_flow_lookup(rcu_dereference(dp->table), &key);
242 if (unlikely(!flow)) { 241 if (unlikely(!flow)) {
243 struct dp_upcall_info upcall; 242 struct dp_upcall_info upcall;
244 243
@@ -253,6 +252,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
253 } 252 }
254 253
255 OVS_CB(skb)->flow = flow; 254 OVS_CB(skb)->flow = flow;
255 OVS_CB(skb)->pkt_key = &key;
256 256
257 stats_counter = &stats->n_hit; 257 stats_counter = &stats->n_hit;
258 ovs_flow_used(OVS_CB(skb)->flow, skb); 258 ovs_flow_used(OVS_CB(skb)->flow, skb);
@@ -362,6 +362,14 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
362static size_t key_attr_size(void) 362static size_t key_attr_size(void)
363{ 363{
364 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ 364 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
365 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
366 + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
367 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
368 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
369 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
370 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
371 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
372 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
365 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ 373 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
366 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ 374 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
367 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ 375 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
@@ -427,7 +435,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
427 upcall->dp_ifindex = dp_ifindex; 435 upcall->dp_ifindex = dp_ifindex;
428 436
429 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); 437 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
430 ovs_flow_to_nlattrs(upcall_info->key, user_skb); 438 ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb);
431 nla_nest_end(user_skb, nla); 439 nla_nest_end(user_skb, nla);
432 440
433 if (upcall_info->userdata) 441 if (upcall_info->userdata)
@@ -460,20 +468,93 @@ static int flush_flows(struct datapath *dp)
460 468
461 rcu_assign_pointer(dp->table, new_table); 469 rcu_assign_pointer(dp->table, new_table);
462 470
463 ovs_flow_tbl_deferred_destroy(old_table); 471 ovs_flow_tbl_destroy(old_table, true);
464 return 0; 472 return 0;
465} 473}
466 474
467static int validate_actions(const struct nlattr *attr, 475static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len)
468 const struct sw_flow_key *key, int depth); 476{
477
478 struct sw_flow_actions *acts;
479 int new_acts_size;
480 int req_size = NLA_ALIGN(attr_len);
481 int next_offset = offsetof(struct sw_flow_actions, actions) +
482 (*sfa)->actions_len;
483
484 if (req_size <= (ksize(*sfa) - next_offset))
485 goto out;
486
487 new_acts_size = ksize(*sfa) * 2;
488
489 if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
490 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
491 return ERR_PTR(-EMSGSIZE);
492 new_acts_size = MAX_ACTIONS_BUFSIZE;
493 }
494
495 acts = ovs_flow_actions_alloc(new_acts_size);
496 if (IS_ERR(acts))
497 return (void *)acts;
498
499 memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
500 acts->actions_len = (*sfa)->actions_len;
501 kfree(*sfa);
502 *sfa = acts;
503
504out:
505 (*sfa)->actions_len += req_size;
506 return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
507}
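
reserve_sfa_size() grows the copied-action buffer geometrically: it first consumes the slack that ksize() reports beyond the requested allocation, doubles when that runs out, and refuses to pass MAX_ACTIONS_BUFSIZE. A userspace sketch of the same policy, with glibc's malloc_usable_size() standing in for ksize(), an illustrative cap, and the assumption that the buffer starts out malloc'ed:

/* Userspace model of reserve_sfa_size()'s growth policy (sketch). */
#include <malloc.h>
#include <stdlib.h>
#include <string.h>

#define MAX_BUF (32 * 1024)	/* stand-in for MAX_ACTIONS_BUFSIZE */

/* Make room for `req` more bytes in `buf` (currently `*lenp` used).
 * Returns the possibly reallocated buffer, or NULL past the cap. */
static void *reserve(void *buf, size_t *lenp, size_t req)
{
	size_t new_size;
	void *nbuf;

	/* Slack the allocator already handed out may cover the request. */
	if (req <= malloc_usable_size(buf) - *lenp)
		goto out;

	new_size = malloc_usable_size(buf) * 2;
	if (new_size > MAX_BUF) {
		if (MAX_BUF - *lenp < req)
			return NULL;	/* would exceed the hard cap */
		new_size = MAX_BUF;
	}

	nbuf = malloc(new_size);
	if (!nbuf)
		return NULL;
	memcpy(nbuf, buf, *lenp);
	free(buf);
	buf = nbuf;
out:
	*lenp += req;
	return buf;
}
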
508
509static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
510{
511 struct nlattr *a;
512
513 a = reserve_sfa_size(sfa, nla_attr_size(len));
514 if (IS_ERR(a))
515 return PTR_ERR(a);
516
517 a->nla_type = attrtype;
518 a->nla_len = nla_attr_size(len);
519
520 if (data)
521 memcpy(nla_data(a), data, len);
522 memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
523
524 return 0;
525}
526
527static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype)
528{
529 int used = (*sfa)->actions_len;
530 int err;
531
532 err = add_action(sfa, attrtype, NULL, 0);
533 if (err)
534 return err;
535
536 return used;
537}
469 538
470static int validate_sample(const struct nlattr *attr, 539static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset)
471 const struct sw_flow_key *key, int depth) 540{
541 struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset);
542
543 a->nla_len = sfa->actions_len - st_offset;
544}
545
546static int validate_and_copy_actions(const struct nlattr *attr,
547 const struct sw_flow_key *key, int depth,
548 struct sw_flow_actions **sfa);
549
550static int validate_and_copy_sample(const struct nlattr *attr,
551 const struct sw_flow_key *key, int depth,
552 struct sw_flow_actions **sfa)
472{ 553{
473 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; 554 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
474 const struct nlattr *probability, *actions; 555 const struct nlattr *probability, *actions;
475 const struct nlattr *a; 556 const struct nlattr *a;
476 int rem; 557 int rem, start, err, st_acts;
477 558
478 memset(attrs, 0, sizeof(attrs)); 559 memset(attrs, 0, sizeof(attrs));
479 nla_for_each_nested(a, attr, rem) { 560 nla_for_each_nested(a, attr, rem) {
@@ -492,7 +573,26 @@ static int validate_sample(const struct nlattr *attr,
492 actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; 573 actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
493 if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) 574 if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
494 return -EINVAL; 575 return -EINVAL;
495 return validate_actions(actions, key, depth + 1); 576
577 /* validation done, copy sample action. */
578 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
579 if (start < 0)
580 return start;
581 err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32));
582 if (err)
583 return err;
584 st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
585 if (st_acts < 0)
586 return st_acts;
587
588 err = validate_and_copy_actions(actions, key, depth + 1, sfa);
589 if (err)
590 return err;
591
592 add_nested_action_end(*sfa, st_acts);
593 add_nested_action_end(*sfa, start);
594
595 return 0;
496} 596}
497 597
498static int validate_tp_port(const struct sw_flow_key *flow_key) 598static int validate_tp_port(const struct sw_flow_key *flow_key)
@@ -508,8 +608,33 @@ static int validate_tp_port(const struct sw_flow_key *flow_key)
508 return -EINVAL; 608 return -EINVAL;
509} 609}
510 610
611static int validate_and_copy_set_tun(const struct nlattr *attr,
612 struct sw_flow_actions **sfa)
613{
614 struct sw_flow_match match;
615 struct sw_flow_key key;
616 int err, start;
617
618 ovs_match_init(&match, &key, NULL);
619 err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &match, false);
620 if (err)
621 return err;
622
623 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
624 if (start < 0)
625 return start;
626
627 err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
628 sizeof(match.key->tun_key));
629 add_nested_action_end(*sfa, start);
630
631 return err;
632}
633
511static int validate_set(const struct nlattr *a, 634static int validate_set(const struct nlattr *a,
512 const struct sw_flow_key *flow_key) 635 const struct sw_flow_key *flow_key,
636 struct sw_flow_actions **sfa,
637 bool *set_tun)
513{ 638{
514 const struct nlattr *ovs_key = nla_data(a); 639 const struct nlattr *ovs_key = nla_data(a);
515 int key_type = nla_type(ovs_key); 640 int key_type = nla_type(ovs_key);
@@ -519,18 +644,27 @@ static int validate_set(const struct nlattr *a,
519 return -EINVAL; 644 return -EINVAL;
520 645
521 if (key_type > OVS_KEY_ATTR_MAX || 646 if (key_type > OVS_KEY_ATTR_MAX ||
522 nla_len(ovs_key) != ovs_key_lens[key_type]) 647 (ovs_key_lens[key_type] != nla_len(ovs_key) &&
648 ovs_key_lens[key_type] != -1))
523 return -EINVAL; 649 return -EINVAL;
524 650
525 switch (key_type) { 651 switch (key_type) {
526 const struct ovs_key_ipv4 *ipv4_key; 652 const struct ovs_key_ipv4 *ipv4_key;
527 const struct ovs_key_ipv6 *ipv6_key; 653 const struct ovs_key_ipv6 *ipv6_key;
654 int err;
528 655
529 case OVS_KEY_ATTR_PRIORITY: 656 case OVS_KEY_ATTR_PRIORITY:
530 case OVS_KEY_ATTR_SKB_MARK: 657 case OVS_KEY_ATTR_SKB_MARK:
531 case OVS_KEY_ATTR_ETHERNET: 658 case OVS_KEY_ATTR_ETHERNET:
532 break; 659 break;
533 660
661 case OVS_KEY_ATTR_TUNNEL:
662 *set_tun = true;
663 err = validate_and_copy_set_tun(a, sfa);
664 if (err)
665 return err;
666 break;
667
534 case OVS_KEY_ATTR_IPV4: 668 case OVS_KEY_ATTR_IPV4:
535 if (flow_key->eth.type != htons(ETH_P_IP)) 669 if (flow_key->eth.type != htons(ETH_P_IP))
536 return -EINVAL; 670 return -EINVAL;
@@ -578,6 +712,12 @@ static int validate_set(const struct nlattr *a,
578 712
579 return validate_tp_port(flow_key); 713 return validate_tp_port(flow_key);
580 714
715 case OVS_KEY_ATTR_SCTP:
716 if (flow_key->ip.proto != IPPROTO_SCTP)
717 return -EINVAL;
718
719 return validate_tp_port(flow_key);
720
581 default: 721 default:
582 return -EINVAL; 722 return -EINVAL;
583 } 723 }
@@ -606,8 +746,24 @@ static int validate_userspace(const struct nlattr *attr)
606 return 0; 746 return 0;
607} 747}
608 748
609static int validate_actions(const struct nlattr *attr, 749static int copy_action(const struct nlattr *from,
610 const struct sw_flow_key *key, int depth) 750 struct sw_flow_actions **sfa)
751{
752 int totlen = NLA_ALIGN(from->nla_len);
753 struct nlattr *to;
754
755 to = reserve_sfa_size(sfa, from->nla_len);
756 if (IS_ERR(to))
757 return PTR_ERR(to);
758
759 memcpy(to, from, totlen);
760 return 0;
761}
762
763static int validate_and_copy_actions(const struct nlattr *attr,
764 const struct sw_flow_key *key,
765 int depth,
766 struct sw_flow_actions **sfa)
611{ 767{
612 const struct nlattr *a; 768 const struct nlattr *a;
613 int rem, err; 769 int rem, err;
@@ -627,12 +783,14 @@ static int validate_actions(const struct nlattr *attr,
627 }; 783 };
628 const struct ovs_action_push_vlan *vlan; 784 const struct ovs_action_push_vlan *vlan;
629 int type = nla_type(a); 785 int type = nla_type(a);
786 bool skip_copy;
630 787
631 if (type > OVS_ACTION_ATTR_MAX || 788 if (type > OVS_ACTION_ATTR_MAX ||
632 (action_lens[type] != nla_len(a) && 789 (action_lens[type] != nla_len(a) &&
633 action_lens[type] != (u32)-1)) 790 action_lens[type] != (u32)-1))
634 return -EINVAL; 791 return -EINVAL;
635 792
793 skip_copy = false;
636 switch (type) { 794 switch (type) {
637 case OVS_ACTION_ATTR_UNSPEC: 795 case OVS_ACTION_ATTR_UNSPEC:
638 return -EINVAL; 796 return -EINVAL;
@@ -661,20 +819,26 @@ static int validate_actions(const struct nlattr *attr,
661 break; 819 break;
662 820
663 case OVS_ACTION_ATTR_SET: 821 case OVS_ACTION_ATTR_SET:
664 err = validate_set(a, key); 822 err = validate_set(a, key, sfa, &skip_copy);
665 if (err) 823 if (err)
666 return err; 824 return err;
667 break; 825 break;
668 826
669 case OVS_ACTION_ATTR_SAMPLE: 827 case OVS_ACTION_ATTR_SAMPLE:
670 err = validate_sample(a, key, depth); 828 err = validate_and_copy_sample(a, key, depth, sfa);
671 if (err) 829 if (err)
672 return err; 830 return err;
831 skip_copy = true;
673 break; 832 break;
674 833
675 default: 834 default:
676 return -EINVAL; 835 return -EINVAL;
677 } 836 }
837 if (!skip_copy) {
838 err = copy_action(a, sfa);
839 if (err)
840 return err;
841 }
678 } 842 }
679 843
680 if (rem > 0) 844 if (rem > 0)
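
The structural change in this hunk is that validation is no longer a read-only pass: validate_and_copy_actions() emits the checked actions into the sw_flow_actions buffer as it goes. Actions that must be rewritten, namely tunnel sets and nested samples, are produced by their own validators and flagged via skip_copy; everything else is copied verbatim by copy_action().
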
@@ -702,7 +866,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
702 struct ethhdr *eth; 866 struct ethhdr *eth;
703 int len; 867 int len;
704 int err; 868 int err;
705 int key_len;
706 869
707 err = -EINVAL; 870 err = -EINVAL;
708 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || 871 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
@@ -735,30 +898,25 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
735 if (IS_ERR(flow)) 898 if (IS_ERR(flow))
736 goto err_kfree_skb; 899 goto err_kfree_skb;
737 900
738 err = ovs_flow_extract(packet, -1, &flow->key, &key_len); 901 err = ovs_flow_extract(packet, -1, &flow->key);
739 if (err) 902 if (err)
740 goto err_flow_free; 903 goto err_flow_free;
741 904
742 err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, 905 err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]);
743 &flow->key.phy.skb_mark,
744 &flow->key.phy.in_port,
745 a[OVS_PACKET_ATTR_KEY]);
746 if (err) 906 if (err)
747 goto err_flow_free; 907 goto err_flow_free;
748 908 acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
749 err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0);
750 if (err)
751 goto err_flow_free;
752
753 flow->hash = ovs_flow_hash(&flow->key, key_len);
754
755 acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
756 err = PTR_ERR(acts); 909 err = PTR_ERR(acts);
757 if (IS_ERR(acts)) 910 if (IS_ERR(acts))
758 goto err_flow_free; 911 goto err_flow_free;
912
913 err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts);
759 rcu_assign_pointer(flow->sf_acts, acts); 914 rcu_assign_pointer(flow->sf_acts, acts);
915 if (err)
916 goto err_flow_free;
760 917
761 OVS_CB(packet)->flow = flow; 918 OVS_CB(packet)->flow = flow;
919 OVS_CB(packet)->pkt_key = &flow->key;
762 packet->priority = flow->key.phy.priority; 920 packet->priority = flow->key.phy.priority;
763 packet->mark = flow->key.phy.skb_mark; 921 packet->mark = flow->key.phy.skb_mark;
764 922
@@ -773,13 +931,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
773 local_bh_enable(); 931 local_bh_enable();
774 rcu_read_unlock(); 932 rcu_read_unlock();
775 933
776 ovs_flow_free(flow); 934 ovs_flow_free(flow, false);
777 return err; 935 return err;
778 936
779err_unlock: 937err_unlock:
780 rcu_read_unlock(); 938 rcu_read_unlock();
781err_flow_free: 939err_flow_free:
782 ovs_flow_free(flow); 940 ovs_flow_free(flow, false);
783err_kfree_skb: 941err_kfree_skb:
784 kfree_skb(packet); 942 kfree_skb(packet);
785err: 943err:
@@ -802,9 +960,10 @@ static struct genl_ops dp_packet_genl_ops[] = {
802 960
803static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) 961static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
804{ 962{
963 struct flow_table *table;
805 int i; 964 int i;
806 struct flow_table *table = ovsl_dereference(dp->table);
807 965
966 table = rcu_dereference_check(dp->table, lockdep_ovsl_is_held());
808 stats->n_flows = ovs_flow_tbl_count(table); 967 stats->n_flows = ovs_flow_tbl_count(table);
809 968
810 stats->n_hit = stats->n_missed = stats->n_lost = 0; 969 stats->n_hit = stats->n_missed = stats->n_lost = 0;
@@ -846,10 +1005,105 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = {
846 .name = OVS_FLOW_MCGROUP 1005 .name = OVS_FLOW_MCGROUP
847}; 1006};
848 1007
1008static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb);
1009static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
1010{
1011 const struct nlattr *a;
1012 struct nlattr *start;
1013 int err = 0, rem;
1014
1015 start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
1016 if (!start)
1017 return -EMSGSIZE;
1018
1019 nla_for_each_nested(a, attr, rem) {
1020 int type = nla_type(a);
1021 struct nlattr *st_sample;
1022
1023 switch (type) {
1024 case OVS_SAMPLE_ATTR_PROBABILITY:
1025 if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a)))
1026 return -EMSGSIZE;
1027 break;
1028 case OVS_SAMPLE_ATTR_ACTIONS:
1029 st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
1030 if (!st_sample)
1031 return -EMSGSIZE;
1032 err = actions_to_attr(nla_data(a), nla_len(a), skb);
1033 if (err)
1034 return err;
1035 nla_nest_end(skb, st_sample);
1036 break;
1037 }
1038 }
1039
1040 nla_nest_end(skb, start);
1041 return err;
1042}
1043
1044static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
1045{
1046 const struct nlattr *ovs_key = nla_data(a);
1047 int key_type = nla_type(ovs_key);
1048 struct nlattr *start;
1049 int err;
1050
1051 switch (key_type) {
1052 case OVS_KEY_ATTR_IPV4_TUNNEL:
1053 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
1054 if (!start)
1055 return -EMSGSIZE;
1056
1057 err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
1058 nla_data(ovs_key));
1059 if (err)
1060 return err;
1061 nla_nest_end(skb, start);
1062 break;
1063 default:
1064 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
1065 return -EMSGSIZE;
1066 break;
1067 }
1068
1069 return 0;
1070}
1071
1072static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb)
1073{
1074 const struct nlattr *a;
1075 int rem, err;
1076
1077 nla_for_each_attr(a, attr, len, rem) {
1078 int type = nla_type(a);
1079
1080 switch (type) {
1081 case OVS_ACTION_ATTR_SET:
1082 err = set_action_to_attr(a, skb);
1083 if (err)
1084 return err;
1085 break;
1086
1087 case OVS_ACTION_ATTR_SAMPLE:
1088 err = sample_action_to_attr(a, skb);
1089 if (err)
1090 return err;
1091 break;
1092 default:
1093 if (nla_put(skb, type, nla_len(a), nla_data(a)))
1094 return -EMSGSIZE;
1095 break;
1096 }
1097 }
1098
1099 return 0;
1100}
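
These helpers are the inverse of the copy done at validation time: because a set-tunnel action is stored internally as a binary OVS_KEY_ATTR_IPV4_TUNNEL, dumping a flow back to userspace has to re-encode it as the nested OVS_KEY_ATTR_TUNNEL attribute, and sample actions have to be walked recursively for the same reason.
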
1101
849static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) 1102static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
850{ 1103{
851 return NLMSG_ALIGN(sizeof(struct ovs_header)) 1104 return NLMSG_ALIGN(sizeof(struct ovs_header))
852 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */ 1105 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
1106 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
853 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ 1107 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
854 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ 1108 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
855 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ 1109 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
@@ -862,7 +1116,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
862 u32 seq, u32 flags, u8 cmd) 1116 u32 seq, u32 flags, u8 cmd)
863{ 1117{
864 const int skb_orig_len = skb->len; 1118 const int skb_orig_len = skb->len;
865 const struct sw_flow_actions *sf_acts; 1119 struct nlattr *start;
866 struct ovs_flow_stats stats; 1120 struct ovs_flow_stats stats;
867 struct ovs_header *ovs_header; 1121 struct ovs_header *ovs_header;
868 struct nlattr *nla; 1122 struct nlattr *nla;
@@ -870,20 +1124,31 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
870 u8 tcp_flags; 1124 u8 tcp_flags;
871 int err; 1125 int err;
872 1126
873 sf_acts = ovsl_dereference(flow->sf_acts);
874
875 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); 1127 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
876 if (!ovs_header) 1128 if (!ovs_header)
877 return -EMSGSIZE; 1129 return -EMSGSIZE;
878 1130
879 ovs_header->dp_ifindex = get_dpifindex(dp); 1131 ovs_header->dp_ifindex = get_dpifindex(dp);
880 1132
1133 /* Fill flow key. */
881 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); 1134 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
882 if (!nla) 1135 if (!nla)
883 goto nla_put_failure; 1136 goto nla_put_failure;
884 err = ovs_flow_to_nlattrs(&flow->key, skb); 1137
1138 err = ovs_flow_to_nlattrs(&flow->unmasked_key,
1139 &flow->unmasked_key, skb);
1140 if (err)
1141 goto error;
1142 nla_nest_end(skb, nla);
1143
1144 nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
1145 if (!nla)
1146 goto nla_put_failure;
1147
1148 err = ovs_flow_to_nlattrs(&flow->key, &flow->mask->key, skb);
885 if (err) 1149 if (err)
886 goto error; 1150 goto error;
1151
887 nla_nest_end(skb, nla); 1152 nla_nest_end(skb, nla);
888 1153
889 spin_lock_bh(&flow->lock); 1154 spin_lock_bh(&flow->lock);
@@ -916,10 +1181,24 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
916 * This can only fail for dump operations because the skb is always 1181 * This can only fail for dump operations because the skb is always
917 * properly sized for single flows. 1182 * properly sized for single flows.
918 */ 1183 */
919 err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len, 1184 start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
920 sf_acts->actions); 1185 if (start) {
921 if (err < 0 && skb_orig_len) 1186 const struct sw_flow_actions *sf_acts;
922 goto error; 1187
1188 sf_acts = rcu_dereference_check(flow->sf_acts,
1189 lockdep_ovsl_is_held());
1190
1191 err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb);
1192 if (!err)
1193 nla_nest_end(skb, start);
1194 else {
1195 if (skb_orig_len)
1196 goto error;
1197
1198 nla_nest_cancel(skb, start);
1199 }
1200 } else if (skb_orig_len)
1201 goto nla_put_failure;
923 1202
924 return genlmsg_end(skb, ovs_header); 1203 return genlmsg_end(skb, ovs_header);
925 1204
@@ -959,27 +1238,41 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
959{ 1238{
960 struct nlattr **a = info->attrs; 1239 struct nlattr **a = info->attrs;
961 struct ovs_header *ovs_header = info->userhdr; 1240 struct ovs_header *ovs_header = info->userhdr;
962 struct sw_flow_key key; 1241 struct sw_flow_key key, masked_key;
963 struct sw_flow *flow; 1242 struct sw_flow *flow = NULL;
1243 struct sw_flow_mask mask;
964 struct sk_buff *reply; 1244 struct sk_buff *reply;
965 struct datapath *dp; 1245 struct datapath *dp;
966 struct flow_table *table; 1246 struct flow_table *table;
1247 struct sw_flow_actions *acts = NULL;
1248 struct sw_flow_match match;
967 int error; 1249 int error;
968 int key_len;
969 1250
970 /* Extract key. */ 1251 /* Extract key. */
971 error = -EINVAL; 1252 error = -EINVAL;
972 if (!a[OVS_FLOW_ATTR_KEY]) 1253 if (!a[OVS_FLOW_ATTR_KEY])
973 goto error; 1254 goto error;
974 error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1255
1256 ovs_match_init(&match, &key, &mask);
1257 error = ovs_match_from_nlattrs(&match,
1258 a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
975 if (error) 1259 if (error)
976 goto error; 1260 goto error;
977 1261
978 /* Validate actions. */ 1262 /* Validate actions. */
979 if (a[OVS_FLOW_ATTR_ACTIONS]) { 1263 if (a[OVS_FLOW_ATTR_ACTIONS]) {
980 error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0); 1264 acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
981 if (error) 1265 error = PTR_ERR(acts);
1266 if (IS_ERR(acts))
982 goto error; 1267 goto error;
1268
1269 ovs_flow_key_mask(&masked_key, &key, &mask);
1270 error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
1271 &masked_key, 0, &acts);
1272 if (error) {
1273 OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
1274 goto err_kfree;
1275 }
983 } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { 1276 } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
984 error = -EINVAL; 1277 error = -EINVAL;
985 goto error; 1278 goto error;
@@ -992,10 +1285,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
992 goto err_unlock_ovs; 1285 goto err_unlock_ovs;
993 1286
994 table = ovsl_dereference(dp->table); 1287 table = ovsl_dereference(dp->table);
995 flow = ovs_flow_tbl_lookup(table, &key, key_len);
996 if (!flow) {
997 struct sw_flow_actions *acts;
998 1288
1289 /* Check if this is a duplicate flow */
1290 flow = ovs_flow_lookup(table, &key);
1291 if (!flow) {
1292 struct sw_flow_mask *mask_p;
999 /* Bail out if we're not allowed to create a new flow. */ 1293 /* Bail out if we're not allowed to create a new flow. */
1000 error = -ENOENT; 1294 error = -ENOENT;
1001 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) 1295 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
@@ -1008,7 +1302,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1008 new_table = ovs_flow_tbl_expand(table); 1302 new_table = ovs_flow_tbl_expand(table);
1009 if (!IS_ERR(new_table)) { 1303 if (!IS_ERR(new_table)) {
1010 rcu_assign_pointer(dp->table, new_table); 1304 rcu_assign_pointer(dp->table, new_table);
1011 ovs_flow_tbl_deferred_destroy(table); 1305 ovs_flow_tbl_destroy(table, true);
1012 table = ovsl_dereference(dp->table); 1306 table = ovsl_dereference(dp->table);
1013 } 1307 }
1014 } 1308 }
@@ -1019,27 +1313,35 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1019 error = PTR_ERR(flow); 1313 error = PTR_ERR(flow);
1020 goto err_unlock_ovs; 1314 goto err_unlock_ovs;
1021 } 1315 }
1022 flow->key = key;
1023 clear_stats(flow); 1316 clear_stats(flow);
1024 1317
1025 /* Obtain actions. */ 1318 flow->key = masked_key;
1026 acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]); 1319 flow->unmasked_key = key;
1027 error = PTR_ERR(acts); 1320
1028 if (IS_ERR(acts)) 1321 /* Make sure mask is unique in the system */
1029 goto error_free_flow; 1322 mask_p = ovs_sw_flow_mask_find(table, &mask);
1323 if (!mask_p) {
1324 /* Allocate a new mask if none exists. */
1325 mask_p = ovs_sw_flow_mask_alloc();
1326 if (!mask_p)
1327 goto err_flow_free;
1328 mask_p->key = mask.key;
1329 mask_p->range = mask.range;
1330 ovs_sw_flow_mask_insert(table, mask_p);
1331 }
1332
1333 ovs_sw_flow_mask_add_ref(mask_p);
1334 flow->mask = mask_p;
1030 rcu_assign_pointer(flow->sf_acts, acts); 1335 rcu_assign_pointer(flow->sf_acts, acts);
1031 1336
1032 /* Put flow in bucket. */ 1337 /* Put flow in bucket. */
1033 flow->hash = ovs_flow_hash(&key, key_len); 1338 ovs_flow_insert(table, flow);
1034 ovs_flow_tbl_insert(table, flow);
1035 1339
1036 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 1340 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1037 info->snd_seq, 1341 info->snd_seq, OVS_FLOW_CMD_NEW);
1038 OVS_FLOW_CMD_NEW);
1039 } else { 1342 } else {
1040 /* We found a matching flow. */ 1343 /* We found a matching flow. */
1041 struct sw_flow_actions *old_acts; 1344 struct sw_flow_actions *old_acts;
1042 struct nlattr *acts_attrs;
1043 1345
1044 /* Bail out if we're not allowed to modify an existing flow. 1346 /* Bail out if we're not allowed to modify an existing flow.
1045 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL 1347 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
@@ -1052,23 +1354,17 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1052 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) 1354 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1053 goto err_unlock_ovs; 1355 goto err_unlock_ovs;
1054 1356
1357 /* The unmasked key has to be the same for flow updates. */
1358 error = -EINVAL;
1359 if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) {
1360 OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n");
1361 goto err_unlock_ovs;
1362 }
1363
1055 /* Update actions. */ 1364 /* Update actions. */
1056 old_acts = ovsl_dereference(flow->sf_acts); 1365 old_acts = ovsl_dereference(flow->sf_acts);
1057 acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; 1366 rcu_assign_pointer(flow->sf_acts, acts);
1058 if (acts_attrs && 1367 ovs_flow_deferred_free_acts(old_acts);
1059 (old_acts->actions_len != nla_len(acts_attrs) ||
1060 memcmp(old_acts->actions, nla_data(acts_attrs),
1061 old_acts->actions_len))) {
1062 struct sw_flow_actions *new_acts;
1063
1064 new_acts = ovs_flow_actions_alloc(acts_attrs);
1065 error = PTR_ERR(new_acts);
1066 if (IS_ERR(new_acts))
1067 goto err_unlock_ovs;
1068
1069 rcu_assign_pointer(flow->sf_acts, new_acts);
1070 ovs_flow_deferred_free_acts(old_acts);
1071 }
1072 1368
1073 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 1369 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1074 info->snd_seq, OVS_FLOW_CMD_NEW); 1370 info->snd_seq, OVS_FLOW_CMD_NEW);
@@ -1089,10 +1385,12 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1089 ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); 1385 ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
1090 return 0; 1386 return 0;
1091 1387
1092error_free_flow: 1388err_flow_free:
1093 ovs_flow_free(flow); 1389 ovs_flow_free(flow, false);
1094err_unlock_ovs: 1390err_unlock_ovs:
1095 ovs_unlock(); 1391 ovs_unlock();
1392err_kfree:
1393 kfree(acts);
1096error: 1394error:
1097 return error; 1395 return error;
1098} 1396}
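
Taken together, this is the core of the megaflow change to ovs_flow_cmd_new_or_set(): the flow stores the masked key in flow->key for lookups and keeps the exact key in flow->unmasked_key so that later updates can insist on an identical unmasked match, while masks are deduplicated in the flow table and reference-counted through ovs_sw_flow_mask_add_ref().
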
@@ -1106,12 +1404,16 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1106 struct sw_flow *flow; 1404 struct sw_flow *flow;
1107 struct datapath *dp; 1405 struct datapath *dp;
1108 struct flow_table *table; 1406 struct flow_table *table;
1407 struct sw_flow_match match;
1109 int err; 1408 int err;
1110 int key_len;
1111 1409
1112 if (!a[OVS_FLOW_ATTR_KEY]) 1410 if (!a[OVS_FLOW_ATTR_KEY]) {
1411 OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
1113 return -EINVAL; 1412 return -EINVAL;
1114 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1413 }
1414
1415 ovs_match_init(&match, &key, NULL);
1416 err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
1115 if (err) 1417 if (err)
1116 return err; 1418 return err;
1117 1419
@@ -1123,7 +1425,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1123 } 1425 }
1124 1426
1125 table = ovsl_dereference(dp->table); 1427 table = ovsl_dereference(dp->table);
1126 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1428 flow = ovs_flow_lookup_unmasked_key(table, &match);
1127 if (!flow) { 1429 if (!flow) {
1128 err = -ENOENT; 1430 err = -ENOENT;
1129 goto unlock; 1431 goto unlock;
@@ -1152,8 +1454,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1152 struct sw_flow *flow; 1454 struct sw_flow *flow;
1153 struct datapath *dp; 1455 struct datapath *dp;
1154 struct flow_table *table; 1456 struct flow_table *table;
1457 struct sw_flow_match match;
1155 int err; 1458 int err;
1156 int key_len;
1157 1459
1158 ovs_lock(); 1460 ovs_lock();
1159 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1461 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1166,12 +1468,14 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1166 err = flush_flows(dp); 1468 err = flush_flows(dp);
1167 goto unlock; 1469 goto unlock;
1168 } 1470 }
1169 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1471
1472 ovs_match_init(&match, &key, NULL);
1473 err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
1170 if (err) 1474 if (err)
1171 goto unlock; 1475 goto unlock;
1172 1476
1173 table = ovsl_dereference(dp->table); 1477 table = ovsl_dereference(dp->table);
1174 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1478 flow = ovs_flow_lookup_unmasked_key(table, &match);
1175 if (!flow) { 1479 if (!flow) {
1176 err = -ENOENT; 1480 err = -ENOENT;
1177 goto unlock; 1481 goto unlock;
@@ -1183,13 +1487,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1183 goto unlock; 1487 goto unlock;
1184 } 1488 }
1185 1489
1186 ovs_flow_tbl_remove(table, flow); 1490 ovs_flow_remove(table, flow);
1187 1491
1188 err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, 1492 err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
1189 info->snd_seq, 0, OVS_FLOW_CMD_DEL); 1493 info->snd_seq, 0, OVS_FLOW_CMD_DEL);
1190 BUG_ON(err < 0); 1494 BUG_ON(err < 0);
1191 1495
1192 ovs_flow_deferred_free(flow); 1496 ovs_flow_free(flow, true);
1193 ovs_unlock(); 1497 ovs_unlock();
1194 1498
1195 ovs_notify(reply, info, &ovs_dp_flow_multicast_group); 1499 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
@@ -1205,22 +1509,21 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1205 struct datapath *dp; 1509 struct datapath *dp;
1206 struct flow_table *table; 1510 struct flow_table *table;
1207 1511
1208 ovs_lock(); 1512 rcu_read_lock();
1209 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1513 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1210 if (!dp) { 1514 if (!dp) {
1211 ovs_unlock(); 1515 rcu_read_unlock();
1212 return -ENODEV; 1516 return -ENODEV;
1213 } 1517 }
1214 1518
1215 table = ovsl_dereference(dp->table); 1519 table = rcu_dereference(dp->table);
1216
1217 for (;;) { 1520 for (;;) {
1218 struct sw_flow *flow; 1521 struct sw_flow *flow;
1219 u32 bucket, obj; 1522 u32 bucket, obj;
1220 1523
1221 bucket = cb->args[0]; 1524 bucket = cb->args[0];
1222 obj = cb->args[1]; 1525 obj = cb->args[1];
1223 flow = ovs_flow_tbl_next(table, &bucket, &obj); 1526 flow = ovs_flow_dump_next(table, &bucket, &obj);
1224 if (!flow) 1527 if (!flow)
1225 break; 1528 break;
1226 1529
@@ -1233,7 +1536,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1233 cb->args[0] = bucket; 1536 cb->args[0] = bucket;
1234 cb->args[1] = obj; 1537 cb->args[1] = obj;
1235 } 1538 }
1236 ovs_unlock(); 1539 rcu_read_unlock();
1237 return skb->len; 1540 return skb->len;
1238} 1541}
1239 1542
@@ -1429,7 +1732,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1429 goto err_destroy_local_port; 1732 goto err_destroy_local_port;
1430 1733
1431 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); 1734 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1432 list_add_tail(&dp->list_node, &ovs_net->dps); 1735 list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1433 1736
1434 ovs_unlock(); 1737 ovs_unlock();
1435 1738
@@ -1443,7 +1746,7 @@ err_destroy_ports_array:
1443err_destroy_percpu: 1746err_destroy_percpu:
1444 free_percpu(dp->stats_percpu); 1747 free_percpu(dp->stats_percpu);
1445err_destroy_table: 1748err_destroy_table:
1446 ovs_flow_tbl_destroy(ovsl_dereference(dp->table)); 1749 ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false);
1447err_free_dp: 1750err_free_dp:
1448 release_net(ovs_dp_get_net(dp)); 1751 release_net(ovs_dp_get_net(dp));
1449 kfree(dp); 1752 kfree(dp);
@@ -1467,7 +1770,7 @@ static void __dp_destroy(struct datapath *dp)
1467 ovs_dp_detach_port(vport); 1770 ovs_dp_detach_port(vport);
1468 } 1771 }
1469 1772
1470 list_del(&dp->list_node); 1773 list_del_rcu(&dp->list_node);
1471 1774
1472 /* OVSP_LOCAL is datapath internal port. We need to make sure that 1775 /* OVSP_LOCAL is datapath internal port. We need to make sure that
1473 * all port in datapath are destroyed first before freeing datapath. 1776 * all port in datapath are destroyed first before freeing datapath.
@@ -1572,8 +1875,8 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1572 int skip = cb->args[0]; 1875 int skip = cb->args[0];
1573 int i = 0; 1876 int i = 0;
1574 1877
1575 ovs_lock(); 1878 rcu_read_lock();
1576 list_for_each_entry(dp, &ovs_net->dps, list_node) { 1879 list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) {
1577 if (i >= skip && 1880 if (i >= skip &&
1578 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, 1881 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1579 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1882 cb->nlh->nlmsg_seq, NLM_F_MULTI,
@@ -1581,7 +1884,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1581 break; 1884 break;
1582 i++; 1885 i++;
1583 } 1886 }
1584 ovs_unlock(); 1887 rcu_read_unlock();
1585 1888
1586 cb->args[0] = i; 1889 cb->args[0] = i;
1587 1890
@@ -1812,10 +2115,11 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1812 if (IS_ERR(vport)) 2115 if (IS_ERR(vport))
1813 goto exit_unlock; 2116 goto exit_unlock;
1814 2117
1815 err = 0;
1816 if (a[OVS_VPORT_ATTR_TYPE] && 2118 if (a[OVS_VPORT_ATTR_TYPE] &&
1817 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) 2119 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
1818 err = -EINVAL; 2120 err = -EINVAL;
2121 goto exit_unlock;
2122 }
1819 2123
1820 reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2124 reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1821 if (!reply) { 2125 if (!reply) {
@@ -1823,10 +2127,11 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1823 goto exit_unlock; 2127 goto exit_unlock;
1824 } 2128 }
1825 2129
1826 if (!err && a[OVS_VPORT_ATTR_OPTIONS]) 2130 if (a[OVS_VPORT_ATTR_OPTIONS]) {
1827 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); 2131 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
1828 if (err) 2132 if (err)
1829 goto exit_free; 2133 goto exit_free;
2134 }
1830 2135
1831 if (a[OVS_VPORT_ATTR_UPCALL_PID]) 2136 if (a[OVS_VPORT_ATTR_UPCALL_PID])
1832 vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); 2137 vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
@@ -1839,9 +2144,6 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1839 ovs_notify(reply, info, &ovs_dp_vport_multicast_group); 2144 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1840 return 0; 2145 return 0;
1841 2146
1842 rtnl_unlock();
1843 return 0;
1844
1845exit_free: 2147exit_free:
1846 kfree_skb(reply); 2148 kfree_skb(reply);
1847exit_unlock: 2149exit_unlock:
@@ -1867,8 +2169,8 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1867 goto exit_unlock; 2169 goto exit_unlock;
1868 } 2170 }
1869 2171
1870 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, 2172 reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
1871 OVS_VPORT_CMD_DEL); 2173 info->snd_seq, OVS_VPORT_CMD_DEL);
1872 err = PTR_ERR(reply); 2174 err = PTR_ERR(reply);
1873 if (IS_ERR(reply)) 2175 if (IS_ERR(reply))
1874 goto exit_unlock; 2176 goto exit_unlock;
@@ -1897,8 +2199,8 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
1897 if (IS_ERR(vport)) 2199 if (IS_ERR(vport))
1898 goto exit_unlock; 2200 goto exit_unlock;
1899 2201
1900 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, 2202 reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
1901 OVS_VPORT_CMD_NEW); 2203 info->snd_seq, OVS_VPORT_CMD_NEW);
1902 err = PTR_ERR(reply); 2204 err = PTR_ERR(reply);
1903 if (IS_ERR(reply)) 2205 if (IS_ERR(reply))
1904 goto exit_unlock; 2206 goto exit_unlock;
@@ -2051,7 +2353,7 @@ static void rehash_flow_table(struct work_struct *work)
2051 new_table = ovs_flow_tbl_rehash(old_table); 2353 new_table = ovs_flow_tbl_rehash(old_table);
2052 if (!IS_ERR(new_table)) { 2354 if (!IS_ERR(new_table)) {
2053 rcu_assign_pointer(dp->table, new_table); 2355 rcu_assign_pointer(dp->table, new_table);
2054 ovs_flow_tbl_deferred_destroy(old_table); 2356 ovs_flow_tbl_destroy(old_table, true);
2055 } 2357 }
2056 } 2358 }
2057 } 2359 }
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 16b840695216..4d109c176ef3 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -88,9 +88,14 @@ struct datapath {
88/** 88/**
89 * struct ovs_skb_cb - OVS data in skb CB 89 * struct ovs_skb_cb - OVS data in skb CB
90 * @flow: The flow associated with this packet. May be %NULL if no flow. 90 * @flow: The flow associated with this packet. May be %NULL if no flow.
91 * @pkt_key: The flow information extracted from the packet. Must be nonnull.
92 * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
93 * packet is not being tunneled.
91 */ 94 */
92struct ovs_skb_cb { 95struct ovs_skb_cb {
93 struct sw_flow *flow; 96 struct sw_flow *flow;
97 struct sw_flow_key *pkt_key;
98 struct ovs_key_ipv4_tunnel *tun_key;
94}; 99};
95#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) 100#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
96 101
@@ -119,6 +124,7 @@ struct dp_upcall_info {
119struct ovs_net { 124struct ovs_net {
120 struct list_head dps; 125 struct list_head dps;
121 struct work_struct dp_notify_work; 126 struct work_struct dp_notify_work;
127 struct vport_net vport_net;
122}; 128};
123 129
124extern int ovs_net_id; 130extern int ovs_net_id;
@@ -179,4 +185,8 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
179 185
180int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb); 186int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
181void ovs_dp_notify_wq(struct work_struct *work); 187void ovs_dp_notify_wq(struct work_struct *work);
188
189#define OVS_NLERR(fmt, ...) \
190 pr_info_once("netlink: " fmt, ##__VA_ARGS__)
191
182#endif /* datapath.h */ 192#endif /* datapath.h */
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index ef4feec6cd84..c3235675f359 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -78,7 +78,7 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event,
78 void *ptr) 78 void *ptr)
79{ 79{
80 struct ovs_net *ovs_net; 80 struct ovs_net *ovs_net;
81 struct net_device *dev = ptr; 81 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
82 struct vport *vport = NULL; 82 struct vport *vport = NULL;
83 83
84 if (!ovs_is_internal_dev(dev)) 84 if (!ovs_is_internal_dev(dev))
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index b15321a2228c..410db90db73d 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2007-2011 Nicira, Inc. 2 * Copyright (c) 2007-2013 Nicira, Inc.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public 5 * modify it under the terms of version 2 of the GNU General Public
@@ -34,17 +34,215 @@
34#include <linux/if_arp.h> 34#include <linux/if_arp.h>
35#include <linux/ip.h> 35#include <linux/ip.h>
36#include <linux/ipv6.h> 36#include <linux/ipv6.h>
37#include <linux/sctp.h>
37#include <linux/tcp.h> 38#include <linux/tcp.h>
38#include <linux/udp.h> 39#include <linux/udp.h>
39#include <linux/icmp.h> 40#include <linux/icmp.h>
40#include <linux/icmpv6.h> 41#include <linux/icmpv6.h>
41#include <linux/rculist.h> 42#include <linux/rculist.h>
42#include <net/ip.h> 43#include <net/ip.h>
44#include <net/ip_tunnels.h>
43#include <net/ipv6.h> 45#include <net/ipv6.h>
44#include <net/ndisc.h> 46#include <net/ndisc.h>
45 47
46static struct kmem_cache *flow_cache; 48static struct kmem_cache *flow_cache;
47 49
50static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
51 struct sw_flow_key_range *range, u8 val);
52
53static void update_range__(struct sw_flow_match *match,
54 size_t offset, size_t size, bool is_mask)
55{
56 struct sw_flow_key_range *range = NULL;
57 size_t start = rounddown(offset, sizeof(long));
58 size_t end = roundup(offset + size, sizeof(long));
59
60 if (!is_mask)
61 range = &match->range;
62 else if (match->mask)
63 range = &match->mask->range;
64
65 if (!range)
66 return;
67
68 if (range->start == range->end) {
69 range->start = start;
70 range->end = end;
71 return;
72 }
73
74 if (range->start > start)
75 range->start = start;
76
77 if (range->end < end)
78 range->end = end;
79}
80
81#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
82 do { \
83 update_range__(match, offsetof(struct sw_flow_key, field), \
84 sizeof((match)->key->field), is_mask); \
85 if (is_mask) { \
86 if ((match)->mask) \
87 (match)->mask->key.field = value; \
88 } else { \
89 (match)->key->field = value; \
90 } \
91 } while (0)
92
93#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
94 do { \
95 update_range__(match, offsetof(struct sw_flow_key, field), \
96 len, is_mask); \
97 if (is_mask) { \
98 if ((match)->mask) \
99 memcpy(&(match)->mask->key.field, value_p, len);\
100 } else { \
101 memcpy(&(match)->key->field, value_p, len); \
102 } \
103 } while (0)
104
105static u16 range_n_bytes(const struct sw_flow_key_range *range)
106{
107 return range->end - range->start;
108}
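
Both macros route through update_range__(), which widens the match's valid range to long-aligned boundaries so that masked comparisons can run over whole words from range->start to range->end. A tiny standalone illustration of that rounding, with a made-up field offset and 8-byte longs assumed:

/* Models update_range__()'s alignment: a 2-byte field at offset 13
 * widens the compared window to the enclosing long-aligned span. */
#include <stddef.h>
#include <stdio.h>

#define ROUNDDOWN(x, a)	((x) / (a) * (a))
#define ROUNDUP(x, a)	(((x) + (a) - 1) / (a) * (a))

int main(void)
{
	size_t offset = 13, size = 2;	/* hypothetical key field */
	size_t start = ROUNDDOWN(offset, sizeof(long));
	size_t end   = ROUNDUP(offset + size, sizeof(long));

	/* With 8-byte longs this prints: compare bytes [8, 16) */
	printf("compare bytes [%zu, %zu)\n", start, end);
	return 0;
}
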
109
110void ovs_match_init(struct sw_flow_match *match,
111 struct sw_flow_key *key,
112 struct sw_flow_mask *mask)
113{
114 memset(match, 0, sizeof(*match));
115 match->key = key;
116 match->mask = mask;
117
118 memset(key, 0, sizeof(*key));
119
120 if (mask) {
121 memset(&mask->key, 0, sizeof(mask->key));
122 mask->range.start = mask->range.end = 0;
123 }
124}
125
126static bool ovs_match_validate(const struct sw_flow_match *match,
127 u64 key_attrs, u64 mask_attrs)
128{
129 u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
130 u64 mask_allowed = key_attrs; /* At most allow all key attributes */
131
132 /* The following mask attributes are allowed only if they
133 * pass the validation tests. */
134 mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
135 | (1 << OVS_KEY_ATTR_IPV6)
136 | (1 << OVS_KEY_ATTR_TCP)
137 | (1 << OVS_KEY_ATTR_UDP)
138 | (1 << OVS_KEY_ATTR_SCTP)
139 | (1 << OVS_KEY_ATTR_ICMP)
140 | (1 << OVS_KEY_ATTR_ICMPV6)
141 | (1 << OVS_KEY_ATTR_ARP)
142 | (1 << OVS_KEY_ATTR_ND));
143
144 /* Always allowed mask fields. */
145 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
146 | (1 << OVS_KEY_ATTR_IN_PORT)
147 | (1 << OVS_KEY_ATTR_ETHERTYPE));
148
149 /* Check key attributes. */
150 if (match->key->eth.type == htons(ETH_P_ARP)
151 || match->key->eth.type == htons(ETH_P_RARP)) {
152 key_expected |= 1 << OVS_KEY_ATTR_ARP;
153 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
154 mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
155 }
156
157 if (match->key->eth.type == htons(ETH_P_IP)) {
158 key_expected |= 1 << OVS_KEY_ATTR_IPV4;
159 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
160 mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
161
162 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
163 if (match->key->ip.proto == IPPROTO_UDP) {
164 key_expected |= 1 << OVS_KEY_ATTR_UDP;
165 if (match->mask && (match->mask->key.ip.proto == 0xff))
166 mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
167 }
168
169 if (match->key->ip.proto == IPPROTO_SCTP) {
170 key_expected |= 1 << OVS_KEY_ATTR_SCTP;
171 if (match->mask && (match->mask->key.ip.proto == 0xff))
172 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
173 }
174
175 if (match->key->ip.proto == IPPROTO_TCP) {
176 key_expected |= 1 << OVS_KEY_ATTR_TCP;
177 if (match->mask && (match->mask->key.ip.proto == 0xff))
178 mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
179 }
180
181 if (match->key->ip.proto == IPPROTO_ICMP) {
182 key_expected |= 1 << OVS_KEY_ATTR_ICMP;
183 if (match->mask && (match->mask->key.ip.proto == 0xff))
184 mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
185 }
186 }
187 }
188
189 if (match->key->eth.type == htons(ETH_P_IPV6)) {
190 key_expected |= 1 << OVS_KEY_ATTR_IPV6;
191 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
192 mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
193
194 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
195 if (match->key->ip.proto == IPPROTO_UDP) {
196 key_expected |= 1 << OVS_KEY_ATTR_UDP;
197 if (match->mask && (match->mask->key.ip.proto == 0xff))
198 mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
199 }
200
201 if (match->key->ip.proto == IPPROTO_SCTP) {
202 key_expected |= 1 << OVS_KEY_ATTR_SCTP;
203 if (match->mask && (match->mask->key.ip.proto == 0xff))
204 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
205 }
206
207 if (match->key->ip.proto == IPPROTO_TCP) {
208 key_expected |= 1 << OVS_KEY_ATTR_TCP;
209 if (match->mask && (match->mask->key.ip.proto == 0xff))
210 mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
211 }
212
213 if (match->key->ip.proto == IPPROTO_ICMPV6) {
214 key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
215 if (match->mask && (match->mask->key.ip.proto == 0xff))
216 mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
217
218 if (match->key->ipv6.tp.src ==
219 htons(NDISC_NEIGHBOUR_SOLICITATION) ||
220 match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
221 key_expected |= 1 << OVS_KEY_ATTR_ND;
222 if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
223 mask_allowed |= 1 << OVS_KEY_ATTR_ND;
224 }
225 }
226 }
227 }
228
229 if ((key_attrs & key_expected) != key_expected) {
230 /* Key attributes check failed. */
231 OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
232 key_attrs, key_expected);
233 return false;
234 }
235
236 if ((mask_attrs & mask_allowed) != mask_attrs) {
237 /* Mask attributes check failed. */
238 OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
239 mask_attrs, mask_allowed);
240 return false;
241 }
242
243 return true;
244}
245
48static int check_header(struct sk_buff *skb, int len) 246static int check_header(struct sk_buff *skb, int len)
49{ 247{
50 if (unlikely(skb->len < len)) 248 if (unlikely(skb->len < len))
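The update_range__() helper and the SW_FLOW_KEY_PUT()/SW_FLOW_KEY_MEMCPY() macros added above do double duty: besides storing a value into the key or the mask, they grow the [start, end) byte range that later hashing and comparison will touch, rounded out to long-sized words. A minimal userspace sketch of that range bookkeeping (the struct and function names here are illustrative stand-ins, not the kernel's):

#include <stddef.h>
#include <stdio.h>

struct range { size_t start, end; };	/* stands in for sw_flow_key_range */

static void update_range(struct range *r, size_t offset, size_t size)
{
	/* round down/up to a multiple of sizeof(long), as update_range__() does */
	size_t start = offset / sizeof(long) * sizeof(long);
	size_t end = (offset + size + sizeof(long) - 1) / sizeof(long) * sizeof(long);

	if (r->start == r->end) {	/* empty range: adopt the new span */
		r->start = start;
		r->end = end;
		return;
	}
	if (r->start > start)
		r->start = start;
	if (r->end < end)
		r->end = end;
}

int main(void)
{
	struct range r = { 0, 0 };

	update_range(&r, 20, 2);	/* a 16-bit field at offset 20 */
	update_range(&r, 40, 4);	/* a 32-bit field at offset 40 */
	printf("range: [%zu, %zu)\n", r.start, r.end);
	return 0;
}

On an LP64 machine the two stores above leave the range at [16, 48), so only 32 bytes of the key need to be hashed and compared rather than the whole structure.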
@@ -101,6 +299,12 @@ static bool udphdr_ok(struct sk_buff *skb)
 			sizeof(struct udphdr));
 }
 
+static bool sctphdr_ok(struct sk_buff *skb)
+{
+	return pskb_may_pull(skb, skb_transport_offset(skb) +
+				  sizeof(struct sctphdr));
+}
+
 static bool icmphdr_ok(struct sk_buff *skb)
 {
 	return pskb_may_pull(skb, skb_transport_offset(skb) +
@@ -120,12 +324,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
 	return cur_ms - idle_ms;
 }
 
-#define SW_FLOW_KEY_OFFSET(field) \
-	(offsetof(struct sw_flow_key, field) +	\
-	 FIELD_SIZEOF(struct sw_flow_key, field))
-
-static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
-			 int *key_lenp)
+static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
 {
 	unsigned int nh_ofs = skb_network_offset(skb);
 	unsigned int nh_len;
@@ -135,8 +334,6 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
 	__be16 frag_off;
 	int err;
 
-	*key_lenp = SW_FLOW_KEY_OFFSET(ipv6.label);
-
 	err = check_header(skb, nh_ofs + sizeof(*nh));
 	if (unlikely(err))
 		return err;
@@ -175,6 +372,22 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
 			  sizeof(struct icmp6hdr));
 }
 
+void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
+		       const struct sw_flow_mask *mask)
+{
+	const long *m = (long *)((u8 *)&mask->key + mask->range.start);
+	const long *s = (long *)((u8 *)src + mask->range.start);
+	long *d = (long *)((u8 *)dst + mask->range.start);
+	int i;
+
+	/* The memory outside of the 'mask->range' are not set since
+	 * further operations on 'dst' only uses contents within
+	 * 'mask->range'.
+	 */
+	for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
+		*d++ = *s++ & *m++;
+}
+
 #define TCP_FLAGS_OFFSET 13
 #define TCP_FLAG_MASK 0x3f
 
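ovs_flow_key_mask() above relies on the range being long-aligned so it can AND the key against the mask a machine word at a time. The same operation in plain, self-contained C (simplified types; the kernel operates on struct sw_flow_key directly):

#include <stddef.h>

/* dst = src & mask over the byte range [start, end), which the caller
 * guarantees is long-aligned and a multiple of sizeof(long) wide. */
static void key_mask(void *dst, const void *src, const void *mask,
		     size_t start, size_t end)
{
	const long *s = (const long *)((const char *)src + start);
	const long *m = (const long *)((const char *)mask + start);
	long *d = (long *)((char *)dst + start);
	size_t i;

	for (i = start; i < end; i += sizeof(long))
		*d++ = *s++ & *m++;
}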
@@ -198,20 +411,18 @@ void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
 	spin_unlock(&flow->lock);
 }
 
-struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions)
+struct sw_flow_actions *ovs_flow_actions_alloc(int size)
 {
-	int actions_len = nla_len(actions);
 	struct sw_flow_actions *sfa;
 
-	if (actions_len > MAX_ACTIONS_BUFSIZE)
+	if (size > MAX_ACTIONS_BUFSIZE)
 		return ERR_PTR(-EINVAL);
 
-	sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL);
+	sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
 	if (!sfa)
 		return ERR_PTR(-ENOMEM);
 
-	sfa->actions_len = actions_len;
-	nla_memcpy(sfa->actions, actions, actions_len);
+	sfa->actions_len = 0;
 	return sfa;
 }
 
@@ -225,6 +436,7 @@ struct sw_flow *ovs_flow_alloc(void)
 
 	spin_lock_init(&flow->lock);
 	flow->sf_acts = NULL;
+	flow->mask = NULL;
 
 	return flow;
 }
@@ -241,7 +453,7 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets)
 	struct flex_array *buckets;
 	int i, err;
 
-	buckets = flex_array_alloc(sizeof(struct hlist_head *),
+	buckets = flex_array_alloc(sizeof(struct hlist_head),
 				   n_buckets, GFP_KERNEL);
 	if (!buckets)
 		return NULL;
@@ -264,7 +476,7 @@ static void free_buckets(struct flex_array *buckets)
 	flex_array_free(buckets);
 }
 
-struct flow_table *ovs_flow_tbl_alloc(int new_size)
+static struct flow_table *__flow_tbl_alloc(int new_size)
 {
 	struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL);
 
@@ -282,17 +494,15 @@ struct flow_table *ovs_flow_tbl_alloc(int new_size)
 	table->node_ver = 0;
 	table->keep_flows = false;
 	get_random_bytes(&table->hash_seed, sizeof(u32));
+	table->mask_list = NULL;
 
 	return table;
 }
 
-void ovs_flow_tbl_destroy(struct flow_table *table)
+static void __flow_tbl_destroy(struct flow_table *table)
 {
 	int i;
 
-	if (!table)
-		return;
-
 	if (table->keep_flows)
 		goto skip_flows;
 
@@ -303,32 +513,56 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
 		int ver = table->node_ver;
 
 		hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
-			hlist_del_rcu(&flow->hash_node[ver]);
-			ovs_flow_free(flow);
+			hlist_del(&flow->hash_node[ver]);
+			ovs_flow_free(flow, false);
 		}
 	}
 
+	BUG_ON(!list_empty(table->mask_list));
+	kfree(table->mask_list);
+
 skip_flows:
 	free_buckets(table->buckets);
 	kfree(table);
 }
 
+struct flow_table *ovs_flow_tbl_alloc(int new_size)
+{
+	struct flow_table *table = __flow_tbl_alloc(new_size);
+
+	if (!table)
+		return NULL;
+
+	table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL);
+	if (!table->mask_list) {
+		table->keep_flows = true;
+		__flow_tbl_destroy(table);
+		return NULL;
+	}
+	INIT_LIST_HEAD(table->mask_list);
+
+	return table;
+}
+
 static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
 {
 	struct flow_table *table = container_of(rcu, struct flow_table, rcu);
 
-	ovs_flow_tbl_destroy(table);
+	__flow_tbl_destroy(table);
 }
 
-void ovs_flow_tbl_deferred_destroy(struct flow_table *table)
+void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
 {
 	if (!table)
 		return;
 
-	call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
+	if (deferred)
+		call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
+	else
+		__flow_tbl_destroy(table);
 }
 
-struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *last)
+struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last)
 {
 	struct sw_flow *flow;
 	struct hlist_head *head;
@@ -354,6 +588,16 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la
 	return NULL;
 }
 
+static void __tbl_insert(struct flow_table *table, struct sw_flow *flow)
+{
+	struct hlist_head *head;
+
+	head = find_bucket(table, flow->hash);
+	hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
+
+	table->count++;
+}
+
 static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new)
 {
 	int old_ver;
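__tbl_insert() is the old ovs_flow_tbl_insert() body minus the hash computation, which now happens in the caller. The structure is a plain hash-bucket insert; sketched here with a singly-linked list standing in for the kernel's RCU hlist (illustrative types, not the kernel API):

struct node { struct node *next; unsigned int hash; };

static void tbl_insert(struct node **buckets, unsigned int n_buckets,
		       struct node *n)
{
	/* n_buckets is a power of two, so masking selects the bucket;
	 * the kernel uses find_bucket() + hlist_add_head_rcu() here. */
	struct node **head = &buckets[n->hash & (n_buckets - 1)];

	n->next = *head;
	*head = n;
}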
@@ -370,8 +614,10 @@ static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new
 		head = flex_array_get(old->buckets, i);
 
 		hlist_for_each_entry(flow, head, hash_node[old_ver])
-			ovs_flow_tbl_insert(new, flow);
+			__tbl_insert(new, flow);
 	}
+
+	new->mask_list = old->mask_list;
 	old->keep_flows = true;
 }
 
@@ -379,7 +625,7 @@ static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buck
 {
 	struct flow_table *new_table;
 
-	new_table = ovs_flow_tbl_alloc(n_buckets);
+	new_table = __flow_tbl_alloc(n_buckets);
 	if (!new_table)
 		return ERR_PTR(-ENOMEM);
 
@@ -398,28 +644,30 @@ struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
 	return __flow_tbl_rehash(table, table->n_buckets * 2);
 }
 
-void ovs_flow_free(struct sw_flow *flow)
+static void __flow_free(struct sw_flow *flow)
 {
-	if (unlikely(!flow))
-		return;
-
 	kfree((struct sf_flow_acts __force *)flow->sf_acts);
 	kmem_cache_free(flow_cache, flow);
 }
 
-/* RCU callback used by ovs_flow_deferred_free. */
 static void rcu_free_flow_callback(struct rcu_head *rcu)
 {
 	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
 
-	ovs_flow_free(flow);
+	__flow_free(flow);
 }
 
-/* Schedules 'flow' to be freed after the next RCU grace period.
- * The caller must hold rcu_read_lock for this to be sensible. */
-void ovs_flow_deferred_free(struct sw_flow *flow)
+void ovs_flow_free(struct sw_flow *flow, bool deferred)
 {
-	call_rcu(&flow->rcu, rcu_free_flow_callback);
+	if (!flow)
+		return;
+
+	ovs_sw_flow_mask_del_ref(flow->mask, deferred);
+
+	if (deferred)
+		call_rcu(&flow->rcu, rcu_free_flow_callback);
+	else
+		__flow_free(flow);
 }
 
 /* Schedules 'sf_acts' to be freed after the next RCU grace period.
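ovs_flow_free() now takes a deferred flag: callers that may race with RCU readers pass true, so the flow (and its mask reference) is reclaimed only after a grace period; teardown paths that cannot race pass false and free immediately. The pattern, reduced to a sketch (call_rcu(), rcu_head and container_of() are the real kernel API; struct obj is a simplified stand-in for struct sw_flow):

struct obj {
	struct rcu_head rcu;
	/* ... payload ... */
};

static void obj_free_cb(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct obj, rcu));
}

static void obj_free(struct obj *o, bool deferred)
{
	if (!o)
		return;
	if (deferred)
		call_rcu(&o->rcu, obj_free_cb);	/* readers may still hold o */
	else
		kfree(o);			/* no concurrent readers possible */
}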
@@ -490,18 +738,15 @@ static __be16 parse_ethertype(struct sk_buff *skb)
 }
 
 static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
-			int *key_lenp, int nh_len)
+			int nh_len)
 {
 	struct icmp6hdr *icmp = icmp6_hdr(skb);
-	int error = 0;
-	int key_len;
 
 	/* The ICMPv6 type and code fields use the 16-bit transport port
 	 * fields, so we need to store them in 16-bit network byte order.
 	 */
 	key->ipv6.tp.src = htons(icmp->icmp6_type);
 	key->ipv6.tp.dst = htons(icmp->icmp6_code);
-	key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 
 	if (icmp->icmp6_code == 0 &&
 	    (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -510,21 +755,17 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 		struct nd_msg *nd;
 		int offset;
 
-		key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
-
 		/* In order to process neighbor discovery options, we need the
 		 * entire packet.
 		 */
 		if (unlikely(icmp_len < sizeof(*nd)))
-			goto out;
-		if (unlikely(skb_linearize(skb))) {
-			error = -ENOMEM;
-			goto out;
-		}
+			return 0;
+
+		if (unlikely(skb_linearize(skb)))
+			return -ENOMEM;
 
 		nd = (struct nd_msg *)skb_transport_header(skb);
 		key->ipv6.nd.target = nd->target;
-		key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
 
 		icmp_len -= sizeof(*nd);
 		offset = 0;
@@ -534,7 +775,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 			int opt_len = nd_opt->nd_opt_len * 8;
 
 			if (unlikely(!opt_len || opt_len > icmp_len))
-				goto invalid;
+				return 0;
 
 			/* Store the link layer address if the appropriate
 			 * option is provided.  It is considered an error if
@@ -559,16 +800,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 		}
 	}
 
-	goto out;
+	return 0;
 
 invalid:
 	memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
 	memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
 	memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
 
-out:
-	*key_lenp = key_len;
-	return error;
+	return 0;
 }
 
 /**
@@ -577,7 +816,6 @@ out:
  *    Ethernet header
  * @in_port: port number on which @skb was received.
  * @key: output flow key
- * @key_lenp: length of output flow key
  *
  * The caller must ensure that skb->len >= ETH_HLEN.
  *
@@ -590,21 +828,21 @@ out:
  *    - skb->network_header: just past the Ethernet header, or just past the
  *      VLAN header, to the first byte of the Ethernet payload.
  *
- *    - skb->transport_header: If key->dl_type is ETH_P_IP or ETH_P_IPV6
+ *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
  *      on output, then just past the IP header, if one is present and
  *      of a correct length, otherwise the same as skb->network_header.
- *      For other key->dl_type values it is left untouched.
+ *      For other key->eth.type values it is left untouched.
  */
-int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
-		 int *key_lenp)
+int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 {
-	int error = 0;
-	int key_len = SW_FLOW_KEY_OFFSET(eth);
+	int error;
 	struct ethhdr *eth;
 
 	memset(key, 0, sizeof(*key));
 
 	key->phy.priority = skb->priority;
+	if (OVS_CB(skb)->tun_key)
+		memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key));
 	key->phy.in_port = in_port;
 	key->phy.skb_mark = skb->mark;
 
@@ -618,6 +856,9 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 	memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
 
 	__skb_pull(skb, 2 * ETH_ALEN);
+	/* We are going to push all headers that we pull, so no need to
+	 * update skb->csum here.
+	 */
 
 	if (vlan_tx_tag_present(skb))
 		key->eth.tci = htons(skb->vlan_tci);
@@ -637,15 +878,13 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 		struct iphdr *nh;
 		__be16 offset;
 
-		key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
-
 		error = check_iphdr(skb);
 		if (unlikely(error)) {
 			if (error == -EINVAL) {
 				skb->transport_header = skb->network_header;
 				error = 0;
 			}
-			goto out;
+			return error;
 		}
 
 		nh = ip_hdr(skb);
@@ -659,7 +898,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 		offset = nh->frag_off & htons(IP_OFFSET);
 		if (offset) {
 			key->ip.frag = OVS_FRAG_TYPE_LATER;
-			goto out;
+			return 0;
 		}
 		if (nh->frag_off & htons(IP_MF) ||
 			skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
@@ -667,21 +906,24 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 
 		/* Transport layer. */
 		if (key->ip.proto == IPPROTO_TCP) {
-			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
 			if (tcphdr_ok(skb)) {
 				struct tcphdr *tcp = tcp_hdr(skb);
 				key->ipv4.tp.src = tcp->source;
 				key->ipv4.tp.dst = tcp->dest;
 			}
 		} else if (key->ip.proto == IPPROTO_UDP) {
-			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
 			if (udphdr_ok(skb)) {
 				struct udphdr *udp = udp_hdr(skb);
 				key->ipv4.tp.src = udp->source;
 				key->ipv4.tp.dst = udp->dest;
 			}
+		} else if (key->ip.proto == IPPROTO_SCTP) {
+			if (sctphdr_ok(skb)) {
+				struct sctphdr *sctp = sctp_hdr(skb);
+				key->ipv4.tp.src = sctp->source;
+				key->ipv4.tp.dst = sctp->dest;
+			}
 		} else if (key->ip.proto == IPPROTO_ICMP) {
-			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
 			if (icmphdr_ok(skb)) {
 				struct icmphdr *icmp = icmp_hdr(skb);
 				/* The ICMP type and code fields use the 16-bit
@@ -710,90 +952,175 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 		memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
 		memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
 		memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
-			key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
 		}
 	} else if (key->eth.type == htons(ETH_P_IPV6)) {
 		int nh_len;    /* IPv6 Header + Extensions */
 
-		nh_len = parse_ipv6hdr(skb, key, &key_len);
+		nh_len = parse_ipv6hdr(skb, key);
 		if (unlikely(nh_len < 0)) {
-			if (nh_len == -EINVAL)
+			if (nh_len == -EINVAL) {
 				skb->transport_header = skb->network_header;
-			else
+				error = 0;
+			} else {
 				error = nh_len;
-			goto out;
+			}
+			return error;
 		}
 
 		if (key->ip.frag == OVS_FRAG_TYPE_LATER)
-			goto out;
+			return 0;
 		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
 			key->ip.frag = OVS_FRAG_TYPE_FIRST;
 
 		/* Transport layer. */
 		if (key->ip.proto == NEXTHDR_TCP) {
-			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 			if (tcphdr_ok(skb)) {
 				struct tcphdr *tcp = tcp_hdr(skb);
 				key->ipv6.tp.src = tcp->source;
 				key->ipv6.tp.dst = tcp->dest;
 			}
 		} else if (key->ip.proto == NEXTHDR_UDP) {
-			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 			if (udphdr_ok(skb)) {
 				struct udphdr *udp = udp_hdr(skb);
 				key->ipv6.tp.src = udp->source;
 				key->ipv6.tp.dst = udp->dest;
 			}
+		} else if (key->ip.proto == NEXTHDR_SCTP) {
+			if (sctphdr_ok(skb)) {
+				struct sctphdr *sctp = sctp_hdr(skb);
+				key->ipv6.tp.src = sctp->source;
+				key->ipv6.tp.dst = sctp->dest;
+			}
 		} else if (key->ip.proto == NEXTHDR_ICMP) {
-			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
 			if (icmp6hdr_ok(skb)) {
-				error = parse_icmpv6(skb, key, &key_len, nh_len);
-				if (error < 0)
-					goto out;
+				error = parse_icmpv6(skb, key, nh_len);
+				if (error)
+					return error;
 			}
 		}
 	}
 
-out:
-	*key_lenp = key_len;
-	return error;
+	return 0;
+}
+
+static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start,
+			 int key_end)
+{
+	u32 *hash_key = (u32 *)((u8 *)key + key_start);
+	int hash_u32s = (key_end - key_start) >> 2;
+
+	/* Make sure number of hash bytes are multiple of u32. */
+	BUILD_BUG_ON(sizeof(long) % sizeof(u32));
+
+	return jhash2(hash_key, hash_u32s, 0);
+}
+
+static int flow_key_start(const struct sw_flow_key *key)
+{
+	if (key->tun_key.ipv4_dst)
+		return 0;
+	else
+		return rounddown(offsetof(struct sw_flow_key, phy),
+				 sizeof(long));
+}
+
+static bool __cmp_key(const struct sw_flow_key *key1,
+		const struct sw_flow_key *key2,  int key_start, int key_end)
+{
+	const long *cp1 = (long *)((u8 *)key1 + key_start);
+	const long *cp2 = (long *)((u8 *)key2 + key_start);
+	long diffs = 0;
+	int i;
+
+	for (i = key_start; i < key_end;  i += sizeof(long))
+		diffs |= *cp1++ ^ *cp2++;
+
+	return diffs == 0;
+}
+
+static bool __flow_cmp_masked_key(const struct sw_flow *flow,
+		const struct sw_flow_key *key, int key_start, int key_end)
+{
+	return __cmp_key(&flow->key, key, key_start, key_end);
+}
+
+static bool __flow_cmp_unmasked_key(const struct sw_flow *flow,
+		  const struct sw_flow_key *key, int key_start, int key_end)
+{
+	return __cmp_key(&flow->unmasked_key, key, key_start, key_end);
+}
+
+bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
+		const struct sw_flow_key *key, int key_end)
+{
+	int key_start;
+	key_start = flow_key_start(key);
+
+	return __flow_cmp_unmasked_key(flow, key, key_start, key_end);
+
 }
 
-u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len)
+struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
+				       struct sw_flow_match *match)
 {
-	return jhash2((u32 *)key, DIV_ROUND_UP(key_len, sizeof(u32)), 0);
+	struct sw_flow_key *unmasked = match->key;
+	int key_end = match->range.end;
+	struct sw_flow *flow;
+
+	flow = ovs_flow_lookup(table, unmasked);
+	if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_end)))
+		flow = NULL;
+
+	return flow;
 }
 
-struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
-				struct sw_flow_key *key, int key_len)
+static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table,
+				    const struct sw_flow_key *unmasked,
+				    struct sw_flow_mask *mask)
 {
 	struct sw_flow *flow;
 	struct hlist_head *head;
+	int key_start = mask->range.start;
+	int key_end = mask->range.end;
 	u32 hash;
+	struct sw_flow_key masked_key;
 
-	hash = ovs_flow_hash(key, key_len);
-
+	ovs_flow_key_mask(&masked_key, unmasked, mask);
+	hash = ovs_flow_hash(&masked_key, key_start, key_end);
 	head = find_bucket(table, hash);
 	hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) {
-
-		if (flow->hash == hash &&
-		    !memcmp(&flow->key, key, key_len)) {
+		if (flow->mask == mask &&
+		    __flow_cmp_masked_key(flow, &masked_key,
+					  key_start, key_end))
 			return flow;
-		}
 	}
 	return NULL;
 }
 
-void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow)
+struct sw_flow *ovs_flow_lookup(struct flow_table *tbl,
+				const struct sw_flow_key *key)
 {
-	struct hlist_head *head;
+	struct sw_flow *flow = NULL;
+	struct sw_flow_mask *mask;
 
-	head = find_bucket(table, flow->hash);
-	hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
-	table->count++;
+	list_for_each_entry_rcu(mask, tbl->mask_list, list) {
+		flow = ovs_masked_flow_lookup(tbl, key, mask);
+		if (flow)  /* Found */
+			break;
+	}
+
+	return flow;
+}
+
+
+void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow)
+{
+	flow->hash = ovs_flow_hash(&flow->key, flow->mask->range.start,
+			flow->mask->range.end);
+	__tbl_insert(table, flow);
 }
 
-void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
+void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow)
 {
 	BUG_ON(table->count == 0);
 	hlist_del_rcu(&flow->hash_node[table->node_ver]);
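This block is the heart of the megaflow change: a lookup is no longer a single hash of the whole key, but a walk of the registered masks, masking the packet key with each candidate and probing the hash table over only the masked byte range. A self-contained sketch of the per-mask probe (fixed-size arrays stand in for struct sw_flow_key and struct sw_flow_mask):

#include <stdbool.h>
#include <stddef.h>

struct key  { unsigned char bits[256]; };
struct mask { size_t start, end; struct key m; };

/* Word-at-a-time equality over [start, end), as __cmp_key() does. */
static bool cmp_key(const struct key *a, const struct key *b,
		    size_t start, size_t end)
{
	const long *p = (const long *)(a->bits + start);
	const long *q = (const long *)(b->bits + start);
	long diffs = 0;
	size_t i;

	for (i = start; i < end; i += sizeof(long))
		diffs |= *p++ ^ *q++;
	return diffs == 0;
}

/* Byte-wise for clarity; the kernel's ovs_flow_key_mask() goes a long
 * at a time over the same range. */
static void apply_mask(struct key *dst, const struct key *src,
		       const struct mask *mk)
{
	size_t i;

	for (i = mk->start; i < mk->end; i++)
		dst->bits[i] = src->bits[i] & mk->m.bits[i];
}

A hit requires both that the candidate flow was installed with the same mask and that the masked keys compare equal over the mask's range; hashing only [key_start, key_end) is also what guarantees jhash2() sees a whole number of u32 words.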
@@ -813,319 +1140,602 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 	[OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
 	[OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
 	[OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
+	[OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
 	[OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
 	[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
 	[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
 	[OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
+	[OVS_KEY_ATTR_TUNNEL] = -1,
 };
 
-static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
-				  const struct nlattr *a[], u32 *attrs)
+static bool is_all_zero(const u8 *fp, size_t size)
 {
-	const struct ovs_key_icmp *icmp_key;
-	const struct ovs_key_tcp *tcp_key;
-	const struct ovs_key_udp *udp_key;
+	int i;
 
-	switch (swkey->ip.proto) {
-	case IPPROTO_TCP:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
-			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_TCP);
+	if (!fp)
+		return false;
 
-		*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
-		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
-		swkey->ipv4.tp.src = tcp_key->tcp_src;
-		swkey->ipv4.tp.dst = tcp_key->tcp_dst;
-		break;
+	for (i = 0; i < size; i++)
+		if (fp[i])
+			return false;
+
+	return true;
+}
+
+static int __parse_flow_nlattrs(const struct nlattr *attr,
+				const struct nlattr *a[],
+				u64 *attrsp, bool nz)
+{
+	const struct nlattr *nla;
+	u32 attrs;
+	int rem;
+
+	attrs = *attrsp;
+	nla_for_each_nested(nla, attr, rem) {
+		u16 type = nla_type(nla);
+		int expected_len;
 
-	case IPPROTO_UDP:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
+		if (type > OVS_KEY_ATTR_MAX) {
+			OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
+				  type, OVS_KEY_ATTR_MAX);
 			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_UDP);
+		}
 
-		*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
-		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
-		swkey->ipv4.tp.src = udp_key->udp_src;
-		swkey->ipv4.tp.dst = udp_key->udp_dst;
-		break;
+		if (attrs & (1 << type)) {
+			OVS_NLERR("Duplicate key attribute (type %d).\n", type);
+			return -EINVAL;
+		}
 
-	case IPPROTO_ICMP:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_ICMP)))
+		expected_len = ovs_key_lens[type];
+		if (nla_len(nla) != expected_len && expected_len != -1) {
+			OVS_NLERR("Key attribute has unexpected length (type=%d"
+				  ", length=%d, expected=%d).\n", type,
+				  nla_len(nla), expected_len);
 			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
+		}
 
-		*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
-		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
-		swkey->ipv4.tp.src = htons(icmp_key->icmp_type);
-		swkey->ipv4.tp.dst = htons(icmp_key->icmp_code);
-		break;
+		if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
+			attrs |= 1 << type;
+			a[type] = nla;
+		}
+	}
+	if (rem) {
+		OVS_NLERR("Message has %d unknown bytes.\n", rem);
+		return -EINVAL;
 	}
 
+	*attrsp = attrs;
 	return 0;
 }
 
-static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
-				  const struct nlattr *a[], u32 *attrs)
+static int parse_flow_mask_nlattrs(const struct nlattr *attr,
+				   const struct nlattr *a[], u64 *attrsp)
 {
-	const struct ovs_key_icmpv6 *icmpv6_key;
-	const struct ovs_key_tcp *tcp_key;
-	const struct ovs_key_udp *udp_key;
-
-	switch (swkey->ip.proto) {
-	case IPPROTO_TCP:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
-			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_TCP);
+	return __parse_flow_nlattrs(attr, a, attrsp, true);
+}
 
-		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
-		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
-		swkey->ipv6.tp.src = tcp_key->tcp_src;
-		swkey->ipv6.tp.dst = tcp_key->tcp_dst;
-		break;
+static int parse_flow_nlattrs(const struct nlattr *attr,
+			      const struct nlattr *a[], u64 *attrsp)
+{
+	return __parse_flow_nlattrs(attr, a, attrsp, false);
+}
 
-	case IPPROTO_UDP:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
+int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
+			     struct sw_flow_match *match, bool is_mask)
+{
+	struct nlattr *a;
+	int rem;
+	bool ttl = false;
+	__be16 tun_flags = 0;
+
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+		static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
+			[OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
+			[OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
+			[OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
+			[OVS_TUNNEL_KEY_ATTR_TOS] = 1,
+			[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
+			[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
+			[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
+		};
+
+		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
+			OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
+				  type, OVS_TUNNEL_KEY_ATTR_MAX);
 			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_UDP);
+		}
 
-		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
-		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
-		swkey->ipv6.tp.src = udp_key->udp_src;
-		swkey->ipv6.tp.dst = udp_key->udp_dst;
-		break;
+		if (ovs_tunnel_key_lens[type] != nla_len(a)) {
+			OVS_NLERR("IPv4 tunnel attribute type has unexpected "
+				  " length (type=%d, length=%d, expected=%d).\n",
+				  type, nla_len(a), ovs_tunnel_key_lens[type]);
+			return -EINVAL;
+		}
 
-	case IPPROTO_ICMPV6:
-		if (!(*attrs & (1 << OVS_KEY_ATTR_ICMPV6)))
+		switch (type) {
+		case OVS_TUNNEL_KEY_ATTR_ID:
+			SW_FLOW_KEY_PUT(match, tun_key.tun_id,
+					nla_get_be64(a), is_mask);
+			tun_flags |= TUNNEL_KEY;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
+			SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
+					nla_get_be32(a), is_mask);
+			break;
+		case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
+			SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
+					nla_get_be32(a), is_mask);
+			break;
+		case OVS_TUNNEL_KEY_ATTR_TOS:
+			SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
+					nla_get_u8(a), is_mask);
+			break;
+		case OVS_TUNNEL_KEY_ATTR_TTL:
+			SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
+					nla_get_u8(a), is_mask);
+			ttl = true;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
+			tun_flags |= TUNNEL_DONT_FRAGMENT;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_CSUM:
+			tun_flags |= TUNNEL_CSUM;
+			break;
+		default:
 			return -EINVAL;
-		*attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
+		}
+	}
 
-		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
-		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
-		swkey->ipv6.tp.src = htons(icmpv6_key->icmpv6_type);
-		swkey->ipv6.tp.dst = htons(icmpv6_key->icmpv6_code);
+	SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
 
-		if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
-		    swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
-			const struct ovs_key_nd *nd_key;
+	if (rem > 0) {
+		OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
+		return -EINVAL;
+	}
 
-			if (!(*attrs & (1 << OVS_KEY_ATTR_ND)))
-				return -EINVAL;
-			*attrs &= ~(1 << OVS_KEY_ATTR_ND);
-
-			*key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
-			nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
-			memcpy(&swkey->ipv6.nd.target, nd_key->nd_target,
-			       sizeof(swkey->ipv6.nd.target));
-			memcpy(swkey->ipv6.nd.sll, nd_key->nd_sll, ETH_ALEN);
-			memcpy(swkey->ipv6.nd.tll, nd_key->nd_tll, ETH_ALEN);
+	if (!is_mask) {
+		if (!match->key->tun_key.ipv4_dst) {
+			OVS_NLERR("IPv4 tunnel destination address is zero.\n");
+			return -EINVAL;
+		}
+
+		if (!ttl) {
+			OVS_NLERR("IPv4 tunnel TTL not specified.\n");
+			return -EINVAL;
 		}
-		break;
 	}
 
 	return 0;
 }
 
-static int parse_flow_nlattrs(const struct nlattr *attr,
-			      const struct nlattr *a[], u32 *attrsp)
+int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
+			   const struct ovs_key_ipv4_tunnel *tun_key,
+			   const struct ovs_key_ipv4_tunnel *output)
 {
-	const struct nlattr *nla;
-	u32 attrs;
-	int rem;
+	struct nlattr *nla;
 
-	attrs = 0;
-	nla_for_each_nested(nla, attr, rem) {
-		u16 type = nla_type(nla);
-		int expected_len;
+	nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
+	if (!nla)
+		return -EMSGSIZE;
+
+	if (output->tun_flags & TUNNEL_KEY &&
+	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
+		return -EMSGSIZE;
+	if (output->ipv4_src &&
+	    nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
+		return -EMSGSIZE;
+	if (output->ipv4_dst &&
+	    nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
+		return -EMSGSIZE;
+	if (output->ipv4_tos &&
+	    nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
+		return -EMSGSIZE;
+	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
+		return -EMSGSIZE;
+	if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
+		return -EMSGSIZE;
+	if ((output->tun_flags & TUNNEL_CSUM) &&
+	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
+		return -EMSGSIZE;
+
+	nla_nest_end(skb, nla);
+	return 0;
+}
 
-		if (type > OVS_KEY_ATTR_MAX || attrs & (1 << type))
-			return -EINVAL;
+static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
+				 const struct nlattr **a, bool is_mask)
+{
+	if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
+		SW_FLOW_KEY_PUT(match, phy.priority,
+			  nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
+		*attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
+	}
 
-		expected_len = ovs_key_lens[type];
-		if (nla_len(nla) != expected_len && expected_len != -1)
+	if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
+		u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
+
+		if (is_mask)
+			in_port = 0xffffffff; /* Always exact match in_port. */
+		else if (in_port >= DP_MAX_PORTS)
 			return -EINVAL;
 
-		attrs |= 1 << type;
-		a[type] = nla;
+		SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
+		*attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
+	} else if (!is_mask) {
+		SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
 	}
-	if (rem)
-		return -EINVAL;
 
-	*attrsp = attrs;
+	if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
+		uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
+
+		SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
+		*attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
+	}
+	if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
+		if (ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
+					     is_mask))
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
+	}
 	return 0;
 }
 
957/** 1385static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
958 * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key. 1386 const struct nlattr **a, bool is_mask)
959 * @swkey: receives the extracted flow key.
960 * @key_lenp: number of bytes used in @swkey.
961 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
962 * sequence.
963 */
964int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
965 const struct nlattr *attr)
966{ 1387{
967 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
968 const struct ovs_key_ethernet *eth_key;
969 int key_len;
970 u32 attrs;
971 int err; 1388 int err;
1389 u64 orig_attrs = attrs;
972 1390
973 memset(swkey, 0, sizeof(struct sw_flow_key)); 1391 err = metadata_from_nlattrs(match, &attrs, a, is_mask);
974 key_len = SW_FLOW_KEY_OFFSET(eth);
975
976 err = parse_flow_nlattrs(attr, a, &attrs);
977 if (err) 1392 if (err)
978 return err; 1393 return err;
979 1394
980 /* Metadata attributes. */ 1395 if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
981 if (attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { 1396 const struct ovs_key_ethernet *eth_key;
982 swkey->phy.priority = nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]);
983 attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
984 }
985 if (attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
986 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
987 if (in_port >= DP_MAX_PORTS)
988 return -EINVAL;
989 swkey->phy.in_port = in_port;
990 attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
991 } else {
992 swkey->phy.in_port = DP_MAX_PORTS;
993 }
994 if (attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
995 swkey->phy.skb_mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
996 attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
997 }
998
999 /* Data attributes. */
1000 if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET)))
1001 return -EINVAL;
1002 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
1003 1397
1004 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); 1398 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
1005 memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN); 1399 SW_FLOW_KEY_MEMCPY(match, eth.src,
1006 memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN); 1400 eth_key->eth_src, ETH_ALEN, is_mask);
1401 SW_FLOW_KEY_MEMCPY(match, eth.dst,
1402 eth_key->eth_dst, ETH_ALEN, is_mask);
1403 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
1404 }
1007 1405
1008 if (attrs & (1u << OVS_KEY_ATTR_ETHERTYPE) && 1406 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
1009 nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) {
1010 const struct nlattr *encap;
1011 __be16 tci; 1407 __be16 tci;
1012 1408
1013 if (attrs != ((1 << OVS_KEY_ATTR_VLAN) |
1014 (1 << OVS_KEY_ATTR_ETHERTYPE) |
1015 (1 << OVS_KEY_ATTR_ENCAP)))
1016 return -EINVAL;
1017
1018 encap = a[OVS_KEY_ATTR_ENCAP];
1019 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 1409 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1020 if (tci & htons(VLAN_TAG_PRESENT)) { 1410 if (!(tci & htons(VLAN_TAG_PRESENT))) {
1021 swkey->eth.tci = tci; 1411 if (is_mask)
1022 1412 OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
1023 err = parse_flow_nlattrs(encap, a, &attrs); 1413 else
1024 if (err) 1414 OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
1025 return err;
1026 } else if (!tci) {
1027 /* Corner case for truncated 802.1Q header. */
1028 if (nla_len(encap))
1029 return -EINVAL;
1030 1415
1031 swkey->eth.type = htons(ETH_P_8021Q);
1032 *key_lenp = key_len;
1033 return 0;
1034 } else {
1035 return -EINVAL; 1416 return -EINVAL;
1036 } 1417 }
1037 } 1418
1419 SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
1420 attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
1421 } else if (!is_mask)
1422 SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
1038 1423
1039 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { 1424 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
1040 swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 1425 __be16 eth_type;
1041 if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN) 1426
1427 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1428 if (is_mask) {
1429 /* Always exact match EtherType. */
1430 eth_type = htons(0xffff);
1431 } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
1432 OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
1433 ntohs(eth_type), ETH_P_802_3_MIN);
1042 return -EINVAL; 1434 return -EINVAL;
1435 }
1436
1437 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
1043 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1438 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1044 } else { 1439 } else if (!is_mask) {
1045 swkey->eth.type = htons(ETH_P_802_2); 1440 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
1046 } 1441 }
1047 1442
1048 if (swkey->eth.type == htons(ETH_P_IP)) { 1443 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1049 const struct ovs_key_ipv4 *ipv4_key; 1444 const struct ovs_key_ipv4 *ipv4_key;
1050 1445
1051 if (!(attrs & (1 << OVS_KEY_ATTR_IPV4)))
1052 return -EINVAL;
1053 attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
1054
1055 key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
1056 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); 1446 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
1057 if (ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) 1447 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
1448 OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
1449 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
1058 return -EINVAL; 1450 return -EINVAL;
1059 swkey->ip.proto = ipv4_key->ipv4_proto;
1060 swkey->ip.tos = ipv4_key->ipv4_tos;
1061 swkey->ip.ttl = ipv4_key->ipv4_ttl;
1062 swkey->ip.frag = ipv4_key->ipv4_frag;
1063 swkey->ipv4.addr.src = ipv4_key->ipv4_src;
1064 swkey->ipv4.addr.dst = ipv4_key->ipv4_dst;
1065
1066 if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
1067 err = ipv4_flow_from_nlattrs(swkey, &key_len, a, &attrs);
1068 if (err)
1069 return err;
1070 } 1451 }
1071 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1452 SW_FLOW_KEY_PUT(match, ip.proto,
1072 const struct ovs_key_ipv6 *ipv6_key; 1453 ipv4_key->ipv4_proto, is_mask);
1454 SW_FLOW_KEY_PUT(match, ip.tos,
1455 ipv4_key->ipv4_tos, is_mask);
1456 SW_FLOW_KEY_PUT(match, ip.ttl,
1457 ipv4_key->ipv4_ttl, is_mask);
1458 SW_FLOW_KEY_PUT(match, ip.frag,
1459 ipv4_key->ipv4_frag, is_mask);
1460 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1461 ipv4_key->ipv4_src, is_mask);
1462 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1463 ipv4_key->ipv4_dst, is_mask);
1464 attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
1465 }
1073 1466
1074 if (!(attrs & (1 << OVS_KEY_ATTR_IPV6))) 1467 if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
1075 return -EINVAL; 1468 const struct ovs_key_ipv6 *ipv6_key;
1076 attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
1077 1469
1078 key_len = SW_FLOW_KEY_OFFSET(ipv6.label);
1079 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); 1470 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
1080 if (ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) 1471 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
1472 OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
1473 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
1081 return -EINVAL; 1474 return -EINVAL;
1082 swkey->ipv6.label = ipv6_key->ipv6_label;
1083 swkey->ip.proto = ipv6_key->ipv6_proto;
1084 swkey->ip.tos = ipv6_key->ipv6_tclass;
1085 swkey->ip.ttl = ipv6_key->ipv6_hlimit;
1086 swkey->ip.frag = ipv6_key->ipv6_frag;
1087 memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src,
1088 sizeof(swkey->ipv6.addr.src));
1089 memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst,
1090 sizeof(swkey->ipv6.addr.dst));
1091
1092 if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
1093 err = ipv6_flow_from_nlattrs(swkey, &key_len, a, &attrs);
1094 if (err)
1095 return err;
1096 } 1475 }
1097 } else if (swkey->eth.type == htons(ETH_P_ARP) || 1476 SW_FLOW_KEY_PUT(match, ipv6.label,
1098 swkey->eth.type == htons(ETH_P_RARP)) { 1477 ipv6_key->ipv6_label, is_mask);
1478 SW_FLOW_KEY_PUT(match, ip.proto,
1479 ipv6_key->ipv6_proto, is_mask);
1480 SW_FLOW_KEY_PUT(match, ip.tos,
1481 ipv6_key->ipv6_tclass, is_mask);
1482 SW_FLOW_KEY_PUT(match, ip.ttl,
1483 ipv6_key->ipv6_hlimit, is_mask);
1484 SW_FLOW_KEY_PUT(match, ip.frag,
1485 ipv6_key->ipv6_frag, is_mask);
1486 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
1487 ipv6_key->ipv6_src,
1488 sizeof(match->key->ipv6.addr.src),
1489 is_mask);
1490 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
1491 ipv6_key->ipv6_dst,
1492 sizeof(match->key->ipv6.addr.dst),
1493 is_mask);
1494
1495 attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
1496 }
1497
1498 if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
1099 const struct ovs_key_arp *arp_key; 1499 const struct ovs_key_arp *arp_key;
1100 1500
1101 if (!(attrs & (1 << OVS_KEY_ATTR_ARP))) 1501 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
1502 if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
1503 OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
1504 arp_key->arp_op);
1102 return -EINVAL; 1505 return -EINVAL;
1506 }
1507
1508 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1509 arp_key->arp_sip, is_mask);
1510 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1511 arp_key->arp_tip, is_mask);
1512 SW_FLOW_KEY_PUT(match, ip.proto,
1513 ntohs(arp_key->arp_op), is_mask);
1514 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
1515 arp_key->arp_sha, ETH_ALEN, is_mask);
1516 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
1517 arp_key->arp_tha, ETH_ALEN, is_mask);
1518
1103 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 1519 attrs &= ~(1 << OVS_KEY_ATTR_ARP);
1520 }
1104 1521
1105 key_len = SW_FLOW_KEY_OFFSET(ipv4.arp); 1522 if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
1106 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); 1523 const struct ovs_key_tcp *tcp_key;
1107 swkey->ipv4.addr.src = arp_key->arp_sip; 1524
1108 swkey->ipv4.addr.dst = arp_key->arp_tip; 1525 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
1109 if (arp_key->arp_op & htons(0xff00)) 1526 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1527 SW_FLOW_KEY_PUT(match, ipv4.tp.src,
1528 tcp_key->tcp_src, is_mask);
1529 SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
1530 tcp_key->tcp_dst, is_mask);
1531 } else {
1532 SW_FLOW_KEY_PUT(match, ipv6.tp.src,
1533 tcp_key->tcp_src, is_mask);
1534 SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
1535 tcp_key->tcp_dst, is_mask);
1536 }
1537 attrs &= ~(1 << OVS_KEY_ATTR_TCP);
1538 }
1539
1540 if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
1541 const struct ovs_key_udp *udp_key;
1542
1543 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
1544 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1545 SW_FLOW_KEY_PUT(match, ipv4.tp.src,
1546 udp_key->udp_src, is_mask);
1547 SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
1548 udp_key->udp_dst, is_mask);
1549 } else {
1550 SW_FLOW_KEY_PUT(match, ipv6.tp.src,
1551 udp_key->udp_src, is_mask);
1552 SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
1553 udp_key->udp_dst, is_mask);
1554 }
1555 attrs &= ~(1 << OVS_KEY_ATTR_UDP);
1556 }
1557
1558 if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
1559 const struct ovs_key_sctp *sctp_key;
1560
1561 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
1562 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1563 SW_FLOW_KEY_PUT(match, ipv4.tp.src,
1564 sctp_key->sctp_src, is_mask);
1565 SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
1566 sctp_key->sctp_dst, is_mask);
1567 } else {
1568 SW_FLOW_KEY_PUT(match, ipv6.tp.src,
1569 sctp_key->sctp_src, is_mask);
1570 SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
1571 sctp_key->sctp_dst, is_mask);
1572 }
1573 attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
1574 }
1575
1576 if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
1577 const struct ovs_key_icmp *icmp_key;
1578
1579 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
1580 SW_FLOW_KEY_PUT(match, ipv4.tp.src,
1581 htons(icmp_key->icmp_type), is_mask);
1582 SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
1583 htons(icmp_key->icmp_code), is_mask);
1584 attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
1585 }
1586
1587 if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
1588 const struct ovs_key_icmpv6 *icmpv6_key;
1589
1590 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
1591 SW_FLOW_KEY_PUT(match, ipv6.tp.src,
1592 htons(icmpv6_key->icmpv6_type), is_mask);
1593 SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
1594 htons(icmpv6_key->icmpv6_code), is_mask);
1595 attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
1596 }
1597
1598 if (attrs & (1 << OVS_KEY_ATTR_ND)) {
1599 const struct ovs_key_nd *nd_key;
1600
1601 nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
1602 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
1603 nd_key->nd_target,
1604 sizeof(match->key->ipv6.nd.target),
1605 is_mask);
1606 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
1607 nd_key->nd_sll, ETH_ALEN, is_mask);
1608 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
1609 nd_key->nd_tll, ETH_ALEN, is_mask);
1610 attrs &= ~(1 << OVS_KEY_ATTR_ND);
1611 }
1612
1613 if (attrs != 0)
1614 return -EINVAL;
1615
1616 return 0;
1617}
1618
1619/**
1620 * ovs_match_from_nlattrs - parses Netlink attributes into a flow key and
1621 * mask. In case the 'mask' is NULL, the flow is treated as exact match
1622 * flow. Otherwise, it is treated as a wildcarded flow, except the mask
1623 * does not include any don't care bit.
1624 * @match: receives the extracted flow match information.
1625 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1626 * sequence. The fields should of the packet that triggered the creation
1627 * of this flow.
1628 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
1629 * attribute specifies the mask field of the wildcarded flow.
1630 */
1631int ovs_match_from_nlattrs(struct sw_flow_match *match,
1632 const struct nlattr *key,
1633 const struct nlattr *mask)
1634{
1635 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1636 const struct nlattr *encap;
1637 u64 key_attrs = 0;
1638 u64 mask_attrs = 0;
1639 bool encap_valid = false;
1640 int err;
1641
1642 err = parse_flow_nlattrs(key, a, &key_attrs);
1643 if (err)
1644 return err;
1645
1646 if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
1647 (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
1648 (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
1649 __be16 tci;
1650
1651 if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
1652 (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
1653 OVS_NLERR("Invalid Vlan frame.\n");
1110 return -EINVAL; 1654 return -EINVAL;
1111 swkey->ip.proto = ntohs(arp_key->arp_op); 1655 }
1112 memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN); 1656
1113 memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN); 1657 key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1658 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1659 encap = a[OVS_KEY_ATTR_ENCAP];
1660 key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
1661 encap_valid = true;
1662
1663 if (tci & htons(VLAN_TAG_PRESENT)) {
1664 err = parse_flow_nlattrs(encap, a, &key_attrs);
1665 if (err)
1666 return err;
1667 } else if (!tci) {
1668 /* Corner case for truncated 802.1Q header. */
1669 if (nla_len(encap)) {
1670 OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
1671 return -EINVAL;
1672 }
1673 } else {
1674 OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
1675 return -EINVAL;
1676 }
1677 }
1678
1679 err = ovs_key_from_nlattrs(match, key_attrs, a, false);
1680 if (err)
1681 return err;
1682
1683 if (mask) {
1684 err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
1685 if (err)
1686 return err;
1687
1688 if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) {
1689 __be16 eth_type = 0;
1690 __be16 tci = 0;
1691
1692 if (!encap_valid) {
1693 OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
1694 return -EINVAL;
1695 }
1696
1697 mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
1698 if (a[OVS_KEY_ATTR_ETHERTYPE])
1699 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1700
1701 if (eth_type == htons(0xffff)) {
1702 mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1703 encap = a[OVS_KEY_ATTR_ENCAP];
1704 err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
1705 } else {
1706 OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
1707 ntohs(eth_type));
1708 return -EINVAL;
1709 }
1710
1711 if (a[OVS_KEY_ATTR_VLAN])
1712 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1713
1714 if (!(tci & htons(VLAN_TAG_PRESENT))) {
1715 OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
1716 return -EINVAL;
1717 }
1718 }
1719
1720 err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
1721 if (err)
1722 return err;
1723 } else {
1724 /* Populate exact match flow's key mask. */
1725 if (match->mask)
1726 ovs_sw_flow_mask_set(match->mask, &match->range, 0xff);
1114 } 1727 }
1115 1728
1116 if (attrs) 1729 if (!ovs_match_validate(match, key_attrs, mask_attrs))
1117 return -EINVAL; 1730 return -EINVAL;
1118 *key_lenp = key_len;
1119 1731
1120 return 0; 1732 return 0;
1121} 1733}
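The three-way TCI check in the function above distinguishes a real VLAN tag, a truncated 802.1Q header, and an invalid combination. A hedged userspace restatement of that logic (VLAN_TAG_PRESENT is the kernel's reuse of the CFI bit, 0x1000; the demo_ names are mine):

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <stdio.h>

	#define DEMO_VLAN_TAG_PRESENT 0x1000	/* CFI bit, reused as "tag present" */

	/* tci is in network byte order, as read from OVS_KEY_ATTR_VLAN. */
	static const char *demo_classify_tci(uint16_t tci, int encap_len)
	{
		if (tci & htons(DEMO_VLAN_TAG_PRESENT))
			return "tagged: parse the nested OVS_KEY_ATTR_ENCAP attributes";
		if (!tci)
			return encap_len ? "error: truncated header with non-empty encap"
					 : "ok: truncated 802.1Q header, empty encap";
		return "error: encap set for a non-VLAN frame";
	}

	int main(void)
	{
		printf("%s\n", demo_classify_tci(htons(0x1005), 0));
		printf("%s\n", demo_classify_tci(0, 0));
		return 0;
	}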
1122 1734
1123/** 1735/**
1124 * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. 1736 * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
1125 * @priority: receives the skb priority 1737 * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
1126 * @mark: receives the skb mark 1738 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1127 * @in_port: receives the extracted input port.
1128 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1129 * sequence. 1739 * sequence.
1130 * 1740 *
1131 * This parses a series of Netlink attributes that form a flow key, which must 1741 * This parses a series of Netlink attributes that form a flow key, which must
@@ -1133,84 +1743,100 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
1133 * get the metadata, that is, the parts of the flow key that cannot be 1743 * get the metadata, that is, the parts of the flow key that cannot be
1134 * extracted from the packet itself. 1744 * extracted from the packet itself.
1135 */ 1745 */
1136int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port,
1137 const struct nlattr *attr)
1138{
1139 const struct nlattr *nla;
1140 int rem;
1141 1746
1142 *in_port = DP_MAX_PORTS; 1747int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
1143 *priority = 0; 1748 const struct nlattr *attr)
1144 *mark = 0; 1749{
1750 struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
1751 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1752 u64 attrs = 0;
1753 int err;
1754 struct sw_flow_match match;
1145 1755
1146 nla_for_each_nested(nla, attr, rem) { 1756 flow->key.phy.in_port = DP_MAX_PORTS;
1147 int type = nla_type(nla); 1757 flow->key.phy.priority = 0;
1758 flow->key.phy.skb_mark = 0;
1759 memset(tun_key, 0, sizeof(flow->key.tun_key));
1148 1760
1149 if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) { 1761 err = parse_flow_nlattrs(attr, a, &attrs);
1150 if (nla_len(nla) != ovs_key_lens[type]) 1762 if (err)
1151 return -EINVAL; 1763 return -EINVAL;
1152 1764
1153 switch (type) { 1765 memset(&match, 0, sizeof(match));
1154 case OVS_KEY_ATTR_PRIORITY: 1766 match.key = &flow->key;
1155 *priority = nla_get_u32(nla);
1156 break;
1157 1767
1158 case OVS_KEY_ATTR_IN_PORT: 1768 err = metadata_from_nlattrs(&match, &attrs, a, false);
1159 if (nla_get_u32(nla) >= DP_MAX_PORTS) 1769 if (err)
1160 return -EINVAL; 1770 return err;
1161 *in_port = nla_get_u32(nla);
1162 break;
1163 1771
1164 case OVS_KEY_ATTR_SKB_MARK:
1165 *mark = nla_get_u32(nla);
1166 break;
1167 }
1168 }
1169 }
1170 if (rem)
1171 return -EINVAL;
1172 return 0; 1772 return 0;
1173} 1773}
1174 1774
1175int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) 1775int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
1776 const struct sw_flow_key *output, struct sk_buff *skb)
1176{ 1777{
1177 struct ovs_key_ethernet *eth_key; 1778 struct ovs_key_ethernet *eth_key;
1178 struct nlattr *nla, *encap; 1779 struct nlattr *nla, *encap;
1780 bool is_mask = (swkey != output);
1179 1781
1180 if (swkey->phy.priority && 1782 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
1181 nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
1182 goto nla_put_failure; 1783 goto nla_put_failure;
1183 1784
1184 if (swkey->phy.in_port != DP_MAX_PORTS && 1785 if ((swkey->tun_key.ipv4_dst || is_mask) &&
1185 nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) 1786 ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
1186 goto nla_put_failure; 1787 goto nla_put_failure;
1187 1788
1188 if (swkey->phy.skb_mark && 1789 if (swkey->phy.in_port == DP_MAX_PORTS) {
1189 nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark)) 1790 if (is_mask && (output->phy.in_port == 0xffff))
1791 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
1792 goto nla_put_failure;
1793 } else {
1794 u16 upper_u16;
1795 upper_u16 = !is_mask ? 0 : 0xffff;
1796
1797 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
1798 (upper_u16 << 16) | output->phy.in_port))
1799 goto nla_put_failure;
1800 }
1801
1802 if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
1190 goto nla_put_failure; 1803 goto nla_put_failure;
1191 1804
1192 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 1805 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
1193 if (!nla) 1806 if (!nla)
1194 goto nla_put_failure; 1807 goto nla_put_failure;
1808
1195 eth_key = nla_data(nla); 1809 eth_key = nla_data(nla);
1196 memcpy(eth_key->eth_src, swkey->eth.src, ETH_ALEN); 1810 memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
1197 memcpy(eth_key->eth_dst, swkey->eth.dst, ETH_ALEN); 1811 memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
1198 1812
1199 if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { 1813 if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
1200 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_P_8021Q)) || 1814 __be16 eth_type;
1201 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, swkey->eth.tci)) 1815 eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
1816 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
1817 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
1202 goto nla_put_failure; 1818 goto nla_put_failure;
1203 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1819 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
1204 if (!swkey->eth.tci) 1820 if (!swkey->eth.tci)
1205 goto unencap; 1821 goto unencap;
1206 } else { 1822 } else
1207 encap = NULL; 1823 encap = NULL;
1208 }
1209 1824
1210 if (swkey->eth.type == htons(ETH_P_802_2)) 1825 if (swkey->eth.type == htons(ETH_P_802_2)) {
1826 /*
1827		 * Ethertype 802.2 is represented in netlink by omitting
1828		 * OVS_KEY_ATTR_ETHERTYPE from the flow key attributes and by
1829		 * putting 0xffff in the mask attribute. The ethertype can
1830		 * also be wildcarded.
1831 */
1832 if (is_mask && output->eth.type)
1833 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
1834 output->eth.type))
1835 goto nla_put_failure;
1211 goto unencap; 1836 goto unencap;
1837 }
1212 1838
1213 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type)) 1839 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
1214 goto nla_put_failure; 1840 goto nla_put_failure;
1215 1841
1216 if (swkey->eth.type == htons(ETH_P_IP)) { 1842 if (swkey->eth.type == htons(ETH_P_IP)) {
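A note on the OVS_KEY_ATTR_IN_PORT serialization earlier in this hunk: the attribute is 32 bits wide while datapath port numbers are 16-bit, so a mask is emitted with the unused upper half forced to all-ones (an exact match on bits that can never vary) — 0xffffffff for the unset DP_MAX_PORTS case, (0xffff << 16) | port otherwise. A small sketch of that encoding (demo_ naming is mine):

	#include <stdint.h>

	/* Encode a 16-bit datapath port (or port mask) into the 32-bit
	 * netlink attribute; masks get 0xffff in the upper half so the
	 * unused bits are exact-matched. Illustrative, not kernel code. */
	static uint32_t demo_encode_in_port(uint16_t port, int is_mask)
	{
		uint32_t upper = is_mask ? 0xffffu : 0;

		return (upper << 16) | port;
	}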
@@ -1220,12 +1846,12 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1220 if (!nla) 1846 if (!nla)
1221 goto nla_put_failure; 1847 goto nla_put_failure;
1222 ipv4_key = nla_data(nla); 1848 ipv4_key = nla_data(nla);
1223 ipv4_key->ipv4_src = swkey->ipv4.addr.src; 1849 ipv4_key->ipv4_src = output->ipv4.addr.src;
1224 ipv4_key->ipv4_dst = swkey->ipv4.addr.dst; 1850 ipv4_key->ipv4_dst = output->ipv4.addr.dst;
1225 ipv4_key->ipv4_proto = swkey->ip.proto; 1851 ipv4_key->ipv4_proto = output->ip.proto;
1226 ipv4_key->ipv4_tos = swkey->ip.tos; 1852 ipv4_key->ipv4_tos = output->ip.tos;
1227 ipv4_key->ipv4_ttl = swkey->ip.ttl; 1853 ipv4_key->ipv4_ttl = output->ip.ttl;
1228 ipv4_key->ipv4_frag = swkey->ip.frag; 1854 ipv4_key->ipv4_frag = output->ip.frag;
1229 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1855 } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1230 struct ovs_key_ipv6 *ipv6_key; 1856 struct ovs_key_ipv6 *ipv6_key;
1231 1857
@@ -1233,15 +1859,15 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1233 if (!nla) 1859 if (!nla)
1234 goto nla_put_failure; 1860 goto nla_put_failure;
1235 ipv6_key = nla_data(nla); 1861 ipv6_key = nla_data(nla);
1236 memcpy(ipv6_key->ipv6_src, &swkey->ipv6.addr.src, 1862 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
1237 sizeof(ipv6_key->ipv6_src)); 1863 sizeof(ipv6_key->ipv6_src));
1238 memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst, 1864 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
1239 sizeof(ipv6_key->ipv6_dst)); 1865 sizeof(ipv6_key->ipv6_dst));
1240 ipv6_key->ipv6_label = swkey->ipv6.label; 1866 ipv6_key->ipv6_label = output->ipv6.label;
1241 ipv6_key->ipv6_proto = swkey->ip.proto; 1867 ipv6_key->ipv6_proto = output->ip.proto;
1242 ipv6_key->ipv6_tclass = swkey->ip.tos; 1868 ipv6_key->ipv6_tclass = output->ip.tos;
1243 ipv6_key->ipv6_hlimit = swkey->ip.ttl; 1869 ipv6_key->ipv6_hlimit = output->ip.ttl;
1244 ipv6_key->ipv6_frag = swkey->ip.frag; 1870 ipv6_key->ipv6_frag = output->ip.frag;
1245 } else if (swkey->eth.type == htons(ETH_P_ARP) || 1871 } else if (swkey->eth.type == htons(ETH_P_ARP) ||
1246 swkey->eth.type == htons(ETH_P_RARP)) { 1872 swkey->eth.type == htons(ETH_P_RARP)) {
1247 struct ovs_key_arp *arp_key; 1873 struct ovs_key_arp *arp_key;
@@ -1251,11 +1877,11 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1251 goto nla_put_failure; 1877 goto nla_put_failure;
1252 arp_key = nla_data(nla); 1878 arp_key = nla_data(nla);
1253 memset(arp_key, 0, sizeof(struct ovs_key_arp)); 1879 memset(arp_key, 0, sizeof(struct ovs_key_arp));
1254 arp_key->arp_sip = swkey->ipv4.addr.src; 1880 arp_key->arp_sip = output->ipv4.addr.src;
1255 arp_key->arp_tip = swkey->ipv4.addr.dst; 1881 arp_key->arp_tip = output->ipv4.addr.dst;
1256 arp_key->arp_op = htons(swkey->ip.proto); 1882 arp_key->arp_op = htons(output->ip.proto);
1257 memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN); 1883 memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
1258 memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN); 1884 memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
1259 } 1885 }
1260 1886
1261 if ((swkey->eth.type == htons(ETH_P_IP) || 1887 if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -1270,11 +1896,11 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1270 goto nla_put_failure; 1896 goto nla_put_failure;
1271 tcp_key = nla_data(nla); 1897 tcp_key = nla_data(nla);
1272 if (swkey->eth.type == htons(ETH_P_IP)) { 1898 if (swkey->eth.type == htons(ETH_P_IP)) {
1273 tcp_key->tcp_src = swkey->ipv4.tp.src; 1899 tcp_key->tcp_src = output->ipv4.tp.src;
1274 tcp_key->tcp_dst = swkey->ipv4.tp.dst; 1900 tcp_key->tcp_dst = output->ipv4.tp.dst;
1275 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1901 } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1276 tcp_key->tcp_src = swkey->ipv6.tp.src; 1902 tcp_key->tcp_src = output->ipv6.tp.src;
1277 tcp_key->tcp_dst = swkey->ipv6.tp.dst; 1903 tcp_key->tcp_dst = output->ipv6.tp.dst;
1278 } 1904 }
1279 } else if (swkey->ip.proto == IPPROTO_UDP) { 1905 } else if (swkey->ip.proto == IPPROTO_UDP) {
1280 struct ovs_key_udp *udp_key; 1906 struct ovs_key_udp *udp_key;
@@ -1284,11 +1910,25 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1284 goto nla_put_failure; 1910 goto nla_put_failure;
1285 udp_key = nla_data(nla); 1911 udp_key = nla_data(nla);
1286 if (swkey->eth.type == htons(ETH_P_IP)) { 1912 if (swkey->eth.type == htons(ETH_P_IP)) {
1287 udp_key->udp_src = swkey->ipv4.tp.src; 1913 udp_key->udp_src = output->ipv4.tp.src;
1288 udp_key->udp_dst = swkey->ipv4.tp.dst; 1914 udp_key->udp_dst = output->ipv4.tp.dst;
1915 } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1916 udp_key->udp_src = output->ipv6.tp.src;
1917 udp_key->udp_dst = output->ipv6.tp.dst;
1918 }
1919 } else if (swkey->ip.proto == IPPROTO_SCTP) {
1920 struct ovs_key_sctp *sctp_key;
1921
1922 nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
1923 if (!nla)
1924 goto nla_put_failure;
1925 sctp_key = nla_data(nla);
1926 if (swkey->eth.type == htons(ETH_P_IP)) {
1927 sctp_key->sctp_src = swkey->ipv4.tp.src;
1928 sctp_key->sctp_dst = swkey->ipv4.tp.dst;
1289 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1929 } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1290 udp_key->udp_src = swkey->ipv6.tp.src; 1930 sctp_key->sctp_src = swkey->ipv6.tp.src;
1291 udp_key->udp_dst = swkey->ipv6.tp.dst; 1931 sctp_key->sctp_dst = swkey->ipv6.tp.dst;
1292 } 1932 }
1293 } else if (swkey->eth.type == htons(ETH_P_IP) && 1933 } else if (swkey->eth.type == htons(ETH_P_IP) &&
1294 swkey->ip.proto == IPPROTO_ICMP) { 1934 swkey->ip.proto == IPPROTO_ICMP) {
@@ -1298,8 +1938,8 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1298 if (!nla) 1938 if (!nla)
1299 goto nla_put_failure; 1939 goto nla_put_failure;
1300 icmp_key = nla_data(nla); 1940 icmp_key = nla_data(nla);
1301 icmp_key->icmp_type = ntohs(swkey->ipv4.tp.src); 1941 icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
1302 icmp_key->icmp_code = ntohs(swkey->ipv4.tp.dst); 1942 icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
1303 } else if (swkey->eth.type == htons(ETH_P_IPV6) && 1943 } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
1304 swkey->ip.proto == IPPROTO_ICMPV6) { 1944 swkey->ip.proto == IPPROTO_ICMPV6) {
1305 struct ovs_key_icmpv6 *icmpv6_key; 1945 struct ovs_key_icmpv6 *icmpv6_key;
@@ -1309,8 +1949,8 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1309 if (!nla) 1949 if (!nla)
1310 goto nla_put_failure; 1950 goto nla_put_failure;
1311 icmpv6_key = nla_data(nla); 1951 icmpv6_key = nla_data(nla);
1312 icmpv6_key->icmpv6_type = ntohs(swkey->ipv6.tp.src); 1952 icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
1313 icmpv6_key->icmpv6_code = ntohs(swkey->ipv6.tp.dst); 1953 icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
1314 1954
1315 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || 1955 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
1316 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { 1956 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
@@ -1320,10 +1960,10 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1320 if (!nla) 1960 if (!nla)
1321 goto nla_put_failure; 1961 goto nla_put_failure;
1322 nd_key = nla_data(nla); 1962 nd_key = nla_data(nla);
1323 memcpy(nd_key->nd_target, &swkey->ipv6.nd.target, 1963 memcpy(nd_key->nd_target, &output->ipv6.nd.target,
1324 sizeof(nd_key->nd_target)); 1964 sizeof(nd_key->nd_target));
1325 memcpy(nd_key->nd_sll, swkey->ipv6.nd.sll, ETH_ALEN); 1965 memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
1326 memcpy(nd_key->nd_tll, swkey->ipv6.nd.tll, ETH_ALEN); 1966 memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
1327 } 1967 }
1328 } 1968 }
1329 } 1969 }
@@ -1342,6 +1982,9 @@ nla_put_failure:
1342 * Returns zero if successful or a negative error code. */ 1982 * Returns zero if successful or a negative error code. */
1343int ovs_flow_init(void) 1983int ovs_flow_init(void)
1344{ 1984{
1985 BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
1986 BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
1987
1345 flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, 1988 flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
1346 0, NULL); 1989 0, NULL);
1347 if (flow_cache == NULL) 1990 if (flow_cache == NULL)
@@ -1355,3 +1998,84 @@ void ovs_flow_exit(void)
1355{ 1998{
1356 kmem_cache_destroy(flow_cache); 1999 kmem_cache_destroy(flow_cache);
1357} 2000}
2001
2002struct sw_flow_mask *ovs_sw_flow_mask_alloc(void)
2003{
2004 struct sw_flow_mask *mask;
2005
2006 mask = kmalloc(sizeof(*mask), GFP_KERNEL);
2007 if (mask)
2008 mask->ref_count = 0;
2009
2010 return mask;
2011}
2012
2013void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask)
2014{
2015 mask->ref_count++;
2016}
2017
2018void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)
2019{
2020 if (!mask)
2021 return;
2022
2023 BUG_ON(!mask->ref_count);
2024 mask->ref_count--;
2025
2026 if (!mask->ref_count) {
2027 list_del_rcu(&mask->list);
2028 if (deferred)
2029 kfree_rcu(mask, rcu);
2030 else
2031 kfree(mask);
2032 }
2033}
2034
2035static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a,
2036 const struct sw_flow_mask *b)
2037{
2038 u8 *a_ = (u8 *)&a->key + a->range.start;
2039 u8 *b_ = (u8 *)&b->key + b->range.start;
2040
2041 return (a->range.end == b->range.end)
2042 && (a->range.start == b->range.start)
2043 && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
2044}
2045
2046struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl,
2047 const struct sw_flow_mask *mask)
2048{
2049 struct list_head *ml;
2050
2051 list_for_each(ml, tbl->mask_list) {
2052 struct sw_flow_mask *m;
2053 m = container_of(ml, struct sw_flow_mask, list);
2054 if (ovs_sw_flow_mask_equal(mask, m))
2055 return m;
2056 }
2057
2058 return NULL;
2059}
2060
2061/**
2062 * ovs_sw_flow_mask_insert - add a new mask to the mask list.
2063 * The caller must make sure that 'mask' is not the same as any
2064 * mask that is already on the list.
2065 */
2066void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask)
2067{
2068 list_add_rcu(&mask->list, tbl->mask_list);
2069}
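Taken together, the alloc/find/insert/ref helpers above let many flows share one sw_flow_mask. A hedged sketch of the expected caller pattern — error handling trimmed, ovs_mutex assumed held, and demo_flow_mask_attach is my name, not a kernel function:

	/* Illustrative use of the mask helpers defined above: reuse an
	 * existing equal mask if one is on the table's list, otherwise
	 * insert the new one, then take a reference for the flow. */
	static int demo_flow_mask_attach(struct flow_table *tbl,
					 struct sw_flow *flow,
					 const struct sw_flow_mask *new)
	{
		struct sw_flow_mask *mask;

		mask = ovs_sw_flow_mask_find(tbl, new);
		if (!mask) {
			mask = ovs_sw_flow_mask_alloc();
			if (!mask)
				return -ENOMEM;
			mask->key = new->key;
			mask->range = new->range;
			ovs_sw_flow_mask_insert(tbl, mask);
		}
		ovs_sw_flow_mask_add_ref(mask);
		flow->mask = mask;
		return 0;
	}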
2070
2071/**
2072 * ovs_sw_flow_mask_set - set the bytes covered by 'range' in the mask to 'val'.
2073 */
2074static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
2075 struct sw_flow_key_range *range, u8 val)
2076{
2077 u8 *m = (u8 *)&mask->key + range->start;
2078
2079 mask->range = *range;
2080 memset(m, val, range_n_bytes(range));
2081}
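ovs_flow_key_mask(), declared in flow.h below, is the consumer of these ranges: within [range.start, range.end) it ANDs the packet key with the mask, one long at a time, which is why ovs_flow_init() above asserts the size and alignment of struct sw_flow_key with BUILD_BUG_ON(). A userspace model of that masking step (assumed semantics; offsets must be long-aligned):

	#include <stddef.h>

	/* dst = src & mask over the bytes [start, end), processed one
	 * unsigned long at a time — the access pattern that the
	 * BUILD_BUG_ON()s in ovs_flow_init() make safe. */
	static void demo_key_mask(unsigned long *dst, const unsigned long *src,
				  const unsigned long *mask,
				  size_t start, size_t end)
	{
		size_t i;

		for (i = start / sizeof(long); i < end / sizeof(long); i++)
			dst[i] = src[i] & mask[i];
	}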
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 0875fde65b9c..212fbf7510c4 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2007-2011 Nicira, Inc. 2 * Copyright (c) 2007-2013 Nicira, Inc.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public 5 * modify it under the terms of version 2 of the GNU General Public
@@ -33,6 +33,8 @@
33#include <net/inet_ecn.h> 33#include <net/inet_ecn.h>
34 34
35struct sk_buff; 35struct sk_buff;
36struct sw_flow_mask;
37struct flow_table;
36 38
37struct sw_flow_actions { 39struct sw_flow_actions {
38 struct rcu_head rcu; 40 struct rcu_head rcu;
@@ -40,7 +42,38 @@ struct sw_flow_actions {
40 struct nlattr actions[]; 42 struct nlattr actions[];
41}; 43};
42 44
45/* Used to memset ovs_key_ipv4_tunnel padding. */
46#define OVS_TUNNEL_KEY_SIZE \
47 (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + \
48 FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl))
49
50struct ovs_key_ipv4_tunnel {
51 __be64 tun_id;
52 __be32 ipv4_src;
53 __be32 ipv4_dst;
54 __be16 tun_flags;
55 u8 ipv4_tos;
56 u8 ipv4_ttl;
57};
58
59static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
60 const struct iphdr *iph, __be64 tun_id,
61 __be16 tun_flags)
62{
63 tun_key->tun_id = tun_id;
64 tun_key->ipv4_src = iph->saddr;
65 tun_key->ipv4_dst = iph->daddr;
66 tun_key->ipv4_tos = iph->tos;
67 tun_key->ipv4_ttl = iph->ttl;
68 tun_key->tun_flags = tun_flags;
69
70 /* clear struct padding. */
71 memset((unsigned char *) tun_key + OVS_TUNNEL_KEY_SIZE, 0,
72 sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE);
73}
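The trailing memset in ovs_flow_tun_key_init() matters because tunnel keys are compared and hashed as raw bytes: the compiler pads the 20 bytes of fields up to the struct's alignment, and uninitialized pad bytes would make otherwise-identical keys compare unequal. A runnable illustration with an analogous (assumed) layout — demo_tun_key is mine, not the kernel struct:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	struct demo_tun_key {
		uint64_t tun_id;
		uint32_t ipv4_src, ipv4_dst;
		uint16_t tun_flags;
		uint8_t ipv4_tos, ipv4_ttl;
		/* the compiler typically pads to 24 bytes here */
	};

	int main(void)
	{
		struct demo_tun_key a, b;

		/* Zeroing the whole struct (padding included) is what makes
		 * a later memcmp()-style flow lookup reliable. */
		memset(&a, 0, sizeof(a));
		memset(&b, 0, sizeof(b));
		a.tun_id = b.tun_id = 42;
		printf("equal: %s\n", memcmp(&a, &b, sizeof(a)) == 0 ? "yes" : "no");
		return 0;
	}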
74
43struct sw_flow_key { 75struct sw_flow_key {
76 struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */
44 struct { 77 struct {
45 u32 priority; /* Packet QoS priority. */ 78 u32 priority; /* Packet QoS priority. */
46 u32 skb_mark; /* SKB mark. */ 79 u32 skb_mark; /* SKB mark. */
@@ -66,8 +99,8 @@ struct sw_flow_key {
66 } addr; 99 } addr;
67 union { 100 union {
68 struct { 101 struct {
69 __be16 src; /* TCP/UDP source port. */ 102 __be16 src; /* TCP/UDP/SCTP source port. */
70 __be16 dst; /* TCP/UDP destination port. */ 103 __be16 dst; /* TCP/UDP/SCTP destination port. */
71 } tp; 104 } tp;
72 struct { 105 struct {
73 u8 sha[ETH_ALEN]; /* ARP source hardware address. */ 106 u8 sha[ETH_ALEN]; /* ARP source hardware address. */
@@ -82,8 +115,8 @@ struct sw_flow_key {
82 } addr; 115 } addr;
83 __be32 label; /* IPv6 flow label. */ 116 __be32 label; /* IPv6 flow label. */
84 struct { 117 struct {
85 __be16 src; /* TCP/UDP source port. */ 118 __be16 src; /* TCP/UDP/SCTP source port. */
86 __be16 dst; /* TCP/UDP destination port. */ 119 __be16 dst; /* TCP/UDP/SCTP destination port. */
87 } tp; 120 } tp;
88 struct { 121 struct {
89 struct in6_addr target; /* ND target address. */ 122 struct in6_addr target; /* ND target address. */
@@ -92,7 +125,7 @@ struct sw_flow_key {
92 } nd; 125 } nd;
93 } ipv6; 126 } ipv6;
94 }; 127 };
95}; 128} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
96 129
97struct sw_flow { 130struct sw_flow {
98 struct rcu_head rcu; 131 struct rcu_head rcu;
@@ -100,6 +133,8 @@ struct sw_flow {
100 u32 hash; 133 u32 hash;
101 134
102 struct sw_flow_key key; 135 struct sw_flow_key key;
136 struct sw_flow_key unmasked_key;
137 struct sw_flow_mask *mask;
103 struct sw_flow_actions __rcu *sf_acts; 138 struct sw_flow_actions __rcu *sf_acts;
104 139
105 spinlock_t lock; /* Lock for values below. */ 140 spinlock_t lock; /* Lock for values below. */
@@ -109,6 +144,20 @@ struct sw_flow {
109 u8 tcp_flags; /* Union of seen TCP flags. */ 144 u8 tcp_flags; /* Union of seen TCP flags. */
110}; 145};
111 146
147struct sw_flow_key_range {
148 size_t start;
149 size_t end;
150};
151
152struct sw_flow_match {
153 struct sw_flow_key *key;
154 struct sw_flow_key_range range;
155 struct sw_flow_mask *mask;
156};
157
158void ovs_match_init(struct sw_flow_match *match,
159 struct sw_flow_key *key, struct sw_flow_mask *mask);
160
112struct arp_eth_header { 161struct arp_eth_header {
113 __be16 ar_hrd; /* format of hardware address */ 162 __be16 ar_hrd; /* format of hardware address */
114 __be16 ar_pro; /* format of protocol address */ 163 __be16 ar_pro; /* format of protocol address */
@@ -128,29 +177,30 @@ void ovs_flow_exit(void);
128 177
129struct sw_flow *ovs_flow_alloc(void); 178struct sw_flow *ovs_flow_alloc(void);
130void ovs_flow_deferred_free(struct sw_flow *); 179void ovs_flow_deferred_free(struct sw_flow *);
131void ovs_flow_free(struct sw_flow *flow); 180void ovs_flow_free(struct sw_flow *, bool deferred);
132 181
133struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *); 182struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len);
134void ovs_flow_deferred_free_acts(struct sw_flow_actions *); 183void ovs_flow_deferred_free_acts(struct sw_flow_actions *);
135 184
136int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, 185int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
137 int *key_lenp);
138void ovs_flow_used(struct sw_flow *, struct sk_buff *); 186void ovs_flow_used(struct sw_flow *, struct sk_buff *);
139u64 ovs_flow_used_time(unsigned long flow_jiffies); 187u64 ovs_flow_used_time(unsigned long flow_jiffies);
140 188int ovs_flow_to_nlattrs(const struct sw_flow_key *,
141int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); 189 const struct sw_flow_key *, struct sk_buff *);
142int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, 190int ovs_match_from_nlattrs(struct sw_flow_match *match,
191 const struct nlattr *,
143 const struct nlattr *); 192 const struct nlattr *);
144int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port, 193int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
145 const struct nlattr *); 194 const struct nlattr *attr);
146 195
147#define MAX_ACTIONS_BUFSIZE (16 * 1024) 196#define MAX_ACTIONS_BUFSIZE (32 * 1024)
148#define TBL_MIN_BUCKETS 1024 197#define TBL_MIN_BUCKETS 1024
149 198
150struct flow_table { 199struct flow_table {
151 struct flex_array *buckets; 200 struct flex_array *buckets;
152 unsigned int count, n_buckets; 201 unsigned int count, n_buckets;
153 struct rcu_head rcu; 202 struct rcu_head rcu;
203 struct list_head *mask_list;
154 int node_ver; 204 int node_ver;
155 u32 hash_seed; 205 u32 hash_seed;
156 bool keep_flows; 206 bool keep_flows;
@@ -166,18 +216,44 @@ static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table)
166 return (table->count > table->n_buckets); 216 return (table->count > table->n_buckets);
167} 217}
168 218
169struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, 219struct sw_flow *ovs_flow_lookup(struct flow_table *,
170 struct sw_flow_key *key, int len); 220 const struct sw_flow_key *);
171void ovs_flow_tbl_destroy(struct flow_table *table); 221struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
172void ovs_flow_tbl_deferred_destroy(struct flow_table *table); 222 struct sw_flow_match *match);
223
224void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred);
173struct flow_table *ovs_flow_tbl_alloc(int new_size); 225struct flow_table *ovs_flow_tbl_alloc(int new_size);
174struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); 226struct flow_table *ovs_flow_tbl_expand(struct flow_table *table);
175struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); 227struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table);
176void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow);
177void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
178u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len);
179 228
180struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx); 229void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow);
230void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow);
231
232struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *idx);
181extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; 233extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1];
234int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
235 struct sw_flow_match *match, bool is_mask);
236int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
237 const struct ovs_key_ipv4_tunnel *tun_key,
238 const struct ovs_key_ipv4_tunnel *output);
239
240bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
241 const struct sw_flow_key *key, int key_end);
242
243struct sw_flow_mask {
244 int ref_count;
245 struct rcu_head rcu;
246 struct list_head list;
247 struct sw_flow_key_range range;
248 struct sw_flow_key key;
249};
182 250
251struct sw_flow_mask *ovs_sw_flow_mask_alloc(void);
252void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *);
253void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *, bool deferred);
254void ovs_sw_flow_mask_insert(struct flow_table *, struct sw_flow_mask *);
255struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *,
256 const struct sw_flow_mask *);
257void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
258 const struct sw_flow_mask *mask);
183#endif /* flow.h */ 259#endif /* flow.h */
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
new file mode 100644
index 000000000000..c99dea543d64
--- /dev/null
+++ b/net/openvswitch/vport-gre.c
@@ -0,0 +1,272 @@
1/*
2 * Copyright (c) 2007-2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/if.h>
22#include <linux/skbuff.h>
23#include <linux/ip.h>
24#include <linux/if_tunnel.h>
25#include <linux/if_vlan.h>
26#include <linux/in.h>
27#include <linux/if_vlan.h>
28#include <linux/in.h>
29#include <linux/in_route.h>
30#include <linux/inetdevice.h>
31#include <linux/jhash.h>
32#include <linux/list.h>
33#include <linux/kernel.h>
34#include <linux/workqueue.h>
35#include <linux/rculist.h>
36#include <net/route.h>
37#include <net/xfrm.h>
38
39#include <net/icmp.h>
40#include <net/ip.h>
41#include <net/ip_tunnels.h>
42#include <net/gre.h>
43#include <net/net_namespace.h>
44#include <net/netns/generic.h>
45#include <net/protocol.h>
46
47#include "datapath.h"
48#include "vport.h"
49
50/* Returns the least-significant 32 bits of a __be64. */
51static __be32 be64_get_low32(__be64 x)
52{
53#ifdef __BIG_ENDIAN
54 return (__force __be32)x;
55#else
56 return (__force __be32)((__force u64)x >> 32);
57#endif
58}
59
60static __be16 filter_tnl_flags(__be16 flags)
61{
62 return flags & (TUNNEL_CSUM | TUNNEL_KEY);
63}
64
65static struct sk_buff *__build_header(struct sk_buff *skb,
66 int tunnel_hlen)
67{
68 const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
69 struct tnl_ptk_info tpi;
70
71 skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
72 if (IS_ERR(skb))
73 return NULL;
74
75 tpi.flags = filter_tnl_flags(tun_key->tun_flags);
76 tpi.proto = htons(ETH_P_TEB);
77 tpi.key = be64_get_low32(tun_key->tun_id);
78 tpi.seq = 0;
79 gre_build_header(skb, &tpi, tunnel_hlen);
80
81 return skb;
82}
83
84static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
85{
86#ifdef __BIG_ENDIAN
87 return (__force __be64)((__force u64)seq << 32 | (__force u32)key);
88#else
89 return (__force __be64)((__force u64)key << 32 | (__force u32)seq);
90#endif
91}
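be64_get_low32() and key_to_tunnel_id() are byte-order games around one invariant: in network order, the 64-bit OVS tunnel ID carries the 32-bit GRE key in its low half and the sequence number in its high half. A portable restatement on host-order values (the #ifdefs above exist only to produce this result on both endiannesses; demo_ names are mine):

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t demo_pack_tunnel_id(uint32_t key, uint32_t seq)
	{
		return ((uint64_t)seq << 32) | key;	/* seq high, key low */
	}

	static uint32_t demo_tunnel_id_low32(uint64_t id)
	{
		return (uint32_t)id;			/* recovers the GRE key */
	}

	int main(void)
	{
		uint64_t id = demo_pack_tunnel_id(0xabcd1234u, 7);

		printf("key=%#x seq=%u\n", (unsigned)demo_tunnel_id_low32(id),
		       (unsigned)(id >> 32));
		return 0;
	}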
92
93/* Called with rcu_read_lock and BH disabled. */
94static int gre_rcv(struct sk_buff *skb,
95 const struct tnl_ptk_info *tpi)
96{
97 struct ovs_key_ipv4_tunnel tun_key;
98 struct ovs_net *ovs_net;
99 struct vport *vport;
100 __be64 key;
101
102 ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
103 vport = rcu_dereference(ovs_net->vport_net.gre_vport);
104 if (unlikely(!vport))
105 return PACKET_REJECT;
106
107 key = key_to_tunnel_id(tpi->key, tpi->seq);
108 ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key,
109 filter_tnl_flags(tpi->flags));
110
111 ovs_vport_receive(vport, skb, &tun_key);
112 return PACKET_RCVD;
113}
114
115static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
116{
117 struct net *net = ovs_dp_get_net(vport->dp);
118 struct flowi4 fl;
119 struct rtable *rt;
120 int min_headroom;
121 int tunnel_hlen;
122 __be16 df;
123 int err;
124
125 if (unlikely(!OVS_CB(skb)->tun_key)) {
126 err = -EINVAL;
127 goto error;
128 }
129
130 /* Route lookup */
131 memset(&fl, 0, sizeof(fl));
132 fl.daddr = OVS_CB(skb)->tun_key->ipv4_dst;
133 fl.saddr = OVS_CB(skb)->tun_key->ipv4_src;
134 fl.flowi4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos);
135 fl.flowi4_mark = skb->mark;
136 fl.flowi4_proto = IPPROTO_GRE;
137
138 rt = ip_route_output_key(net, &fl);
139 if (IS_ERR(rt))
140 return PTR_ERR(rt);
141
142 tunnel_hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags);
143
144 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
145 + tunnel_hlen + sizeof(struct iphdr)
146 + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
147 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
148 int head_delta = SKB_DATA_ALIGN(min_headroom -
149 skb_headroom(skb) +
150 16);
151 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
152 0, GFP_ATOMIC);
153 if (unlikely(err))
154 goto err_free_rt;
155 }
156
157 if (vlan_tx_tag_present(skb)) {
158 if (unlikely(!__vlan_put_tag(skb,
159 skb->vlan_proto,
160 vlan_tx_tag_get(skb)))) {
161 err = -ENOMEM;
162 goto err_free_rt;
163 }
164 skb->vlan_tci = 0;
165 }
166
167 /* Push Tunnel header. */
168 skb = __build_header(skb, tunnel_hlen);
169 if (unlikely(!skb)) {
170 err = 0;
171 goto err_free_rt;
172 }
173
174 df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
175 htons(IP_DF) : 0;
176
177 skb->local_df = 1;
178
179 return iptunnel_xmit(rt, skb, fl.saddr,
180 OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
181 OVS_CB(skb)->tun_key->ipv4_tos,
182 OVS_CB(skb)->tun_key->ipv4_ttl, df, false);
183err_free_rt:
184 ip_rt_put(rt);
185error:
186 return err;
187}
188
189static struct gre_cisco_protocol gre_protocol = {
190 .handler = gre_rcv,
191 .priority = 1,
192};
193
194static int gre_ports;
195static int gre_init(void)
196{
197 int err;
198
199 gre_ports++;
200 if (gre_ports > 1)
201 return 0;
202
203 err = gre_cisco_register(&gre_protocol);
204 if (err)
205 pr_warn("cannot register gre protocol handler\n");
206
207 return err;
208}
209
210static void gre_exit(void)
211{
212 gre_ports--;
213 if (gre_ports > 0)
214 return;
215
216 gre_cisco_unregister(&gre_protocol);
217}
218
219static const char *gre_get_name(const struct vport *vport)
220{
221 return vport_priv(vport);
222}
223
224static struct vport *gre_create(const struct vport_parms *parms)
225{
226 struct net *net = ovs_dp_get_net(parms->dp);
227 struct ovs_net *ovs_net;
228 struct vport *vport;
229 int err;
230
231 err = gre_init();
232 if (err)
233 return ERR_PTR(err);
234
235 ovs_net = net_generic(net, ovs_net_id);
236 if (ovsl_dereference(ovs_net->vport_net.gre_vport)) {
237 vport = ERR_PTR(-EEXIST);
238 goto error;
239 }
240
241 vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms);
242 if (IS_ERR(vport))
243 goto error;
244
245 strncpy(vport_priv(vport), parms->name, IFNAMSIZ);
246 rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport);
247 return vport;
248
249error:
250 gre_exit();
251 return vport;
252}
253
254static void gre_tnl_destroy(struct vport *vport)
255{
256 struct net *net = ovs_dp_get_net(vport->dp);
257 struct ovs_net *ovs_net;
258
259 ovs_net = net_generic(net, ovs_net_id);
260
261 rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL);
262 ovs_vport_deferred_free(vport);
263 gre_exit();
264}
265
266const struct vport_ops ovs_gre_vport_ops = {
267 .type = OVS_VPORT_TYPE_GRE,
268 .create = gre_create,
269 .destroy = gre_tnl_destroy,
270 .get_name = gre_get_name,
271 .send = gre_tnl_send,
272};
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 84e0a0379186..98d3edbbc235 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -67,7 +67,7 @@ static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netde
67static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) 67static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
68{ 68{
69 rcu_read_lock(); 69 rcu_read_lock();
70 ovs_vport_receive(internal_dev_priv(netdev)->vport, skb); 70 ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL);
71 rcu_read_unlock(); 71 rcu_read_unlock();
72 return 0; 72 return 0;
73} 73}
@@ -221,6 +221,7 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
221 skb->dev = netdev; 221 skb->dev = netdev;
222 skb->pkt_type = PACKET_HOST; 222 skb->pkt_type = PACKET_HOST;
223 skb->protocol = eth_type_trans(skb, netdev); 223 skb->protocol = eth_type_trans(skb, netdev);
224 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
224 225
225 netif_rx(skb); 226 netif_rx(skb);
226 227
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 4f01c6d2ffa4..09d93c13cfd6 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -25,6 +25,7 @@
25#include <linux/llc.h> 25#include <linux/llc.h>
26#include <linux/rtnetlink.h> 26#include <linux/rtnetlink.h>
27#include <linux/skbuff.h> 27#include <linux/skbuff.h>
28#include <linux/openvswitch.h>
28 29
29#include <net/llc.h> 30#include <net/llc.h>
30 31
@@ -49,7 +50,9 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
49 return; 50 return;
50 51
51 skb_push(skb, ETH_HLEN); 52 skb_push(skb, ETH_HLEN);
52 ovs_vport_receive(vport, skb); 53 ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
54
55 ovs_vport_receive(vport, skb, NULL);
53 return; 56 return;
54 57
55error: 58error:
@@ -72,6 +75,15 @@ static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
72 return RX_HANDLER_CONSUMED; 75 return RX_HANDLER_CONSUMED;
73} 76}
74 77
78static struct net_device *get_dpdev(struct datapath *dp)
79{
80 struct vport *local;
81
82 local = ovs_vport_ovsl(dp, OVSP_LOCAL);
83 BUG_ON(!local);
84 return netdev_vport_priv(local)->dev;
85}
86
75static struct vport *netdev_create(const struct vport_parms *parms) 87static struct vport *netdev_create(const struct vport_parms *parms)
76{ 88{
77 struct vport *vport; 89 struct vport *vport;
@@ -101,10 +113,15 @@ static struct vport *netdev_create(const struct vport_parms *parms)
101 } 113 }
102 114
103 rtnl_lock(); 115 rtnl_lock();
116 err = netdev_master_upper_dev_link(netdev_vport->dev,
117 get_dpdev(vport->dp));
118 if (err)
119 goto error_unlock;
120
104 err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook, 121 err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
105 vport); 122 vport);
106 if (err) 123 if (err)
107 goto error_unlock; 124 goto error_master_upper_dev_unlink;
108 125
109 dev_set_promiscuity(netdev_vport->dev, 1); 126 dev_set_promiscuity(netdev_vport->dev, 1);
110 netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH; 127 netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
@@ -112,6 +129,8 @@ static struct vport *netdev_create(const struct vport_parms *parms)
112 129
113 return vport; 130 return vport;
114 131
132error_master_upper_dev_unlink:
133 netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp));
115error_unlock: 134error_unlock:
116 rtnl_unlock(); 135 rtnl_unlock();
117error_put: 136error_put:
@@ -138,6 +157,7 @@ static void netdev_destroy(struct vport *vport)
138 rtnl_lock(); 157 rtnl_lock();
139 netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; 158 netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
140 netdev_rx_handler_unregister(netdev_vport->dev); 159 netdev_rx_handler_unregister(netdev_vport->dev);
160 netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp));
141 dev_set_promiscuity(netdev_vport->dev, -1); 161 dev_set_promiscuity(netdev_vport->dev, -1);
142 rtnl_unlock(); 162 rtnl_unlock();
143 163
@@ -170,7 +190,7 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb)
170 net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", 190 net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
171 netdev_vport->dev->name, 191 netdev_vport->dev->name,
172 packet_length(skb), mtu); 192 packet_length(skb), mtu);
173 goto error; 193 goto drop;
174 } 194 }
175 195
176 skb->dev = netdev_vport->dev; 196 skb->dev = netdev_vport->dev;
@@ -179,9 +199,8 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb)
179 199
180 return len; 200 return len;
181 201
182error: 202drop:
183 kfree_skb(skb); 203 kfree_skb(skb);
184 ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
185 return 0; 204 return 0;
186} 205}
187 206
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index a3cb3a32cd77..dd298b5c5cdb 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -39,6 +39,5 @@ netdev_vport_priv(const struct vport *vport)
39} 39}
40 40
41const char *ovs_netdev_get_name(const struct vport *); 41const char *ovs_netdev_get_name(const struct vport *);
42const char *ovs_netdev_get_config(const struct vport *);
43 42
44#endif /* vport_netdev.h */ 43#endif /* vport_netdev.h */
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
new file mode 100644
index 000000000000..a481c03e2861
--- /dev/null
+++ b/net/openvswitch/vport-vxlan.c
@@ -0,0 +1,204 @@
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 * Copyright (c) 2013 Cisco Systems, Inc.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21
22#include <linux/in.h>
23#include <linux/ip.h>
24#include <linux/net.h>
25#include <linux/rculist.h>
26#include <linux/udp.h>
27
28#include <net/icmp.h>
29#include <net/ip.h>
30#include <net/udp.h>
31#include <net/ip_tunnels.h>
32#include <net/udp.h>
33#include <net/rtnetlink.h>
34#include <net/route.h>
35#include <net/dsfield.h>
36#include <net/inet_ecn.h>
37#include <net/net_namespace.h>
38#include <net/netns/generic.h>
39#include <net/vxlan.h>
40
41#include "datapath.h"
42#include "vport.h"
43
44/**
45 * struct vxlan_port - Keeps track of open UDP ports
46 * @vs: vxlan_sock created for the port.
47 * @name: vport name.
48 */
49struct vxlan_port {
50 struct vxlan_sock *vs;
51 char name[IFNAMSIZ];
52};
53
54static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
55{
56 return vport_priv(vport);
57}
58
59/* Called with rcu_read_lock and BH disabled. */
60static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
61{
62 struct ovs_key_ipv4_tunnel tun_key;
63 struct vport *vport = vs->data;
64 struct iphdr *iph;
65 __be64 key;
66
67 /* Save outer tunnel values */
68 iph = ip_hdr(skb);
69 key = cpu_to_be64(ntohl(vx_vni) >> 8);
70 ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
71
72 ovs_vport_receive(vport, skb, &tun_key);
73}
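The shift in vxlan_rcv() undoes the on-wire VNI placement: the 24-bit VNI occupies the top three bytes of the 32-bit header field, with the low byte reserved, so receive extracts it with ntohl() >> 8 and the send path (vxlan_tnl_send() below) rebuilds it with htonl(vni << 8). A quick round-trip check:

	#include <arpa/inet.h>
	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t vni = 0x123456;		/* 24-bit VXLAN network ID */
		uint32_t wire = htonl(vni << 8);	/* as placed in the header */
		uint64_t tun_id = ntohl(wire) >> 8;	/* as recovered in vxlan_rcv() */

		assert(tun_id == vni);
		return 0;
	}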
74
75static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
76{
77 struct vxlan_port *vxlan_port = vxlan_vport(vport);
78 __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
79
80 if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
81 return -EMSGSIZE;
82 return 0;
83}
84
85static void vxlan_tnl_destroy(struct vport *vport)
86{
87 struct vxlan_port *vxlan_port = vxlan_vport(vport);
88
89 vxlan_sock_release(vxlan_port->vs);
90
91 ovs_vport_deferred_free(vport);
92}
93
94static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
95{
96 struct net *net = ovs_dp_get_net(parms->dp);
97 struct nlattr *options = parms->options;
98 struct vxlan_port *vxlan_port;
99 struct vxlan_sock *vs;
100 struct vport *vport;
101 struct nlattr *a;
102 u16 dst_port;
103 int err;
104
105 if (!options) {
106 err = -EINVAL;
107 goto error;
108 }
109 a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
110 if (a && nla_len(a) == sizeof(u16)) {
111 dst_port = nla_get_u16(a);
112 } else {
113 /* Require destination port from userspace. */
114 err = -EINVAL;
115 goto error;
116 }
117
118 vport = ovs_vport_alloc(sizeof(struct vxlan_port),
119 &ovs_vxlan_vport_ops, parms);
120 if (IS_ERR(vport))
121 return vport;
122
123 vxlan_port = vxlan_vport(vport);
124 strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
125
126 vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, false);
127 if (IS_ERR(vs)) {
128 ovs_vport_free(vport);
129 return (void *)vs;
130 }
131 vxlan_port->vs = vs;
132
133 return vport;
134
135error:
136 return ERR_PTR(err);
137}
138
139static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
140{
141 struct net *net = ovs_dp_get_net(vport->dp);
142 struct vxlan_port *vxlan_port = vxlan_vport(vport);
143 __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
144 struct rtable *rt;
145 struct flowi4 fl;
146 __be16 src_port;
147 int port_min;
148 int port_max;
149 __be16 df;
150 int err;
151
152 if (unlikely(!OVS_CB(skb)->tun_key)) {
153 err = -EINVAL;
154 goto error;
155 }
156
157 /* Route lookup */
158 memset(&fl, 0, sizeof(fl));
159 fl.daddr = OVS_CB(skb)->tun_key->ipv4_dst;
160 fl.saddr = OVS_CB(skb)->tun_key->ipv4_src;
161 fl.flowi4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos);
162 fl.flowi4_mark = skb->mark;
163 fl.flowi4_proto = IPPROTO_UDP;
164
165 rt = ip_route_output_key(net, &fl);
166 if (IS_ERR(rt)) {
167 err = PTR_ERR(rt);
168 goto error;
169 }
170
171 df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
172 htons(IP_DF) : 0;
173
174 skb->local_df = 1;
175
176 inet_get_local_port_range(&port_min, &port_max);
177 src_port = vxlan_src_port(port_min, port_max, skb);
178
179 err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
180 fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst,
181 OVS_CB(skb)->tun_key->ipv4_tos,
182 OVS_CB(skb)->tun_key->ipv4_ttl, df,
183 src_port, dst_port,
184 htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8));
185 if (err < 0)
186 ip_rt_put(rt);
187error:
188 return err;
189}
190
191static const char *vxlan_get_name(const struct vport *vport)
192{
193 struct vxlan_port *vxlan_port = vxlan_vport(vport);
194 return vxlan_port->name;
195}
196
197const struct vport_ops ovs_vxlan_vport_ops = {
198 .type = OVS_VPORT_TYPE_VXLAN,
199 .create = vxlan_tnl_create,
200 .destroy = vxlan_tnl_destroy,
201 .get_name = vxlan_get_name,
202 .get_options = vxlan_get_options,
203 .send = vxlan_tnl_send,
204};
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 720623190eaa..6f65dbe13812 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -38,6 +38,13 @@
38static const struct vport_ops *vport_ops_list[] = { 38static const struct vport_ops *vport_ops_list[] = {
39 &ovs_netdev_vport_ops, 39 &ovs_netdev_vport_ops,
40 &ovs_internal_vport_ops, 40 &ovs_internal_vport_ops,
41
42#ifdef CONFIG_OPENVSWITCH_GRE
43 &ovs_gre_vport_ops,
44#endif
45#ifdef CONFIG_OPENVSWITCH_VXLAN
46 &ovs_vxlan_vport_ops,
47#endif
41}; 48};
42 49
43/* Protected by RCU read lock for reading, ovs_mutex for writing. */ 50/* Protected by RCU read lock for reading, ovs_mutex for writing. */
@@ -196,7 +203,7 @@ out:
196 * ovs_vport_set_options - modify existing vport device (for kernel callers) 203 * ovs_vport_set_options - modify existing vport device (for kernel callers)
197 * 204 *
198 * @vport: vport to modify. 205 * @vport: vport to modify.
199 * @port: New configuration. 206 * @options: New configuration.
200 * 207 *
201 * Modifies an existing device with the specified configuration (which is 208 * Modifies an existing device with the specified configuration (which is
202 * dependent on device type). ovs_mutex must be held. 209 * dependent on device type). ovs_mutex must be held.
@@ -321,11 +328,13 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
321 * 328 *
322 * @vport: vport that received the packet 329 * @vport: vport that received the packet
323 * @skb: skb that was received 330 * @skb: skb that was received
331 * @tun_key: tunnel (if any) that carried packet
324 * 332 *
325 * Must be called with rcu_read_lock. The packet cannot be shared and 333 * Must be called with rcu_read_lock. The packet cannot be shared and
326 * skb->data should point to the Ethernet header. 334 * skb->data should point to the Ethernet header.
327 */ 335 */
328void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) 336void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
337 struct ovs_key_ipv4_tunnel *tun_key)
329{ 338{
330 struct pcpu_tstats *stats; 339 struct pcpu_tstats *stats;
331 340
@@ -335,6 +344,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
335 stats->rx_bytes += skb->len; 344 stats->rx_bytes += skb->len;
336 u64_stats_update_end(&stats->syncp); 345 u64_stats_update_end(&stats->syncp);
337 346
347 OVS_CB(skb)->tun_key = tun_key;
338 ovs_dp_process_received_packet(vport, skb); 348 ovs_dp_process_received_packet(vport, skb);
339} 349}
340 350
@@ -351,7 +361,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
351{ 361{
352 int sent = vport->ops->send(vport, skb); 362 int sent = vport->ops->send(vport, skb);
353 363
354 if (likely(sent)) { 364 if (likely(sent > 0)) {
355 struct pcpu_tstats *stats; 365 struct pcpu_tstats *stats;
356 366
357 stats = this_cpu_ptr(vport->percpu_stats); 367 stats = this_cpu_ptr(vport->percpu_stats);
@@ -360,7 +370,12 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
360 stats->tx_packets++; 370 stats->tx_packets++;
361 stats->tx_bytes += sent; 371 stats->tx_bytes += sent;
362 u64_stats_update_end(&stats->syncp); 372 u64_stats_update_end(&stats->syncp);
363 } 373 } else if (sent < 0) {
374 ovs_vport_record_error(vport, VPORT_E_TX_ERROR);
375 kfree_skb(skb);
376 } else
377 ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
378
364 return sent; 379 return sent;
365} 380}
366 381
@@ -371,7 +386,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
371 * @err_type: one of enum vport_err_type types to indicate the error type 386 * @err_type: one of enum vport_err_type types to indicate the error type
372 * 387 *
373 * If using the vport generic stats layer indicate that an error of the given 388 * If using the vport generic stats layer indicate that an error of the given
374 * type has occured. 389 * type has occurred.
375 */ 390 */
376void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) 391void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type)
377{ 392{
@@ -397,3 +412,18 @@ void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type)
397 412
398 spin_unlock(&vport->stats_lock); 413 spin_unlock(&vport->stats_lock);
399} 414}
415
416static void free_vport_rcu(struct rcu_head *rcu)
417{
418 struct vport *vport = container_of(rcu, struct vport, rcu);
419
420 ovs_vport_free(vport);
421}
422
423void ovs_vport_deferred_free(struct vport *vport)
424{
425 if (!vport)
426 return;
427
428 call_rcu(&vport->rcu, free_vport_rcu);
429}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 68a377bc0841..1a9fbcec6e1b 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -34,6 +34,11 @@ struct vport_parms;
34 34
35/* The following definitions are for users of the vport subsystem: */ 35/* The following definitions are for users of the vport subsystem: */
36 36
37/* The following definitions are for users of the vport subsystem: */
38struct vport_net {
39 struct vport __rcu *gre_vport;
40};
41
37int ovs_vport_init(void); 42int ovs_vport_init(void);
38void ovs_vport_exit(void); 43void ovs_vport_exit(void);
39 44
@@ -123,9 +128,8 @@ struct vport_parms {
123 * existing vport to a &struct sk_buff. May be %NULL for a vport that does not 128 * existing vport to a &struct sk_buff. May be %NULL for a vport that does not
124 * have any configuration. 129 * have any configuration.
125 * @get_name: Get the device's name. 130 * @get_name: Get the device's name.
126 * @get_config: Get the device's configuration. 131 * @send: Send a packet on the device. Returns the length of the packet sent,
127 * May be null if the device does not have an ifindex. 132 * zero for dropped packets or negative for error.
128 * @send: Send a packet on the device. Returns the length of the packet sent.
129 */ 133 */
130struct vport_ops { 134struct vport_ops {
131 enum ovs_vport_type type; 135 enum ovs_vport_type type;
@@ -139,7 +143,6 @@ struct vport_ops {
139 143
140 /* Called with rcu_read_lock or ovs_mutex. */ 144 /* Called with rcu_read_lock or ovs_mutex. */
141 const char *(*get_name)(const struct vport *); 145 const char *(*get_name)(const struct vport *);
142 void (*get_config)(const struct vport *, void *);
143 146
144 int (*send)(struct vport *, struct sk_buff *); 147 int (*send)(struct vport *, struct sk_buff *);
145}; 148};
@@ -154,6 +157,7 @@ enum vport_err_type {
154struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *, 157struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *,
155 const struct vport_parms *); 158 const struct vport_parms *);
156void ovs_vport_free(struct vport *); 159void ovs_vport_free(struct vport *);
160void ovs_vport_deferred_free(struct vport *vport);
157 161
158#define VPORT_ALIGN 8 162#define VPORT_ALIGN 8
159 163
@@ -186,12 +190,22 @@ static inline struct vport *vport_from_priv(const void *priv)
186 return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); 190 return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
187} 191}
188 192
189void ovs_vport_receive(struct vport *, struct sk_buff *); 193void ovs_vport_receive(struct vport *, struct sk_buff *,
194 struct ovs_key_ipv4_tunnel *);
190void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); 195void ovs_vport_record_error(struct vport *, enum vport_err_type err_type);
191 196
192/* List of statically compiled vport implementations. Don't forget to also 197/* List of statically compiled vport implementations. Don't forget to also
193 * add yours to the list at the top of vport.c. */ 198 * add yours to the list at the top of vport.c. */
194extern const struct vport_ops ovs_netdev_vport_ops; 199extern const struct vport_ops ovs_netdev_vport_ops;
195extern const struct vport_ops ovs_internal_vport_ops; 200extern const struct vport_ops ovs_internal_vport_ops;
201extern const struct vport_ops ovs_gre_vport_ops;
202extern const struct vport_ops ovs_vxlan_vport_ops;
203
204static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
205 const void *start, unsigned int len)
206{
207 if (skb->ip_summed == CHECKSUM_COMPLETE)
208 skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
209}
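ovs_skb_postpush_rcsum() relies on the Internet checksum being additive: with CHECKSUM_COMPLETE, skb->csum already covers the packet data, so after skb_push() restores the Ethernet header its bytes can simply be summed and folded in rather than recomputing the whole packet. A userspace demonstration of that additivity (demo_sum is a naive stand-in for csum_partial(); the split is on an even byte boundary):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Unfolded 32-bit accumulator over 16-bit big-endian words. */
	static uint32_t demo_sum(const uint8_t *p, size_t len, uint32_t acc)
	{
		size_t i;

		for (i = 0; i + 1 < len; i += 2)
			acc += (uint32_t)p[i] << 8 | p[i + 1];
		if (len & 1)
			acc += (uint32_t)p[len - 1] << 8;
		return acc;
	}

	int main(void)
	{
		uint8_t pkt[] = { 0xde, 0xad, 0xbe, 0xef, 0x12, 0x34 };
		uint32_t whole = demo_sum(pkt, 6, 0);
		uint32_t split = demo_sum(pkt, 2, 0) + demo_sum(pkt + 2, 4, 0);

		printf("%s\n", whole == split ? "additive" : "mismatch");
		return 0;
	}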
196 210
197#endif /* vport.h */ 211#endif /* vport.h */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8ec1bca7f859..2e8286b47c28 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -88,7 +88,7 @@
88#include <linux/virtio_net.h> 88#include <linux/virtio_net.h>
89#include <linux/errqueue.h> 89#include <linux/errqueue.h>
90#include <linux/net_tstamp.h> 90#include <linux/net_tstamp.h>
91 91#include <linux/reciprocal_div.h>
92#ifdef CONFIG_INET 92#ifdef CONFIG_INET
93#include <net/inet_common.h> 93#include <net/inet_common.h>
94#endif 94#endif
@@ -1135,7 +1135,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f,
1135 struct sk_buff *skb, 1135 struct sk_buff *skb,
1136 unsigned int num) 1136 unsigned int num)
1137{ 1137{
1138 return (((u64)skb->rxhash) * num) >> 32; 1138 return reciprocal_divide(skb->rxhash, num);
1139} 1139}
1140 1140
1141static unsigned int fanout_demux_lb(struct packet_fanout *f, 1141static unsigned int fanout_demux_lb(struct packet_fanout *f,
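reciprocal_divide(x, num) here computes (x * num) >> 32, the same expression the removed line open-coded: it scales a uniformly distributed 32-bit hash into [0, num) with one multiply instead of a modulo. The same helper serves the new fanout_demux_rnd() below, with prandom_u32() as input. A standalone equivalent:

	#include <stdint.h>
	#include <stdio.h>

	/* Map a uniform 32-bit value into [0, num) without a division. */
	static uint32_t demo_reciprocal_divide(uint32_t x, uint32_t num)
	{
		return (uint32_t)(((uint64_t)x * num) >> 32);
	}

	int main(void)
	{
		printf("%u\n", (unsigned)demo_reciprocal_divide(0x80000000u, 8)); /* 4 */
		return 0;
	}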
@@ -1158,6 +1158,13 @@ static unsigned int fanout_demux_cpu(struct packet_fanout *f,
1158 return smp_processor_id() % num; 1158 return smp_processor_id() % num;
1159} 1159}
1160 1160
1161static unsigned int fanout_demux_rnd(struct packet_fanout *f,
1162 struct sk_buff *skb,
1163 unsigned int num)
1164{
1165 return reciprocal_divide(prandom_u32(), num);
1166}
1167
1161static unsigned int fanout_demux_rollover(struct packet_fanout *f, 1168static unsigned int fanout_demux_rollover(struct packet_fanout *f,
1162 struct sk_buff *skb, 1169 struct sk_buff *skb,
1163 unsigned int idx, unsigned int skip, 1170 unsigned int idx, unsigned int skip,
@@ -1215,6 +1222,9 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1215 case PACKET_FANOUT_CPU: 1222 case PACKET_FANOUT_CPU:
1216 idx = fanout_demux_cpu(f, skb, num); 1223 idx = fanout_demux_cpu(f, skb, num);
1217 break; 1224 break;
1225 case PACKET_FANOUT_RND:
1226 idx = fanout_demux_rnd(f, skb, num);
1227 break;
1218 case PACKET_FANOUT_ROLLOVER: 1228 case PACKET_FANOUT_ROLLOVER:
1219 idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num); 1229 idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num);
1220 break; 1230 break;
@@ -1284,6 +1294,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1284 case PACKET_FANOUT_HASH: 1294 case PACKET_FANOUT_HASH:
1285 case PACKET_FANOUT_LB: 1295 case PACKET_FANOUT_LB:
1286 case PACKET_FANOUT_CPU: 1296 case PACKET_FANOUT_CPU:
1297 case PACKET_FANOUT_RND:
1287 break; 1298 break;
1288 default: 1299 default:
1289 return -EINVAL; 1300 return -EINVAL;
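
For completeness, a hedged usage sketch of the new PACKET_FANOUT_RND mode from user space: the fanout group id occupies the low 16 bits of the setsockopt() argument and the mode the high 16 bits. Running this needs CAP_NET_RAW, and the group id 42 is arbitrary.

#include <stdio.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>

#ifndef PACKET_FANOUT_RND
#define PACKET_FANOUT_RND 4	/* new in this patch series */
#endif

int main(void)
{
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	int arg = 42 | (PACKET_FANOUT_RND << 16);	/* id | (mode << 16) */

	if (fd < 0 || setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
				 &arg, sizeof(arg)) < 0) {
		perror("packet fanout");
		return 1;
	}
	printf("joined fanout group 42 in RND mode\n");
	return 0;
}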
@@ -2181,7 +2192,7 @@ static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
2181 linear = len; 2192 linear = len;
2182 2193
2183 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, 2194 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
2184 err); 2195 err, 0);
2185 if (!skb) 2196 if (!skb)
2186 return NULL; 2197 return NULL;
2187 2198
@@ -2638,51 +2649,6 @@ out:
2638 return err; 2649 return err;
2639} 2650}
2640 2651
2641static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
2642{
2643 struct sock_exterr_skb *serr;
2644 struct sk_buff *skb, *skb2;
2645 int copied, err;
2646
2647 err = -EAGAIN;
2648 skb = skb_dequeue(&sk->sk_error_queue);
2649 if (skb == NULL)
2650 goto out;
2651
2652 copied = skb->len;
2653 if (copied > len) {
2654 msg->msg_flags |= MSG_TRUNC;
2655 copied = len;
2656 }
2657 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
2658 if (err)
2659 goto out_free_skb;
2660
2661 sock_recv_timestamp(msg, sk, skb);
2662
2663 serr = SKB_EXT_ERR(skb);
2664 put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
2665 sizeof(serr->ee), &serr->ee);
2666
2667 msg->msg_flags |= MSG_ERRQUEUE;
2668 err = copied;
2669
2670 /* Reset and regenerate socket error */
2671 spin_lock_bh(&sk->sk_error_queue.lock);
2672 sk->sk_err = 0;
2673 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
2674 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
2675 spin_unlock_bh(&sk->sk_error_queue.lock);
2676 sk->sk_error_report(sk);
2677 } else
2678 spin_unlock_bh(&sk->sk_error_queue.lock);
2679
2680out_free_skb:
2681 kfree_skb(skb);
2682out:
2683 return err;
2684}
2685
2686/* 2652/*
2687 * Pull a packet from our receive queue and hand it to the user. 2653 * Pull a packet from our receive queue and hand it to the user.
2688 * If necessary we block. 2654 * If necessary we block.
@@ -2708,7 +2674,8 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
2708#endif 2674#endif
2709 2675
2710 if (flags & MSG_ERRQUEUE) { 2676 if (flags & MSG_ERRQUEUE) {
2711 err = packet_recv_error(sk, msg, len); 2677 err = sock_recv_errqueue(sk, msg, len,
2678 SOL_PACKET, PACKET_TX_TIMESTAMP);
2712 goto out; 2679 goto out;
2713 } 2680 }
2714 2681
@@ -2851,12 +2818,11 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
2851 return -EOPNOTSUPP; 2818 return -EOPNOTSUPP;
2852 2819
2853 uaddr->sa_family = AF_PACKET; 2820 uaddr->sa_family = AF_PACKET;
2821 memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
2854 rcu_read_lock(); 2822 rcu_read_lock();
2855 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); 2823 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
2856 if (dev) 2824 if (dev)
2857 strncpy(uaddr->sa_data, dev->name, 14); 2825 strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
2858 else
2859 memset(uaddr->sa_data, 0, 14);
2860 rcu_read_unlock(); 2826 rcu_read_unlock();
2861 *uaddr_len = sizeof(*uaddr); 2827 *uaddr_len = sizeof(*uaddr);
2862 2828
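
The getname fix above addresses two problems at once: strncpy() does not NUL-terminate sa_data when the interface name fills it, and the old else branch only cleared the buffer in the no-device case, leaving stale bytes after short names. Pre-clearing plus strlcpy() fixes both. A small sketch of the difference; strlcpy() is not in ISO C, so a local fallback is provided here.

#include <stdio.h>
#include <string.h>

static size_t local_strlcpy(char *dst, const char *src, size_t size)
{
	size_t len = strlen(src);

	if (size) {
		size_t n = len >= size ? size - 1 : len;

		memcpy(dst, src, n);
		dst[n] = '\0';	/* always terminated, possibly truncated */
	}
	return len;
}

int main(void)
{
	char a[8], b[8];
	const char *name = "verylongifname0";	/* longer than the buffer */

	memset(a, 'X', sizeof(a));
	memset(b, 'X', sizeof(b));

	strncpy(a, name, sizeof(a));		/* no terminating NUL! */
	local_strlcpy(b, name, sizeof(b));	/* always terminated   */

	printf("strncpy terminated: %s\n",
	       memchr(a, '\0', sizeof(a)) ? "yes" : "no");
	printf("strlcpy result: \"%s\"\n", b);
	return 0;
}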
@@ -3260,9 +3226,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
3260 3226
3261 if (po->tp_version == TPACKET_V3) { 3227 if (po->tp_version == TPACKET_V3) {
3262 lv = sizeof(struct tpacket_stats_v3); 3228 lv = sizeof(struct tpacket_stats_v3);
3229 st.stats3.tp_packets += st.stats3.tp_drops;
3263 data = &st.stats3; 3230 data = &st.stats3;
3264 } else { 3231 } else {
3265 lv = sizeof(struct tpacket_stats); 3232 lv = sizeof(struct tpacket_stats);
3233 st.stats1.tp_packets += st.stats1.tp_drops;
3266 data = &st.stats1; 3234 data = &st.stats1;
3267 } 3235 }
3268 3236
@@ -3331,10 +3299,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
3331} 3299}
3332 3300
3333 3301
3334static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data) 3302static int packet_notifier(struct notifier_block *this,
3303 unsigned long msg, void *ptr)
3335{ 3304{
3336 struct sock *sk; 3305 struct sock *sk;
3337 struct net_device *dev = data; 3306 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3338 struct net *net = dev_net(dev); 3307 struct net *net = dev_net(dev);
3339 3308
3340 rcu_read_lock(); 3309 rcu_read_lock();
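
This packet_notifier change, and the identical ones in phonet, rose, and act_mirred below, all follow the same conversion: netdevice notifiers now receive a struct netdev_notifier_info rather than the net_device itself, and netdev_notifier_info_to_dev() extracts the device. A simplified sketch of the shape; the structs here are stand-ins, not the kernel definitions.

#include <stdio.h>

struct net_device {
	char name[16];
};

struct netdev_notifier_info {
	struct net_device *dev;
	/* event-specific fields can follow in derived info structs */
};

static struct net_device *
netdev_notifier_info_to_dev(const struct netdev_notifier_info *info)
{
	return info->dev;
}

/* notifier callbacks keep the (block, event, void *) signature and
 * swap the direct cast for the accessor */
static int demo_notifier(unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	printf("event %lu on %s\n", event, dev->name);
	return 0;
}

int main(void)
{
	struct net_device eth0 = { .name = "eth0" };
	struct netdev_notifier_info info = { .dev = &eth0 };

	return demo_notifier(1 /* NETDEV_UP-like */, &info);
}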
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 45a7df6575de..56a6146ac94b 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -292,9 +292,9 @@ static void phonet_route_autodel(struct net_device *dev)
292 292
293/* notify Phonet of device events */ 293/* notify Phonet of device events */
294static int phonet_device_notify(struct notifier_block *me, unsigned long what, 294static int phonet_device_notify(struct notifier_block *me, unsigned long what,
295 void *arg) 295 void *ptr)
296{ 296{
297 struct net_device *dev = arg; 297 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
298 298
299 switch (what) { 299 switch (what) {
300 case NETDEV_REGISTER: 300 case NETDEV_REGISTER:
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 1afd1381cdc7..77e38f733496 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -793,7 +793,7 @@ static int pn_res_seq_show(struct seq_file *seq, void *v)
793 struct sock **psk = v; 793 struct sock **psk = v;
794 struct sock *sk = *psk; 794 struct sock *sk = *psk;
795 795
796 seq_printf(seq, "%02X %5d %lu%n", 796 seq_printf(seq, "%02X %5u %lu%n",
797 (int) (psk - pnres.sk), 797 (int) (psk - pnres.sk),
798 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), 798 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
799 sock_i_ino(sk), &len); 799 sock_i_ino(sk), &len);
diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c
index d6bbbbd0af18..c02a8c4bc11f 100644
--- a/net/phonet/sysctl.c
+++ b/net/phonet/sysctl.c
@@ -61,13 +61,13 @@ void phonet_get_local_port_range(int *min, int *max)
61 } while (read_seqretry(&local_port_range_lock, seq)); 61 } while (read_seqretry(&local_port_range_lock, seq));
62} 62}
63 63
64static int proc_local_port_range(ctl_table *table, int write, 64static int proc_local_port_range(struct ctl_table *table, int write,
65 void __user *buffer, 65 void __user *buffer,
66 size_t *lenp, loff_t *ppos) 66 size_t *lenp, loff_t *ppos)
67{ 67{
68 int ret; 68 int ret;
69 int range[2] = {local_port_range[0], local_port_range[1]}; 69 int range[2] = {local_port_range[0], local_port_range[1]};
70 ctl_table tmp = { 70 struct ctl_table tmp = {
71 .data = &range, 71 .data = &range,
72 .maxlen = sizeof(range), 72 .maxlen = sizeof(range),
73 .mode = table->mode, 73 .mode = table->mode,
diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c
index 7e643bafb4af..e4e41b3afce7 100644
--- a/net/rds/ib_sysctl.c
+++ b/net/rds/ib_sysctl.c
@@ -61,7 +61,7 @@ static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64;
61 */ 61 */
62unsigned int rds_ib_sysctl_flow_control = 0; 62unsigned int rds_ib_sysctl_flow_control = 0;
63 63
64static ctl_table rds_ib_sysctl_table[] = { 64static struct ctl_table rds_ib_sysctl_table[] = {
65 { 65 {
66 .procname = "max_send_wr", 66 .procname = "max_send_wr",
67 .data = &rds_ib_sysctl_max_send_wr, 67 .data = &rds_ib_sysctl_max_send_wr,
diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c
index 5d5ebd576f3f..89c91515ed0c 100644
--- a/net/rds/iw_sysctl.c
+++ b/net/rds/iw_sysctl.c
@@ -55,7 +55,7 @@ static unsigned long rds_iw_sysctl_max_unsig_bytes_max = ~0UL;
55 55
56unsigned int rds_iw_sysctl_flow_control = 1; 56unsigned int rds_iw_sysctl_flow_control = 1;
57 57
58static ctl_table rds_iw_sysctl_table[] = { 58static struct ctl_table rds_iw_sysctl_table[] = {
59 { 59 {
60 .procname = "max_send_wr", 60 .procname = "max_send_wr",
61 .data = &rds_iw_sysctl_max_send_wr, 61 .data = &rds_iw_sysctl_max_send_wr,
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index 907214b4c4d0..b5cb2aa08f33 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c
@@ -49,7 +49,7 @@ unsigned int rds_sysctl_max_unacked_bytes = (16 << 20);
49 49
50unsigned int rds_sysctl_ping_enable = 1; 50unsigned int rds_sysctl_ping_enable = 1;
51 51
52static ctl_table rds_sysctl_rds_table[] = { 52static struct ctl_table rds_sysctl_rds_table[] = {
53 { 53 {
54 .procname = "reconnect_min_delay_ms", 54 .procname = "reconnect_min_delay_ms",
55 .data = &rds_sysctl_reconnect_min_jiffies, 55 .data = &rds_sysctl_reconnect_min_jiffies,
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 1cec5e4f3a5e..1bacc1079942 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -576,14 +576,14 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw)
576} 576}
577EXPORT_SYMBOL(rfkill_set_states); 577EXPORT_SYMBOL(rfkill_set_states);
578 578
579static ssize_t rfkill_name_show(struct device *dev, 579static ssize_t name_show(struct device *dev, struct device_attribute *attr,
580 struct device_attribute *attr, 580 char *buf)
581 char *buf)
582{ 581{
583 struct rfkill *rfkill = to_rfkill(dev); 582 struct rfkill *rfkill = to_rfkill(dev);
584 583
585 return sprintf(buf, "%s\n", rfkill->name); 584 return sprintf(buf, "%s\n", rfkill->name);
586} 585}
586static DEVICE_ATTR_RO(name);
587 587
588static const char *rfkill_get_type_str(enum rfkill_type type) 588static const char *rfkill_get_type_str(enum rfkill_type type)
589{ 589{
@@ -611,54 +611,52 @@ static const char *rfkill_get_type_str(enum rfkill_type type)
611 } 611 }
612} 612}
613 613
614static ssize_t rfkill_type_show(struct device *dev, 614static ssize_t type_show(struct device *dev, struct device_attribute *attr,
615 struct device_attribute *attr, 615 char *buf)
616 char *buf)
617{ 616{
618 struct rfkill *rfkill = to_rfkill(dev); 617 struct rfkill *rfkill = to_rfkill(dev);
619 618
620 return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type)); 619 return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type));
621} 620}
621static DEVICE_ATTR_RO(type);
622 622
623static ssize_t rfkill_idx_show(struct device *dev, 623static ssize_t index_show(struct device *dev, struct device_attribute *attr,
624 struct device_attribute *attr, 624 char *buf)
625 char *buf)
626{ 625{
627 struct rfkill *rfkill = to_rfkill(dev); 626 struct rfkill *rfkill = to_rfkill(dev);
628 627
629 return sprintf(buf, "%d\n", rfkill->idx); 628 return sprintf(buf, "%d\n", rfkill->idx);
630} 629}
630static DEVICE_ATTR_RO(index);
631 631
632static ssize_t rfkill_persistent_show(struct device *dev, 632static ssize_t persistent_show(struct device *dev,
633 struct device_attribute *attr, 633 struct device_attribute *attr, char *buf)
634 char *buf)
635{ 634{
636 struct rfkill *rfkill = to_rfkill(dev); 635 struct rfkill *rfkill = to_rfkill(dev);
637 636
638 return sprintf(buf, "%d\n", rfkill->persistent); 637 return sprintf(buf, "%d\n", rfkill->persistent);
639} 638}
639static DEVICE_ATTR_RO(persistent);
640 640
641static ssize_t rfkill_hard_show(struct device *dev, 641static ssize_t hard_show(struct device *dev, struct device_attribute *attr,
642 struct device_attribute *attr, 642 char *buf)
643 char *buf)
644{ 643{
645 struct rfkill *rfkill = to_rfkill(dev); 644 struct rfkill *rfkill = to_rfkill(dev);
646 645
647 return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_HW) ? 1 : 0 ); 646 return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_HW) ? 1 : 0 );
648} 647}
648static DEVICE_ATTR_RO(hard);
649 649
650static ssize_t rfkill_soft_show(struct device *dev, 650static ssize_t soft_show(struct device *dev, struct device_attribute *attr,
651 struct device_attribute *attr, 651 char *buf)
652 char *buf)
653{ 652{
654 struct rfkill *rfkill = to_rfkill(dev); 653 struct rfkill *rfkill = to_rfkill(dev);
655 654
656 return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 1 : 0 ); 655 return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 1 : 0 );
657} 656}
658 657
659static ssize_t rfkill_soft_store(struct device *dev, 658static ssize_t soft_store(struct device *dev, struct device_attribute *attr,
660 struct device_attribute *attr, 659 const char *buf, size_t count)
661 const char *buf, size_t count)
662{ 660{
663 struct rfkill *rfkill = to_rfkill(dev); 661 struct rfkill *rfkill = to_rfkill(dev);
664 unsigned long state; 662 unsigned long state;
@@ -680,6 +678,7 @@ static ssize_t rfkill_soft_store(struct device *dev,
680 678
681 return count; 679 return count;
682} 680}
681static DEVICE_ATTR_RW(soft);
683 682
684static u8 user_state_from_blocked(unsigned long state) 683static u8 user_state_from_blocked(unsigned long state)
685{ 684{
@@ -691,18 +690,16 @@ static u8 user_state_from_blocked(unsigned long state)
691 return RFKILL_USER_STATE_UNBLOCKED; 690 return RFKILL_USER_STATE_UNBLOCKED;
692} 691}
693 692
694static ssize_t rfkill_state_show(struct device *dev, 693static ssize_t state_show(struct device *dev, struct device_attribute *attr,
695 struct device_attribute *attr, 694 char *buf)
696 char *buf)
697{ 695{
698 struct rfkill *rfkill = to_rfkill(dev); 696 struct rfkill *rfkill = to_rfkill(dev);
699 697
700 return sprintf(buf, "%d\n", user_state_from_blocked(rfkill->state)); 698 return sprintf(buf, "%d\n", user_state_from_blocked(rfkill->state));
701} 699}
702 700
703static ssize_t rfkill_state_store(struct device *dev, 701static ssize_t state_store(struct device *dev, struct device_attribute *attr,
704 struct device_attribute *attr, 702 const char *buf, size_t count)
705 const char *buf, size_t count)
706{ 703{
707 struct rfkill *rfkill = to_rfkill(dev); 704 struct rfkill *rfkill = to_rfkill(dev);
708 unsigned long state; 705 unsigned long state;
@@ -725,32 +722,27 @@ static ssize_t rfkill_state_store(struct device *dev,
725 722
726 return count; 723 return count;
727} 724}
725static DEVICE_ATTR_RW(state);
728 726
729static ssize_t rfkill_claim_show(struct device *dev, 727static ssize_t claim_show(struct device *dev, struct device_attribute *attr,
730 struct device_attribute *attr, 728 char *buf)
731 char *buf)
732{ 729{
733 return sprintf(buf, "%d\n", 0); 730 return sprintf(buf, "%d\n", 0);
734} 731}
735 732static DEVICE_ATTR_RO(claim);
736static ssize_t rfkill_claim_store(struct device *dev, 733
737 struct device_attribute *attr, 734static struct attribute *rfkill_dev_attrs[] = {
738 const char *buf, size_t count) 735 &dev_attr_name.attr,
739{ 736 &dev_attr_type.attr,
740 return -EOPNOTSUPP; 737 &dev_attr_index.attr,
741} 738 &dev_attr_persistent.attr,
742 739 &dev_attr_state.attr,
743static struct device_attribute rfkill_dev_attrs[] = { 740 &dev_attr_claim.attr,
744 __ATTR(name, S_IRUGO, rfkill_name_show, NULL), 741 &dev_attr_soft.attr,
745 __ATTR(type, S_IRUGO, rfkill_type_show, NULL), 742 &dev_attr_hard.attr,
746 __ATTR(index, S_IRUGO, rfkill_idx_show, NULL), 743 NULL,
747 __ATTR(persistent, S_IRUGO, rfkill_persistent_show, NULL),
748 __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store),
749 __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store),
750 __ATTR(soft, S_IRUGO|S_IWUSR, rfkill_soft_show, rfkill_soft_store),
751 __ATTR(hard, S_IRUGO, rfkill_hard_show, NULL),
752 __ATTR_NULL
753}; 744};
745ATTRIBUTE_GROUPS(rfkill_dev);
754 746
755static void rfkill_release(struct device *dev) 747static void rfkill_release(struct device *dev)
756{ 748{
@@ -830,7 +822,7 @@ static int rfkill_resume(struct device *dev)
830static struct class rfkill_class = { 822static struct class rfkill_class = {
831 .name = "rfkill", 823 .name = "rfkill",
832 .dev_release = rfkill_release, 824 .dev_release = rfkill_release,
833 .dev_attrs = rfkill_dev_attrs, 825 .dev_groups = rfkill_dev_groups,
834 .dev_uevent = rfkill_dev_uevent, 826 .dev_uevent = rfkill_dev_uevent,
835 .suspend = rfkill_suspend, 827 .suspend = rfkill_suspend,
836 .resume = rfkill_resume, 828 .resume = rfkill_resume,
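
The rfkill conversion renames every handler to the <attr>_show/<attr>_store convention so that DEVICE_ATTR_RO()/DEVICE_ATTR_RW() can emit the matching dev_attr_<attr> structures by token pasting, and ATTRIBUTE_GROUPS(rfkill_dev) then generates the rfkill_dev_groups array that the class's new dev_groups field points at. A simplified sketch of the token-pasting idea; the macro and structs below are stand-ins, and the real kernel macros also carry file modes.

#include <stdio.h>
#include <sys/types.h>

struct device;

struct device_attribute {
	const char *attr_name;
	ssize_t (*show)(struct device *dev, char *buf);
};

/* given name_show(), emit dev_attr_name wired to it */
#define DEMO_DEVICE_ATTR_RO(_name)				\
	static struct device_attribute dev_attr_##_name = {	\
		.attr_name = #_name,				\
		.show = _name##_show,				\
	}

static ssize_t name_show(struct device *dev, char *buf)
{
	return (ssize_t)sprintf(buf, "phy0\n");
}
DEMO_DEVICE_ATTR_RO(name);	/* emits dev_attr_name */

int main(void)
{
	char buf[32];

	dev_attr_name.show(NULL, buf);
	printf("%s: %s", dev_attr_name.attr_name, buf);
	return 0;
}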
diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c
index d11ac79246e4..cf5b145902e5 100644
--- a/net/rfkill/rfkill-regulator.c
+++ b/net/rfkill/rfkill-regulator.c
@@ -30,6 +30,7 @@ struct rfkill_regulator_data {
30static int rfkill_regulator_set_block(void *data, bool blocked) 30static int rfkill_regulator_set_block(void *data, bool blocked)
31{ 31{
32 struct rfkill_regulator_data *rfkill_data = data; 32 struct rfkill_regulator_data *rfkill_data = data;
33 int ret = 0;
33 34
34 pr_debug("%s: blocked: %d\n", __func__, blocked); 35 pr_debug("%s: blocked: %d\n", __func__, blocked);
35 36
@@ -40,15 +41,16 @@ static int rfkill_regulator_set_block(void *data, bool blocked)
40 } 41 }
41 } else { 42 } else {
42 if (!rfkill_data->reg_enabled) { 43 if (!rfkill_data->reg_enabled) {
43 regulator_enable(rfkill_data->vcc); 44 ret = regulator_enable(rfkill_data->vcc);
44 rfkill_data->reg_enabled = true; 45 if (!ret)
46 rfkill_data->reg_enabled = true;
45 } 47 }
46 } 48 }
47 49
48 pr_debug("%s: regulator_is_enabled after set_block: %d\n", __func__, 50 pr_debug("%s: regulator_is_enabled after set_block: %d\n", __func__,
49 regulator_is_enabled(rfkill_data->vcc)); 51 regulator_is_enabled(rfkill_data->vcc));
50 52
51 return 0; 53 return ret;
52} 54}
53 55
54static struct rfkill_ops rfkill_regulator_ops = { 56static struct rfkill_ops rfkill_regulator_ops = {
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 9c8347451597..e98fcfbe6007 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -202,10 +202,10 @@ static void rose_kill_by_device(struct net_device *dev)
202/* 202/*
203 * Handle device status changes. 203 * Handle device status changes.
204 */ 204 */
205static int rose_device_event(struct notifier_block *this, unsigned long event, 205static int rose_device_event(struct notifier_block *this,
206 void *ptr) 206 unsigned long event, void *ptr)
207{ 207{
208 struct net_device *dev = (struct net_device *)ptr; 208 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
209 209
210 if (!net_eq(dev_net(dev), &init_net)) 210 if (!net_eq(dev_net(dev), &init_net))
211 return NOTIFY_DONE; 211 return NOTIFY_DONE;
diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c
index 94ca9c2ccd69..89a9278795a9 100644
--- a/net/rose/sysctl_net_rose.c
+++ b/net/rose/sysctl_net_rose.c
@@ -24,7 +24,7 @@ static int min_window[] = {1}, max_window[] = {7};
24 24
25static struct ctl_table_header *rose_table_header; 25static struct ctl_table_header *rose_table_header;
26 26
27static ctl_table rose_table[] = { 27static struct ctl_table rose_table[] = {
28 { 28 {
29 .procname = "restart_request_timeout", 29 .procname = "restart_request_timeout",
30 .data = &sysctl_rose_restart_request_timeout, 30 .data = &sysctl_rose_restart_request_timeout,
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 235e01acac51..c03a32a0418e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -272,6 +272,20 @@ config NET_SCH_FQ_CODEL
272 272
273 If unsure, say N. 273 If unsure, say N.
274 274
275config NET_SCH_FQ
276 tristate "Fair Queue"
277 help
278 Say Y here if you want to use the FQ packet scheduling algorithm.
279
280 FQ does flow separation, and is able to respect pacing requirements
 281	  set by the TCP stack in sk->sk_pacing_rate (for locally generated
 282	  traffic).
283
284 To compile this driver as a module, choose M here: the module
285 will be called sch_fq.
286
287 If unsure, say N.
288
275config NET_SCH_INGRESS 289config NET_SCH_INGRESS
276 tristate "Ingress Qdisc" 290 tristate "Ingress Qdisc"
277 depends on NET_CLS_ACT 291 depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 978cbf004e80..e5f9abe9a5db 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
39obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o 39obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
40obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o 40obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o
41obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o 41obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o
42obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o
42 43
43obj-$(CONFIG_NET_CLS_U32) += cls_u32.o 44obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
44obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o 45obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 5d676edc22a6..977c10e0631b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -243,7 +243,7 @@ nla_put_failure:
243static int mirred_device_event(struct notifier_block *unused, 243static int mirred_device_event(struct notifier_block *unused,
244 unsigned long event, void *ptr) 244 unsigned long event, void *ptr)
245{ 245{
246 struct net_device *dev = ptr; 246 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
247 struct tcf_mirred *m; 247 struct tcf_mirred *m;
248 248
249 if (event == NETDEV_UNREGISTER) 249 if (event == NETDEV_UNREGISTER)
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 3a294eb98d61..867b4a3e3980 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -23,19 +23,18 @@
23#include <net/sock.h> 23#include <net/sock.h>
24#include <net/cls_cgroup.h> 24#include <net/cls_cgroup.h>
25 25
26static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp) 26static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css)
27{ 27{
28 return container_of(cgroup_subsys_state(cgrp, net_cls_subsys_id), 28 return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
29 struct cgroup_cls_state, css);
30} 29}
31 30
32static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p) 31static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p)
33{ 32{
34 return container_of(task_subsys_state(p, net_cls_subsys_id), 33 return css_cls_state(task_css(p, net_cls_subsys_id));
35 struct cgroup_cls_state, css);
36} 34}
37 35
38static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp) 36static struct cgroup_subsys_state *
37cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
39{ 38{
40 struct cgroup_cls_state *cs; 39 struct cgroup_cls_state *cs;
41 40
@@ -45,17 +44,19 @@ static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
45 return &cs->css; 44 return &cs->css;
46} 45}
47 46
48static int cgrp_css_online(struct cgroup *cgrp) 47static int cgrp_css_online(struct cgroup_subsys_state *css)
49{ 48{
50 if (cgrp->parent) 49 struct cgroup_cls_state *cs = css_cls_state(css);
51 cgrp_cls_state(cgrp)->classid = 50 struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
52 cgrp_cls_state(cgrp->parent)->classid; 51
52 if (parent)
53 cs->classid = parent->classid;
53 return 0; 54 return 0;
54} 55}
55 56
56static void cgrp_css_free(struct cgroup *cgrp) 57static void cgrp_css_free(struct cgroup_subsys_state *css)
57{ 58{
58 kfree(cgrp_cls_state(cgrp)); 59 kfree(css_cls_state(css));
59} 60}
60 61
61static int update_classid(const void *v, struct file *file, unsigned n) 62static int update_classid(const void *v, struct file *file, unsigned n)
@@ -67,12 +68,13 @@ static int update_classid(const void *v, struct file *file, unsigned n)
67 return 0; 68 return 0;
68} 69}
69 70
70static void cgrp_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) 71static void cgrp_attach(struct cgroup_subsys_state *css,
72 struct cgroup_taskset *tset)
71{ 73{
72 struct task_struct *p; 74 struct task_struct *p;
73 void *v; 75 void *v;
74 76
75 cgroup_taskset_for_each(p, cgrp, tset) { 77 cgroup_taskset_for_each(p, css, tset) {
76 task_lock(p); 78 task_lock(p);
77 v = (void *)(unsigned long)task_cls_classid(p); 79 v = (void *)(unsigned long)task_cls_classid(p);
78 iterate_fd(p->files, 0, update_classid, v); 80 iterate_fd(p->files, 0, update_classid, v);
@@ -80,14 +82,15 @@ static void cgrp_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
80 } 82 }
81} 83}
82 84
83static u64 read_classid(struct cgroup *cgrp, struct cftype *cft) 85static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
84{ 86{
85 return cgrp_cls_state(cgrp)->classid; 87 return css_cls_state(css)->classid;
86} 88}
87 89
88static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value) 90static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
91 u64 value)
89{ 92{
90 cgrp_cls_state(cgrp)->classid = (u32) value; 93 css_cls_state(css)->classid = (u32) value;
91 return 0; 94 return 0;
92} 95}
93 96
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 2b935e7cfe7b..2adda7fa2d39 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -200,6 +200,58 @@ int unregister_qdisc(struct Qdisc_ops *qops)
200} 200}
201EXPORT_SYMBOL(unregister_qdisc); 201EXPORT_SYMBOL(unregister_qdisc);
202 202
203/* Get default qdisc if not otherwise specified */
204void qdisc_get_default(char *name, size_t len)
205{
206 read_lock(&qdisc_mod_lock);
207 strlcpy(name, default_qdisc_ops->id, len);
208 read_unlock(&qdisc_mod_lock);
209}
210
211static struct Qdisc_ops *qdisc_lookup_default(const char *name)
212{
213 struct Qdisc_ops *q = NULL;
214
215 for (q = qdisc_base; q; q = q->next) {
216 if (!strcmp(name, q->id)) {
217 if (!try_module_get(q->owner))
218 q = NULL;
219 break;
220 }
221 }
222
223 return q;
224}
225
226/* Set new default qdisc to use */
227int qdisc_set_default(const char *name)
228{
229 const struct Qdisc_ops *ops;
230
231 if (!capable(CAP_NET_ADMIN))
232 return -EPERM;
233
234 write_lock(&qdisc_mod_lock);
235 ops = qdisc_lookup_default(name);
236 if (!ops) {
237 /* Not found, drop lock and try to load module */
238 write_unlock(&qdisc_mod_lock);
239 request_module("sch_%s", name);
240 write_lock(&qdisc_mod_lock);
241
242 ops = qdisc_lookup_default(name);
243 }
244
245 if (ops) {
246 /* Set new default */
247 module_put(default_qdisc_ops->owner);
248 default_qdisc_ops = ops;
249 }
250 write_unlock(&qdisc_mod_lock);
251
252 return ops ? 0 : -ENOENT;
253}
254
203/* We know handle. Find qdisc among all qdisc's attached to device 255/* We know handle. Find qdisc among all qdisc's attached to device
204 (root qdisc, all its children, children of children etc.) 256 (root qdisc, all its children, children of children etc.)
205 */ 257 */
@@ -285,28 +337,70 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
285 return q; 337 return q;
286} 338}
287 339
 340/* The linklayer setting was not transferred from iproute2 in older
 341 * versions, and the rate table lookup system has been dropped from
 342 * the kernel. To stay backward compatible with older iproute2 tc
 343 * utils, we detect the linklayer setting by checking whether the rate
 344 * table was modified.
 345 *
 346 * For linklayer ATM table entries, the rate table will be aligned to
 347 * 48 bytes, thus some table entries will contain the same value. The
 348 * mpu (min packet unit) is also encoded into the old rate table, thus
 349 * starting from the mpu, we find the low and high table entries for
 350 * mapping this cell. If these entries contain the same value, then
 351 * the rate table has been modified for linklayer ATM.
 352 *
 353 * This is done by rounding the mpu up to the nearest 48-byte cell,
 354 * then rounding up to the next cell, computing the table entry one
 355 * below, and comparing the two.
 356 */
357static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
358{
359 int low = roundup(r->mpu, 48);
360 int high = roundup(low+1, 48);
361 int cell_low = low >> r->cell_log;
362 int cell_high = (high >> r->cell_log) - 1;
363
364 /* rtab is too inaccurate at rates > 100Mbit/s */
365 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
366 pr_debug("TC linklayer: Giving up ATM detection\n");
367 return TC_LINKLAYER_ETHERNET;
368 }
369
370 if ((cell_high > cell_low) && (cell_high < 256)
371 && (rtab[cell_low] == rtab[cell_high])) {
372 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
373 cell_low, cell_high, rtab[cell_high]);
374 return TC_LINKLAYER_ATM;
375 }
376 return TC_LINKLAYER_ETHERNET;
377}
378
288static struct qdisc_rate_table *qdisc_rtab_list; 379static struct qdisc_rate_table *qdisc_rtab_list;
289 380
290struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab) 381struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
291{ 382{
292 struct qdisc_rate_table *rtab; 383 struct qdisc_rate_table *rtab;
293 384
385 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
386 nla_len(tab) != TC_RTAB_SIZE)
387 return NULL;
388
294 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) { 389 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
295 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) { 390 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
391 !memcmp(&rtab->data, nla_data(tab), 1024)) {
296 rtab->refcnt++; 392 rtab->refcnt++;
297 return rtab; 393 return rtab;
298 } 394 }
299 } 395 }
300 396
301 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
302 nla_len(tab) != TC_RTAB_SIZE)
303 return NULL;
304
305 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL); 397 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
306 if (rtab) { 398 if (rtab) {
307 rtab->rate = *r; 399 rtab->rate = *r;
308 rtab->refcnt = 1; 400 rtab->refcnt = 1;
309 memcpy(rtab->data, nla_data(tab), 1024); 401 memcpy(rtab->data, nla_data(tab), 1024);
402 if (r->linklayer == TC_LINKLAYER_UNAWARE)
403 r->linklayer = __detect_linklayer(r, rtab->data);
310 rtab->next = qdisc_rtab_list; 404 rtab->next = qdisc_rtab_list;
311 qdisc_rtab_list = rtab; 405 qdisc_rtab_list = rtab;
312 } 406 }
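
A worked example of the index arithmetic inside __detect_linklayer(): with mpu = 0 and cell_log = 3 (8-byte granularity) we get low = 0 and high = 48, so the function compares rtab[0] against rtab[5]. On an ATM-aligned table, every packet size within one 48-byte cell costs the same, so those entries match. The values here are illustrative only.

#include <stdio.h>

#define ROUNDUP(x, y) ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	unsigned int mpu = 0, cell_log = 3;
	int low = ROUNDUP(mpu, 48);		/* -> 0  */
	int high = ROUNDUP(low + 1, 48);	/* -> 48 */
	int cell_low = low >> cell_log;		/* -> 0  */
	int cell_high = (high >> cell_log) - 1;	/* -> 5  */

	printf("low=%d high=%d -> compare rtab[%d] and rtab[%d]\n",
	       low, high, cell_low, cell_high);
	return 0;
}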
@@ -1812,6 +1906,7 @@ static int __init pktsched_init(void)
1812 return err; 1906 return err;
1813 } 1907 }
1814 1908
1909 register_qdisc(&pfifo_fast_ops);
1815 register_qdisc(&pfifo_qdisc_ops); 1910 register_qdisc(&pfifo_qdisc_ops);
1816 register_qdisc(&bfifo_qdisc_ops); 1911 register_qdisc(&bfifo_qdisc_ops);
1817 register_qdisc(&pfifo_head_drop_qdisc_ops); 1912 register_qdisc(&pfifo_head_drop_qdisc_ops);
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index ca8e0a57d945..1f9c31411f19 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -605,6 +605,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
605 struct sockaddr_atmpvc pvc; 605 struct sockaddr_atmpvc pvc;
606 int state; 606 int state;
607 607
608 memset(&pvc, 0, sizeof(pvc));
608 pvc.sap_family = AF_ATMPVC; 609 pvc.sap_family = AF_ATMPVC;
609 pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1; 610 pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1;
610 pvc.sap_addr.vpi = flow->vcc->vpi; 611 pvc.sap_addr.vpi = flow->vcc->vpi;
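
This memset(), like the one added to cbq_dump_wrr() below, plugs the classic stack infoleak: assigning every named field of a struct still leaves its padding bytes holding whatever was previously on the stack, and the whole struct is later copied out verbatim. A small user-space sketch of padding surviving field assignment; the struct layout is illustrative, and padding content is formally unspecified, which is exactly the point.

#include <stdio.h>
#include <string.h>

struct demo_opt {
	char flags;	/* 1 byte, then typically 3 padding bytes */
	int allot;	/* int alignment forces the padding above */
};

int main(void)
{
	struct demo_opt opt;
	unsigned char *p = (unsigned char *)&opt;
	unsigned int i;

	memset(&opt, 0xAA, sizeof(opt));	/* stale "stack" contents   */
	opt.flags = 1;				/* every named field is set */
	opt.allot = 42;

	for (i = 0; i < sizeof(opt); i++)	/* padding may still read 0xAA */
		printf("%02x ", p[i]);
	printf("\n");
	return 0;
}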
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 1bc210ffcba2..7a42c81a19eb 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -130,7 +130,7 @@ struct cbq_class {
130 psched_time_t penalized; 130 psched_time_t penalized;
131 struct gnet_stats_basic_packed bstats; 131 struct gnet_stats_basic_packed bstats;
132 struct gnet_stats_queue qstats; 132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est; 133 struct gnet_stats_rate_est64 rate_est;
134 struct tc_cbq_xstats xstats; 134 struct tc_cbq_xstats xstats;
135 135
136 struct tcf_proto *filter_list; 136 struct tcf_proto *filter_list;
@@ -1465,6 +1465,7 @@ static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1465 unsigned char *b = skb_tail_pointer(skb); 1465 unsigned char *b = skb_tail_pointer(skb);
1466 struct tc_cbq_wrropt opt; 1466 struct tc_cbq_wrropt opt;
1467 1467
1468 memset(&opt, 0, sizeof(opt));
1468 opt.flags = 0; 1469 opt.flags = 0;
1469 opt.allot = cl->allot; 1470 opt.allot = cl->allot;
1470 opt.priority = cl->priority + 1; 1471 opt.priority = cl->priority + 1;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index ef53ab8d0aae..ddd73cb2d7ba 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -438,7 +438,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
438 if (mask != q->tab_mask) { 438 if (mask != q->tab_mask) {
439 struct sk_buff **ntab; 439 struct sk_buff **ntab;
440 440
441 ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL); 441 ntab = kcalloc(mask + 1, sizeof(struct sk_buff *),
442 GFP_KERNEL | __GFP_NOWARN);
442 if (!ntab) 443 if (!ntab)
443 ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *)); 444 ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
444 if (!ntab) 445 if (!ntab)
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 759b308d1a8d..8302717ea303 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -25,7 +25,7 @@ struct drr_class {
25 25
26 struct gnet_stats_basic_packed bstats; 26 struct gnet_stats_basic_packed bstats;
27 struct gnet_stats_queue qstats; 27 struct gnet_stats_queue qstats;
28 struct gnet_stats_rate_est rate_est; 28 struct gnet_stats_rate_est64 rate_est;
29 struct list_head alist; 29 struct list_head alist;
30 struct Qdisc *qdisc; 30 struct Qdisc *qdisc;
31 31
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
new file mode 100644
index 000000000000..a2fef8b10b96
--- /dev/null
+++ b/net/sched/sch_fq.c
@@ -0,0 +1,817 @@
1/*
2 * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing)
3 *
4 * Copyright (C) 2013 Eric Dumazet <edumazet@google.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * Meant to be used mostly for locally generated traffic:
12 * Fast classification depends on skb->sk being set before reaching us.
13 * If not (router workload), we fall back to rxhash, a 32-bit wide hash.
14 * All packets belonging to a socket are considered a 'flow'.
15 *
16 * Flows are dynamically allocated and stored in a hash table of RB trees.
17 * They are also part of one of two Round Robin queues (new or old flows).
18 *
19 * Burst avoidance (aka pacing) capability:
20 *
21 * A transport (e.g. TCP) can set a rate in sk->sk_pacing_rate, enqueue a
22 * bunch of packets, and this packet scheduler adds delay between
23 * packets to respect the rate limitation.
24 *
25 * enqueue():
26 * - lookup one RB tree (out of 1024 or more) to find the flow.
27 * If the flow does not exist, create it and add it to the tree.
28 * Add the skb to the per-flow list of skbs (a fifo).
29 * - Use a special fifo for high prio packets.
30 *
31 * dequeue(): serves flows in Round Robin.
32 * Note: when a flow becomes empty, we do not immediately remove it from
33 * the rb trees, for performance reasons (it is expected to send additional
34 * packets, or the SLAB cache will reuse the socket for another flow).
35 */
36
37#include <linux/module.h>
38#include <linux/types.h>
39#include <linux/kernel.h>
40#include <linux/jiffies.h>
41#include <linux/string.h>
42#include <linux/in.h>
43#include <linux/errno.h>
44#include <linux/init.h>
45#include <linux/skbuff.h>
46#include <linux/slab.h>
47#include <linux/rbtree.h>
48#include <linux/hash.h>
49#include <linux/prefetch.h>
50#include <net/netlink.h>
51#include <net/pkt_sched.h>
52#include <net/sock.h>
53#include <net/tcp_states.h>
54
55/*
56 * Per flow structure, dynamically allocated
57 */
58struct fq_flow {
59 struct sk_buff *head; /* list of skbs for this flow : first skb */
60 union {
61 struct sk_buff *tail; /* last skb in the list */
62 unsigned long age; /* jiffies when flow was emptied, for gc */
63 };
64 struct rb_node fq_node; /* anchor in fq_root[] trees */
65 struct sock *sk;
66 int qlen; /* number of packets in flow queue */
67 int credit;
68 u32 socket_hash; /* sk_hash */
69 struct fq_flow *next; /* next pointer in RR lists, or &detached */
70
71 struct rb_node rate_node; /* anchor in q->delayed tree */
72 u64 time_next_packet;
73};
74
75struct fq_flow_head {
76 struct fq_flow *first;
77 struct fq_flow *last;
78};
79
80struct fq_sched_data {
81 struct fq_flow_head new_flows;
82
83 struct fq_flow_head old_flows;
84
85 struct rb_root delayed; /* for rate limited flows */
86 u64 time_next_delayed_flow;
87
88 struct fq_flow internal; /* for non classified or high prio packets */
89 u32 quantum;
90 u32 initial_quantum;
91 u32 flow_default_rate;/* rate per flow : bytes per second */
92 u32 flow_max_rate; /* optional max rate per flow */
93 u32 flow_plimit; /* max packets per flow */
94 struct rb_root *fq_root;
95 u8 rate_enable;
96 u8 fq_trees_log;
97
98 u32 flows;
99 u32 inactive_flows;
100 u32 throttled_flows;
101
102 u64 stat_gc_flows;
103 u64 stat_internal_packets;
104 u64 stat_tcp_retrans;
105 u64 stat_throttled;
106 u64 stat_flows_plimit;
107 u64 stat_pkts_too_long;
108 u64 stat_allocation_errors;
109 struct qdisc_watchdog watchdog;
110};
111
112/* special value to mark a detached flow (not on old/new list) */
113static struct fq_flow detached, throttled;
114
115static void fq_flow_set_detached(struct fq_flow *f)
116{
117 f->next = &detached;
118}
119
120static bool fq_flow_is_detached(const struct fq_flow *f)
121{
122 return f->next == &detached;
123}
124
125static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
126{
127 struct rb_node **p = &q->delayed.rb_node, *parent = NULL;
128
129 while (*p) {
130 struct fq_flow *aux;
131
132 parent = *p;
133 aux = container_of(parent, struct fq_flow, rate_node);
134 if (f->time_next_packet >= aux->time_next_packet)
135 p = &parent->rb_right;
136 else
137 p = &parent->rb_left;
138 }
139 rb_link_node(&f->rate_node, parent, p);
140 rb_insert_color(&f->rate_node, &q->delayed);
141 q->throttled_flows++;
142 q->stat_throttled++;
143
144 f->next = &throttled;
145 if (q->time_next_delayed_flow > f->time_next_packet)
146 q->time_next_delayed_flow = f->time_next_packet;
147}
148
149
150static struct kmem_cache *fq_flow_cachep __read_mostly;
151
152static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow)
153{
154 if (head->first)
155 head->last->next = flow;
156 else
157 head->first = flow;
158 head->last = flow;
159 flow->next = NULL;
160}
161
162/* limit number of collected flows per round */
163#define FQ_GC_MAX 8
164#define FQ_GC_AGE (3*HZ)
165
166static bool fq_gc_candidate(const struct fq_flow *f)
167{
168 return fq_flow_is_detached(f) &&
169 time_after(jiffies, f->age + FQ_GC_AGE);
170}
171
172static void fq_gc(struct fq_sched_data *q,
173 struct rb_root *root,
174 struct sock *sk)
175{
176 struct fq_flow *f, *tofree[FQ_GC_MAX];
177 struct rb_node **p, *parent;
178 int fcnt = 0;
179
180 p = &root->rb_node;
181 parent = NULL;
182 while (*p) {
183 parent = *p;
184
185 f = container_of(parent, struct fq_flow, fq_node);
186 if (f->sk == sk)
187 break;
188
189 if (fq_gc_candidate(f)) {
190 tofree[fcnt++] = f;
191 if (fcnt == FQ_GC_MAX)
192 break;
193 }
194
195 if (f->sk > sk)
196 p = &parent->rb_right;
197 else
198 p = &parent->rb_left;
199 }
200
201 q->flows -= fcnt;
202 q->inactive_flows -= fcnt;
203 q->stat_gc_flows += fcnt;
204 while (fcnt) {
205 struct fq_flow *f = tofree[--fcnt];
206
207 rb_erase(&f->fq_node, root);
208 kmem_cache_free(fq_flow_cachep, f);
209 }
210}
211
212static const u8 prio2band[TC_PRIO_MAX + 1] = {
213 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
214};
215
216static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
217{
218 struct rb_node **p, *parent;
219 struct sock *sk = skb->sk;
220 struct rb_root *root;
221 struct fq_flow *f;
222 int band;
223
224 /* warning: no starvation prevention... */
225 band = prio2band[skb->priority & TC_PRIO_MAX];
226 if (unlikely(band == 0))
227 return &q->internal;
228
229 if (unlikely(!sk)) {
230 /* By forcing low order bit to 1, we make sure to not
231 * collide with a local flow (socket pointers are word aligned)
232 */
233 sk = (struct sock *)(skb_get_rxhash(skb) | 1L);
234 }
235
236 root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)];
237
238 if (q->flows >= (2U << q->fq_trees_log) &&
239 q->inactive_flows > q->flows/2)
240 fq_gc(q, root, sk);
241
242 p = &root->rb_node;
243 parent = NULL;
244 while (*p) {
245 parent = *p;
246
247 f = container_of(parent, struct fq_flow, fq_node);
248 if (f->sk == sk) {
249 /* socket might have been reallocated, so check
250 * if its sk_hash is the same.
251 * If not, we need to refill credit with
252 * the initial quantum.
253 */
254 if (unlikely(skb->sk &&
255 f->socket_hash != sk->sk_hash)) {
256 f->credit = q->initial_quantum;
257 f->socket_hash = sk->sk_hash;
258 }
259 return f;
260 }
261 if (f->sk > sk)
262 p = &parent->rb_right;
263 else
264 p = &parent->rb_left;
265 }
266
267 f = kmem_cache_zalloc(fq_flow_cachep, GFP_ATOMIC | __GFP_NOWARN);
268 if (unlikely(!f)) {
269 q->stat_allocation_errors++;
270 return &q->internal;
271 }
272 fq_flow_set_detached(f);
273 f->sk = sk;
274 if (skb->sk)
275 f->socket_hash = sk->sk_hash;
276 f->credit = q->initial_quantum;
277
278 rb_link_node(&f->fq_node, parent, p);
279 rb_insert_color(&f->fq_node, root);
280
281 q->flows++;
282 q->inactive_flows++;
283 return f;
284}
285
286
287/* remove one skb from head of flow queue */
288static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
289{
290 struct sk_buff *skb = flow->head;
291
292 if (skb) {
293 flow->head = skb->next;
294 skb->next = NULL;
295 flow->qlen--;
296 sch->qstats.backlog -= qdisc_pkt_len(skb);
297 sch->q.qlen--;
298 }
299 return skb;
300}
301
302/* We might add detection of retransmits in the future.
303 * For the time being, just return false.
304 */
305static bool skb_is_retransmit(struct sk_buff *skb)
306{
307 return false;
308}
309
310/* add skb to flow queue
311 * The flow queue is a linked list, kind of a FIFO, except for TCP retransmits:
312 * we special-case TCP retransmits so they are transmitted before other packets.
313 * We rely on the fact that TCP retransmits are unlikely, so we do not waste
314 * a separate queue or a pointer on them.
315 * head-> [retrans pkt 1]
316 * [retrans pkt 2]
317 * [ normal pkt 1]
318 * [ normal pkt 2]
319 * [ normal pkt 3]
320 * tail-> [ normal pkt 4]
321 */
322static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
323{
324 struct sk_buff *prev, *head = flow->head;
325
326 skb->next = NULL;
327 if (!head) {
328 flow->head = skb;
329 flow->tail = skb;
330 return;
331 }
332 if (likely(!skb_is_retransmit(skb))) {
333 flow->tail->next = skb;
334 flow->tail = skb;
335 return;
336 }
337
338 /* This skb is a tcp retransmit,
339 * find the last retrans packet in the queue
340 */
341 prev = NULL;
342 while (skb_is_retransmit(head)) {
343 prev = head;
344 head = head->next;
345 if (!head)
346 break;
347 }
348 if (!prev) { /* no rtx packet in queue, become the new head */
349 skb->next = flow->head;
350 flow->head = skb;
351 } else {
352 if (prev == flow->tail)
353 flow->tail = skb;
354 else
355 skb->next = prev->next;
356 prev->next = skb;
357 }
358}
359
360static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
361{
362 struct fq_sched_data *q = qdisc_priv(sch);
363 struct fq_flow *f;
364
365 if (unlikely(sch->q.qlen >= sch->limit))
366 return qdisc_drop(skb, sch);
367
368 f = fq_classify(skb, q);
369 if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
370 q->stat_flows_plimit++;
371 return qdisc_drop(skb, sch);
372 }
373
374 f->qlen++;
375 flow_queue_add(f, skb);
376 if (skb_is_retransmit(skb))
377 q->stat_tcp_retrans++;
378 sch->qstats.backlog += qdisc_pkt_len(skb);
379 if (fq_flow_is_detached(f)) {
380 fq_flow_add_tail(&q->new_flows, f);
381 if (q->quantum > f->credit)
382 f->credit = q->quantum;
383 q->inactive_flows--;
384 qdisc_unthrottled(sch);
385 }
386 if (unlikely(f == &q->internal)) {
387 q->stat_internal_packets++;
388 qdisc_unthrottled(sch);
389 }
390 sch->q.qlen++;
391
392 return NET_XMIT_SUCCESS;
393}
394
395static void fq_check_throttled(struct fq_sched_data *q, u64 now)
396{
397 struct rb_node *p;
398
399 if (q->time_next_delayed_flow > now)
400 return;
401
402 q->time_next_delayed_flow = ~0ULL;
403 while ((p = rb_first(&q->delayed)) != NULL) {
404 struct fq_flow *f = container_of(p, struct fq_flow, rate_node);
405
406 if (f->time_next_packet > now) {
407 q->time_next_delayed_flow = f->time_next_packet;
408 break;
409 }
410 rb_erase(p, &q->delayed);
411 q->throttled_flows--;
412 fq_flow_add_tail(&q->old_flows, f);
413 }
414}
415
416static struct sk_buff *fq_dequeue(struct Qdisc *sch)
417{
418 struct fq_sched_data *q = qdisc_priv(sch);
419 u64 now = ktime_to_ns(ktime_get());
420 struct fq_flow_head *head;
421 struct sk_buff *skb;
422 struct fq_flow *f;
423 u32 rate;
424
425 skb = fq_dequeue_head(sch, &q->internal);
426 if (skb)
427 goto out;
428 fq_check_throttled(q, now);
429begin:
430 head = &q->new_flows;
431 if (!head->first) {
432 head = &q->old_flows;
433 if (!head->first) {
434 if (q->time_next_delayed_flow != ~0ULL)
435 qdisc_watchdog_schedule_ns(&q->watchdog,
436 q->time_next_delayed_flow);
437 return NULL;
438 }
439 }
440 f = head->first;
441
442 if (f->credit <= 0) {
443 f->credit += q->quantum;
444 head->first = f->next;
445 fq_flow_add_tail(&q->old_flows, f);
446 goto begin;
447 }
448
449 if (unlikely(f->head && now < f->time_next_packet)) {
450 head->first = f->next;
451 fq_flow_set_throttled(q, f);
452 goto begin;
453 }
454
455 skb = fq_dequeue_head(sch, f);
456 if (!skb) {
457 head->first = f->next;
458 /* force a pass through old_flows to prevent starvation */
459 if ((head == &q->new_flows) && q->old_flows.first) {
460 fq_flow_add_tail(&q->old_flows, f);
461 } else {
462 fq_flow_set_detached(f);
463 f->age = jiffies;
464 q->inactive_flows++;
465 }
466 goto begin;
467 }
468 prefetch(&skb->end);
469 f->time_next_packet = now;
470 f->credit -= qdisc_pkt_len(skb);
471
472 if (f->credit > 0 || !q->rate_enable)
473 goto out;
474
475 if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) {
476 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate;
477
478 rate = min(rate, q->flow_max_rate);
479 } else {
480 rate = q->flow_max_rate;
481 if (rate == ~0U)
482 goto out;
483 }
484 if (rate) {
485 u32 plen = max(qdisc_pkt_len(skb), q->quantum);
486 u64 len = (u64)plen * NSEC_PER_SEC;
487
488 do_div(len, rate);
489 /* Since socket rate can change later,
490 * clamp the delay to 125 ms.
491 * TODO: maybe segment the too big skb, as in commit
492 * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
493 */
494 if (unlikely(len > 125 * NSEC_PER_MSEC)) {
495 len = 125 * NSEC_PER_MSEC;
496 q->stat_pkts_too_long++;
497 }
498
499 f->time_next_packet = now + len;
500 }
501out:
502 qdisc_bstats_update(sch, skb);
503 qdisc_unthrottled(sch);
504 return skb;
505}
506
507static void fq_reset(struct Qdisc *sch)
508{
509 struct fq_sched_data *q = qdisc_priv(sch);
510 struct rb_root *root;
511 struct sk_buff *skb;
512 struct rb_node *p;
513 struct fq_flow *f;
514 unsigned int idx;
515
516 while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL)
517 kfree_skb(skb);
518
519 if (!q->fq_root)
520 return;
521
522 for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
523 root = &q->fq_root[idx];
524 while ((p = rb_first(root)) != NULL) {
525 f = container_of(p, struct fq_flow, fq_node);
526 rb_erase(p, root);
527
528 while ((skb = fq_dequeue_head(sch, f)) != NULL)
529 kfree_skb(skb);
530
531 kmem_cache_free(fq_flow_cachep, f);
532 }
533 }
534 q->new_flows.first = NULL;
535 q->old_flows.first = NULL;
536 q->delayed = RB_ROOT;
537 q->flows = 0;
538 q->inactive_flows = 0;
539 q->throttled_flows = 0;
540}
541
542static void fq_rehash(struct fq_sched_data *q,
543 struct rb_root *old_array, u32 old_log,
544 struct rb_root *new_array, u32 new_log)
545{
546 struct rb_node *op, **np, *parent;
547 struct rb_root *oroot, *nroot;
548 struct fq_flow *of, *nf;
549 int fcnt = 0;
550 u32 idx;
551
552 for (idx = 0; idx < (1U << old_log); idx++) {
553 oroot = &old_array[idx];
554 while ((op = rb_first(oroot)) != NULL) {
555 rb_erase(op, oroot);
556 of = container_of(op, struct fq_flow, fq_node);
557 if (fq_gc_candidate(of)) {
558 fcnt++;
559 kmem_cache_free(fq_flow_cachep, of);
560 continue;
561 }
562 nroot = &new_array[hash_32((u32)(long)of->sk, new_log)];
563
564 np = &nroot->rb_node;
565 parent = NULL;
566 while (*np) {
567 parent = *np;
568
569 nf = container_of(parent, struct fq_flow, fq_node);
570 BUG_ON(nf->sk == of->sk);
571
572 if (nf->sk > of->sk)
573 np = &parent->rb_right;
574 else
575 np = &parent->rb_left;
576 }
577
578 rb_link_node(&of->fq_node, parent, np);
579 rb_insert_color(&of->fq_node, nroot);
580 }
581 }
582 q->flows -= fcnt;
583 q->inactive_flows -= fcnt;
584 q->stat_gc_flows += fcnt;
585}
586
587static int fq_resize(struct fq_sched_data *q, u32 log)
588{
589 struct rb_root *array;
590 u32 idx;
591
592 if (q->fq_root && log == q->fq_trees_log)
593 return 0;
594
595 array = kmalloc(sizeof(struct rb_root) << log, GFP_KERNEL);
596 if (!array)
597 return -ENOMEM;
598
599 for (idx = 0; idx < (1U << log); idx++)
600 array[idx] = RB_ROOT;
601
602 if (q->fq_root) {
603 fq_rehash(q, q->fq_root, q->fq_trees_log, array, log);
604 kfree(q->fq_root);
605 }
606 q->fq_root = array;
607 q->fq_trees_log = log;
608
609 return 0;
610}
611
612static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
613 [TCA_FQ_PLIMIT] = { .type = NLA_U32 },
614 [TCA_FQ_FLOW_PLIMIT] = { .type = NLA_U32 },
615 [TCA_FQ_QUANTUM] = { .type = NLA_U32 },
616 [TCA_FQ_INITIAL_QUANTUM] = { .type = NLA_U32 },
617 [TCA_FQ_RATE_ENABLE] = { .type = NLA_U32 },
618 [TCA_FQ_FLOW_DEFAULT_RATE] = { .type = NLA_U32 },
619 [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 },
620 [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
621};
622
623static int fq_change(struct Qdisc *sch, struct nlattr *opt)
624{
625 struct fq_sched_data *q = qdisc_priv(sch);
626 struct nlattr *tb[TCA_FQ_MAX + 1];
627 int err, drop_count = 0;
628 u32 fq_log;
629
630 if (!opt)
631 return -EINVAL;
632
633 err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy);
634 if (err < 0)
635 return err;
636
637 sch_tree_lock(sch);
638
639 fq_log = q->fq_trees_log;
640
641 if (tb[TCA_FQ_BUCKETS_LOG]) {
642 u32 nval = nla_get_u32(tb[TCA_FQ_BUCKETS_LOG]);
643
644 if (nval >= 1 && nval <= ilog2(256*1024))
645 fq_log = nval;
646 else
647 err = -EINVAL;
648 }
649 if (tb[TCA_FQ_PLIMIT])
650 sch->limit = nla_get_u32(tb[TCA_FQ_PLIMIT]);
651
652 if (tb[TCA_FQ_FLOW_PLIMIT])
653 q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]);
654
655 if (tb[TCA_FQ_QUANTUM])
656 q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
657
658 if (tb[TCA_FQ_INITIAL_QUANTUM])
 659 q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
660
661 if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
662 q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]);
663
664 if (tb[TCA_FQ_FLOW_MAX_RATE])
665 q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
666
667 if (tb[TCA_FQ_RATE_ENABLE]) {
668 u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]);
669
670 if (enable <= 1)
671 q->rate_enable = enable;
672 else
673 err = -EINVAL;
674 }
675
676 if (!err)
677 err = fq_resize(q, fq_log);
678
679 while (sch->q.qlen > sch->limit) {
680 struct sk_buff *skb = fq_dequeue(sch);
681
682 if (!skb)
683 break;
684 kfree_skb(skb);
685 drop_count++;
686 }
687 qdisc_tree_decrease_qlen(sch, drop_count);
688
689 sch_tree_unlock(sch);
690 return err;
691}
692
693static void fq_destroy(struct Qdisc *sch)
694{
695 struct fq_sched_data *q = qdisc_priv(sch);
696
697 fq_reset(sch);
698 kfree(q->fq_root);
699 qdisc_watchdog_cancel(&q->watchdog);
700}
701
702static int fq_init(struct Qdisc *sch, struct nlattr *opt)
703{
704 struct fq_sched_data *q = qdisc_priv(sch);
705 int err;
706
707 sch->limit = 10000;
708 q->flow_plimit = 100;
709 q->quantum = 2 * psched_mtu(qdisc_dev(sch));
710 q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch));
711 q->flow_default_rate = 0;
712 q->flow_max_rate = ~0U;
713 q->rate_enable = 1;
714 q->new_flows.first = NULL;
715 q->old_flows.first = NULL;
716 q->delayed = RB_ROOT;
717 q->fq_root = NULL;
718 q->fq_trees_log = ilog2(1024);
719 qdisc_watchdog_init(&q->watchdog, sch);
720
721 if (opt)
722 err = fq_change(sch, opt);
723 else
724 err = fq_resize(q, q->fq_trees_log);
725
726 return err;
727}
728
729static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
730{
731 struct fq_sched_data *q = qdisc_priv(sch);
732 struct nlattr *opts;
733
734 opts = nla_nest_start(skb, TCA_OPTIONS);
735 if (opts == NULL)
736 goto nla_put_failure;
737
738 if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
739 nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
740 nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
741 nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
742 nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
743 nla_put_u32(skb, TCA_FQ_FLOW_DEFAULT_RATE, q->flow_default_rate) ||
744 nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
745 nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
746 goto nla_put_failure;
747
748 nla_nest_end(skb, opts);
749 return skb->len;
750
751nla_put_failure:
752 return -1;
753}
754
755static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
756{
757 struct fq_sched_data *q = qdisc_priv(sch);
758 u64 now = ktime_to_ns(ktime_get());
759 struct tc_fq_qd_stats st = {
760 .gc_flows = q->stat_gc_flows,
761 .highprio_packets = q->stat_internal_packets,
762 .tcp_retrans = q->stat_tcp_retrans,
763 .throttled = q->stat_throttled,
764 .flows_plimit = q->stat_flows_plimit,
765 .pkts_too_long = q->stat_pkts_too_long,
766 .allocation_errors = q->stat_allocation_errors,
767 .flows = q->flows,
768 .inactive_flows = q->inactive_flows,
769 .throttled_flows = q->throttled_flows,
770 .time_next_delayed_flow = q->time_next_delayed_flow - now,
771 };
772
773 return gnet_stats_copy_app(d, &st, sizeof(st));
774}
775
776static struct Qdisc_ops fq_qdisc_ops __read_mostly = {
777 .id = "fq",
778 .priv_size = sizeof(struct fq_sched_data),
779
780 .enqueue = fq_enqueue,
781 .dequeue = fq_dequeue,
782 .peek = qdisc_peek_dequeued,
783 .init = fq_init,
784 .reset = fq_reset,
785 .destroy = fq_destroy,
786 .change = fq_change,
787 .dump = fq_dump,
788 .dump_stats = fq_dump_stats,
789 .owner = THIS_MODULE,
790};
791
792static int __init fq_module_init(void)
793{
794 int ret;
795
796 fq_flow_cachep = kmem_cache_create("fq_flow_cache",
797 sizeof(struct fq_flow),
798 0, 0, NULL);
799 if (!fq_flow_cachep)
800 return -ENOMEM;
801
802 ret = register_qdisc(&fq_qdisc_ops);
803 if (ret)
804 kmem_cache_destroy(fq_flow_cachep);
805 return ret;
806}
807
808static void __exit fq_module_exit(void)
809{
810 unregister_qdisc(&fq_qdisc_ops);
811 kmem_cache_destroy(fq_flow_cachep);
812}
813
814module_init(fq_module_init)
815module_exit(fq_module_exit)
816MODULE_AUTHOR("Eric Dumazet");
817MODULE_LICENSE("GPL");
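
The heart of the pacing logic in fq_dequeue() is the delay computation: the gap charged to a flow after sending a packet is plen * NSEC_PER_SEC / rate, clamped to 125 ms because sk_pacing_rate can change later. A hedged user-space sketch of just that arithmetic; fq_pacing_delay_ns() is local to this example.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC  1000000000ULL
#define NSEC_PER_MSEC 1000000ULL

static uint64_t fq_pacing_delay_ns(uint32_t plen, uint32_t rate_bytes_ps)
{
	uint64_t len = (uint64_t)plen * NSEC_PER_SEC / rate_bytes_ps;

	if (len > 125 * NSEC_PER_MSEC)	/* rate may change later: clamp */
		len = 125 * NSEC_PER_MSEC;
	return len;
}

int main(void)
{
	/* a 1500-byte packet paced at 1 MB/s -> 1.5 ms between packets */
	printf("%llu ns\n",
	       (unsigned long long)fq_pacing_delay_ns(1500, 1000000));
	return 0;
}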
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 20224086cc28..a74e278654aa 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -25,10 +25,15 @@
25#include <linux/rcupdate.h> 25#include <linux/rcupdate.h>
26#include <linux/list.h> 26#include <linux/list.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/if_vlan.h>
28#include <net/sch_generic.h> 29#include <net/sch_generic.h>
29#include <net/pkt_sched.h> 30#include <net/pkt_sched.h>
30#include <net/dst.h> 31#include <net/dst.h>
31 32
33/* Qdisc to use by default */
34const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
35EXPORT_SYMBOL(default_qdisc_ops);
36
32/* Main transmission queue. */ 37/* Main transmission queue. */
33 38
34/* Modifications to data participating in scheduling must be protected with 39/* Modifications to data participating in scheduling must be protected with
@@ -207,15 +212,19 @@ void __qdisc_run(struct Qdisc *q)
207 212
208unsigned long dev_trans_start(struct net_device *dev) 213unsigned long dev_trans_start(struct net_device *dev)
209{ 214{
210 unsigned long val, res = dev->trans_start; 215 unsigned long val, res;
211 unsigned int i; 216 unsigned int i;
212 217
218 if (is_vlan_dev(dev))
219 dev = vlan_dev_real_dev(dev);
220 res = dev->trans_start;
213 for (i = 0; i < dev->num_tx_queues; i++) { 221 for (i = 0; i < dev->num_tx_queues; i++) {
214 val = netdev_get_tx_queue(dev, i)->trans_start; 222 val = netdev_get_tx_queue(dev, i)->trans_start;
215 if (val && time_after(val, res)) 223 if (val && time_after(val, res))
216 res = val; 224 res = val;
217 } 225 }
218 dev->trans_start = res; 226 dev->trans_start = res;
227
219 return res; 228 return res;
220} 229}
221EXPORT_SYMBOL(dev_trans_start); 230EXPORT_SYMBOL(dev_trans_start);
@@ -525,12 +534,11 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
525 .dump = pfifo_fast_dump, 534 .dump = pfifo_fast_dump,
526 .owner = THIS_MODULE, 535 .owner = THIS_MODULE,
527}; 536};
528EXPORT_SYMBOL(pfifo_fast_ops);
529 537
530static struct lock_class_key qdisc_tx_busylock; 538static struct lock_class_key qdisc_tx_busylock;
531 539
532struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, 540struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
533 struct Qdisc_ops *ops) 541 const struct Qdisc_ops *ops)
534{ 542{
535 void *p; 543 void *p;
536 struct Qdisc *sch; 544 struct Qdisc *sch;
@@ -574,10 +582,14 @@ errout:
574} 582}
575 583
576struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, 584struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
577 struct Qdisc_ops *ops, unsigned int parentid) 585 const struct Qdisc_ops *ops,
586 unsigned int parentid)
578{ 587{
579 struct Qdisc *sch; 588 struct Qdisc *sch;
580 589
590 if (!try_module_get(ops->owner))
591 goto errout;
592
581 sch = qdisc_alloc(dev_queue, ops); 593 sch = qdisc_alloc(dev_queue, ops);
582 if (IS_ERR(sch)) 594 if (IS_ERR(sch))
583 goto errout; 595 goto errout;
@@ -681,7 +693,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
681 693
682 if (dev->tx_queue_len) { 694 if (dev->tx_queue_len) {
683 qdisc = qdisc_create_dflt(dev_queue, 695 qdisc = qdisc_create_dflt(dev_queue,
684 &pfifo_fast_ops, TC_H_ROOT); 696 default_qdisc_ops, TC_H_ROOT);
685 if (!qdisc) { 697 if (!qdisc) {
686 netdev_info(dev, "activation failed\n"); 698 netdev_info(dev, "activation failed\n");
687 return; 699 return;
@@ -734,9 +746,8 @@ void dev_activate(struct net_device *dev)
734 int need_watchdog; 746 int need_watchdog;
735 747
736 /* No queueing discipline is attached to device; 748 /* No queueing discipline is attached to device;
737 create default one i.e. pfifo_fast for devices, 749 * create default one for devices, which need queueing
738 which need queueing and noqueue_qdisc for 750 * and noqueue_qdisc for virtual interfaces
739 virtual interfaces
740 */ 751 */
741 752
742 if (dev->qdisc == &noop_qdisc) 753 if (dev->qdisc == &noop_qdisc)
@@ -901,37 +912,34 @@ void dev_shutdown(struct net_device *dev)
901void psched_ratecfg_precompute(struct psched_ratecfg *r, 912void psched_ratecfg_precompute(struct psched_ratecfg *r,
902 const struct tc_ratespec *conf) 913 const struct tc_ratespec *conf)
903{ 914{
904 u64 factor;
905 u64 mult;
906 int shift;
907
908 memset(r, 0, sizeof(*r)); 915 memset(r, 0, sizeof(*r));
909 r->overhead = conf->overhead; 916 r->overhead = conf->overhead;
910 r->rate_bps = (u64)conf->rate << 3; 917 r->rate_bytes_ps = conf->rate;
918 r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
911 r->mult = 1; 919 r->mult = 1;
912 /* 920 /*
913 * Calibrate mult, shift so that token counting is accurate 921 * The deal here is to replace a divide by a reciprocal one
914 * for smallest packet size (64 bytes). Token (time in ns) is 922 * in fast path (a reciprocal divide is a multiply and a shift)
915 * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will 923 *
916 * work as long as the smallest packet transfer time can be 924 * Normal formula would be :
917 * accurately represented in nanosec. 925 * time_in_ns = (NSEC_PER_SEC * len) / rate_bps
926 *
927 * We compute mult/shift to use instead :
928 * time_in_ns = (len * mult) >> shift;
929 *
930 * We try to get the highest possible mult value for accuracy,
931 * but have to make sure no overflows will ever happen.
918 */ 932 */
919 if (r->rate_bps > 0) { 933 if (r->rate_bytes_ps > 0) {
920 /* 934 u64 factor = NSEC_PER_SEC;
921 * Higher shift gives better accuracy. Find the largest 935
922 * shift such that mult fits in 32 bits. 936 for (;;) {
923 */ 937 r->mult = div64_u64(factor, r->rate_bytes_ps);
924 for (shift = 0; shift < 16; shift++) { 938 if (r->mult & (1U << 31) || factor & (1ULL << 63))
925 r->shift = shift;
926 factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
927 mult = div64_u64(factor, r->rate_bps);
928 if (mult > UINT_MAX)
929 break; 939 break;
940 factor <<= 1;
941 r->shift++;
930 } 942 }
931
932 r->shift = shift - 1;
933 factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
934 r->mult = div64_u64(factor, r->rate_bps);
935 } 943 }
936} 944}
937EXPORT_SYMBOL(psched_ratecfg_precompute); 945EXPORT_SYMBOL(psched_ratecfg_precompute);
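
To make the mult/shift calibration above concrete, here is a minimal user-space sketch (an illustration, not kernel code; the rate is an arbitrary sample value) that runs the same loop as the new psched_ratecfg_precompute() and checks the approximation against an exact divide:

	#include <stdio.h>
	#include <stdint.h>

	#define NSEC_PER_SEC 1000000000ULL

	int main(void)
	{
		uint64_t rate_bytes_ps = 125000000;	/* 1 Gbit/s in bytes per second */
		uint64_t factor = NSEC_PER_SEC;
		uint64_t mult = 1;
		int shift = 0;

		/* Grow factor until mult no longer fits comfortably in 32 bits,
		 * exactly like the loop in psched_ratecfg_precompute().
		 */
		for (;;) {
			mult = factor / rate_bytes_ps;
			if ((mult & (1ULL << 31)) || (factor & (1ULL << 63)))
				break;
			factor <<= 1;
			shift++;
		}

		uint64_t len = 1500;
		uint64_t exact = len * NSEC_PER_SEC / rate_bytes_ps;
		uint64_t approx = (len * mult) >> shift;

		printf("mult=%llu shift=%d exact=%lluns approx=%lluns\n",
		       (unsigned long long)mult, shift,
		       (unsigned long long)exact, (unsigned long long)approx);
		return 0;
	}

For a 1500-byte packet this yields 12000 ns from both formulas; the kernel keeps only the multiply-and-shift in the fast path.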
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 9facea03faeb..c4075610502c 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -114,7 +114,7 @@ struct hfsc_class {
114 114
115 struct gnet_stats_basic_packed bstats; 115 struct gnet_stats_basic_packed bstats;
116 struct gnet_stats_queue qstats; 116 struct gnet_stats_queue qstats;
117 struct gnet_stats_rate_est rate_est; 117 struct gnet_stats_rate_est64 rate_est;
118 unsigned int level; /* class level in hierarchy */ 118 unsigned int level; /* class level in hierarchy */
119 struct tcf_proto *filter_list; /* filter list */ 119 struct tcf_proto *filter_list; /* filter list */
120 unsigned int filter_cnt; /* filter count */ 120 unsigned int filter_cnt; /* filter count */
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index adaedd79389c..863846cc5513 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -65,6 +65,10 @@ static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis f
65module_param (htb_hysteresis, int, 0640); 65module_param (htb_hysteresis, int, 0640);
66MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate"); 66MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
67 67
68static int htb_rate_est = 0; /* htb classes have a default rate estimator */
69module_param(htb_rate_est, int, 0640);
70MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
71
68/* used internally to keep status of single class */ 72/* used internally to keep status of single class */
69enum htb_cmode { 73enum htb_cmode {
70 HTB_CANT_SEND, /* class can't send and can't borrow */ 74 HTB_CANT_SEND, /* class can't send and can't borrow */
@@ -72,95 +76,105 @@ enum htb_cmode {
72 HTB_CAN_SEND /* class can send */ 76 HTB_CAN_SEND /* class can send */
73}; 77};
74 78
75/* interior & leaf nodes; props specific to leaves are marked L: */ 79struct htb_prio {
80 union {
81 struct rb_root row;
82 struct rb_root feed;
83 };
84 struct rb_node *ptr;
85 /* When class changes from state 1->2 and disconnects from
86 * parent's feed then we lost ptr value and start from the
87 * first child again. Here we store classid of the
88 * last valid ptr (used when ptr is NULL).
89 */
90 u32 last_ptr_id;
91};
92
93/* interior & leaf nodes; props specific to leaves are marked L:
94 * To reduce false sharing, place mostly read fields at beginning,
95 * and mostly written ones at the end.
96 */
76struct htb_class { 97struct htb_class {
77 struct Qdisc_class_common common; 98 struct Qdisc_class_common common;
78 /* general class parameters */ 99 struct psched_ratecfg rate;
79 struct gnet_stats_basic_packed bstats; 100 struct psched_ratecfg ceil;
80 struct gnet_stats_queue qstats; 101 s64 buffer, cbuffer;/* token bucket depth/rate */
81 struct gnet_stats_rate_est rate_est; 102 s64 mbuffer; /* max wait time */
82 struct tc_htb_xstats xstats; /* our special stats */ 103 u32 prio; /* these two are used only by leaves... */
83 int refcnt; /* usage count of this class */ 104 int quantum; /* but stored for parent-to-leaf return */
105
106 struct tcf_proto *filter_list; /* class attached filters */
107 int filter_cnt;
108 int refcnt; /* usage count of this class */
109
110 int level; /* our level (see above) */
111 unsigned int children;
112 struct htb_class *parent; /* parent class */
84 113
85 /* topology */ 114 struct gnet_stats_rate_est64 rate_est;
86 int level; /* our level (see above) */
87 unsigned int children;
88 struct htb_class *parent; /* parent class */
89 115
90 int prio; /* these two are used only by leaves... */ 116 /*
91 int quantum; /* but stored for parent-to-leaf return */ 117 * Written often fields
118 */
119 struct gnet_stats_basic_packed bstats;
120 struct gnet_stats_queue qstats;
121 struct tc_htb_xstats xstats; /* our special stats */
122
123 /* token bucket parameters */
124 s64 tokens, ctokens;/* current number of tokens */
125 s64 t_c; /* checkpoint time */
92 126
93 union { 127 union {
94 struct htb_class_leaf { 128 struct htb_class_leaf {
95 struct Qdisc *q;
96 int deficit[TC_HTB_MAXDEPTH];
97 struct list_head drop_list; 129 struct list_head drop_list;
130 int deficit[TC_HTB_MAXDEPTH];
131 struct Qdisc *q;
98 } leaf; 132 } leaf;
99 struct htb_class_inner { 133 struct htb_class_inner {
100 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ 134 struct htb_prio clprio[TC_HTB_NUMPRIO];
101 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
102 /* When class changes from state 1->2 and disconnects from
103 * parent's feed then we lost ptr value and start from the
104 * first child again. Here we store classid of the
105 * last valid ptr (used when ptr is NULL).
106 */
107 u32 last_ptr_id[TC_HTB_NUMPRIO];
108 } inner; 135 } inner;
109 } un; 136 } un;
110 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ 137 s64 pq_key;
111 struct rb_node pq_node; /* node for event queue */
112 s64 pq_key;
113
114 int prio_activity; /* for which prios are we active */
115 enum htb_cmode cmode; /* current mode of the class */
116 138
117 /* class attached filters */ 139 int prio_activity; /* for which prios are we active */
118 struct tcf_proto *filter_list; 140 enum htb_cmode cmode; /* current mode of the class */
119 int filter_cnt; 141 struct rb_node pq_node; /* node for event queue */
142 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
143};
120 144
121 /* token bucket parameters */ 145struct htb_level {
122 struct psched_ratecfg rate; 146 struct rb_root wait_pq;
123 struct psched_ratecfg ceil; 147 struct htb_prio hprio[TC_HTB_NUMPRIO];
124 s64 buffer, cbuffer; /* token bucket depth/rate */
125 s64 mbuffer; /* max wait time */
126 s64 tokens, ctokens; /* current number of tokens */
127 s64 t_c; /* checkpoint time */
128}; 148};
129 149
130struct htb_sched { 150struct htb_sched {
131 struct Qdisc_class_hash clhash; 151 struct Qdisc_class_hash clhash;
132 struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ 152 int defcls; /* class where unclassified flows go to */
153 int rate2quantum; /* quant = rate / rate2quantum */
133 154
134 /* self list - roots of self generating tree */ 155 /* filters for qdisc itself */
135 struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; 156 struct tcf_proto *filter_list;
136 int row_mask[TC_HTB_MAXDEPTH];
137 struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
138 u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
139 157
140 /* self wait list - roots of wait PQs per row */ 158#define HTB_WARN_TOOMANYEVENTS 0x1
141 struct rb_root wait_pq[TC_HTB_MAXDEPTH]; 159 unsigned int warned; /* only one warning */
160 int direct_qlen;
161 struct work_struct work;
142 162
143 /* time of nearest event per level (row) */ 163 /* non shaped skbs; let them go directly thru */
144 s64 near_ev_cache[TC_HTB_MAXDEPTH]; 164 struct sk_buff_head direct_queue;
165 long direct_pkts;
145 166
146 int defcls; /* class where unclassified flows go to */ 167 struct qdisc_watchdog watchdog;
147 168
148 /* filters for qdisc itself */ 169 s64 now; /* cached dequeue time */
149 struct tcf_proto *filter_list; 170 struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
150 171
151 int rate2quantum; /* quant = rate / rate2quantum */ 172 /* time of nearest event per level (row) */
152 s64 now; /* cached dequeue time */ 173 s64 near_ev_cache[TC_HTB_MAXDEPTH];
153 struct qdisc_watchdog watchdog;
154
155 /* non shaped skbs; let them go directly thru */
156 struct sk_buff_head direct_queue;
157 int direct_qlen; /* max qlen of above */
158 174
159 long direct_pkts; 175 int row_mask[TC_HTB_MAXDEPTH];
160 176
161#define HTB_WARN_TOOMANYEVENTS 0x1 177 struct htb_level hlevel[TC_HTB_MAXDEPTH];
162 unsigned int warned; /* only one warning */
163 struct work_struct work;
164}; 178};
165 179
166/* find class in global hash table using given handle */ 180/* find class in global hash table using given handle */
@@ -276,7 +290,7 @@ static void htb_add_to_id_tree(struct rb_root *root,
276static void htb_add_to_wait_tree(struct htb_sched *q, 290static void htb_add_to_wait_tree(struct htb_sched *q,
277 struct htb_class *cl, s64 delay) 291 struct htb_class *cl, s64 delay)
278{ 292{
279 struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL; 293 struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
280 294
281 cl->pq_key = q->now + delay; 295 cl->pq_key = q->now + delay;
282 if (cl->pq_key == q->now) 296 if (cl->pq_key == q->now)
@@ -296,7 +310,7 @@ static void htb_add_to_wait_tree(struct htb_sched *q,
296 p = &parent->rb_left; 310 p = &parent->rb_left;
297 } 311 }
298 rb_link_node(&cl->pq_node, parent, p); 312 rb_link_node(&cl->pq_node, parent, p);
299 rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]); 313 rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
300} 314}
301 315
302/** 316/**
@@ -323,7 +337,7 @@ static inline void htb_add_class_to_row(struct htb_sched *q,
323 while (mask) { 337 while (mask) {
324 int prio = ffz(~mask); 338 int prio = ffz(~mask);
325 mask &= ~(1 << prio); 339 mask &= ~(1 << prio);
326 htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio); 340 htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
327 } 341 }
328} 342}
329 343
@@ -349,16 +363,18 @@ static inline void htb_remove_class_from_row(struct htb_sched *q,
349 struct htb_class *cl, int mask) 363 struct htb_class *cl, int mask)
350{ 364{
351 int m = 0; 365 int m = 0;
366 struct htb_level *hlevel = &q->hlevel[cl->level];
352 367
353 while (mask) { 368 while (mask) {
354 int prio = ffz(~mask); 369 int prio = ffz(~mask);
370 struct htb_prio *hprio = &hlevel->hprio[prio];
355 371
356 mask &= ~(1 << prio); 372 mask &= ~(1 << prio);
357 if (q->ptr[cl->level][prio] == cl->node + prio) 373 if (hprio->ptr == cl->node + prio)
358 htb_next_rb_node(q->ptr[cl->level] + prio); 374 htb_next_rb_node(&hprio->ptr);
359 375
360 htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio); 376 htb_safe_rb_erase(cl->node + prio, &hprio->row);
361 if (!q->row[cl->level][prio].rb_node) 377 if (!hprio->row.rb_node)
362 m |= 1 << prio; 378 m |= 1 << prio;
363 } 379 }
364 q->row_mask[cl->level] &= ~m; 380 q->row_mask[cl->level] &= ~m;
@@ -382,13 +398,13 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
382 int prio = ffz(~m); 398 int prio = ffz(~m);
383 m &= ~(1 << prio); 399 m &= ~(1 << prio);
384 400
385 if (p->un.inner.feed[prio].rb_node) 401 if (p->un.inner.clprio[prio].feed.rb_node)
386 /* parent already has its feed in use so that 402 /* parent already has its feed in use so that
387 * reset bit in mask as parent is already ok 403 * reset bit in mask as parent is already ok
388 */ 404 */
389 mask &= ~(1 << prio); 405 mask &= ~(1 << prio);
390 406
391 htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); 407 htb_add_to_id_tree(&p->un.inner.clprio[prio].feed, cl, prio);
392 } 408 }
393 p->prio_activity |= mask; 409 p->prio_activity |= mask;
394 cl = p; 410 cl = p;
@@ -418,18 +434,19 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
418 int prio = ffz(~m); 434 int prio = ffz(~m);
419 m &= ~(1 << prio); 435 m &= ~(1 << prio);
420 436
421 if (p->un.inner.ptr[prio] == cl->node + prio) { 437 if (p->un.inner.clprio[prio].ptr == cl->node + prio) {
422 /* we are removing child which is pointed to from 438 /* we are removing child which is pointed to from
423 * parent feed - forget the pointer but remember 439 * parent feed - forget the pointer but remember
424 * classid 440 * classid
425 */ 441 */
426 p->un.inner.last_ptr_id[prio] = cl->common.classid; 442 p->un.inner.clprio[prio].last_ptr_id = cl->common.classid;
427 p->un.inner.ptr[prio] = NULL; 443 p->un.inner.clprio[prio].ptr = NULL;
428 } 444 }
429 445
430 htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio); 446 htb_safe_rb_erase(cl->node + prio,
447 &p->un.inner.clprio[prio].feed);
431 448
432 if (!p->un.inner.feed[prio].rb_node) 449 if (!p->un.inner.clprio[prio].feed.rb_node)
433 mask |= 1 << prio; 450 mask |= 1 << prio;
434 } 451 }
435 452
@@ -644,7 +661,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
644 htb_change_class_mode(q, cl, &diff); 661 htb_change_class_mode(q, cl, &diff);
645 if (old_mode != cl->cmode) { 662 if (old_mode != cl->cmode) {
646 if (old_mode != HTB_CAN_SEND) 663 if (old_mode != HTB_CAN_SEND)
647 htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); 664 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
648 if (cl->cmode != HTB_CAN_SEND) 665 if (cl->cmode != HTB_CAN_SEND)
649 htb_add_to_wait_tree(q, cl, diff); 666 htb_add_to_wait_tree(q, cl, diff);
650 } 667 }
@@ -664,7 +681,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
664 * next pending event (0 for no event in pq, q->now for too many events). 681 * next pending event (0 for no event in pq, q->now for too many events).
665 * Note: Applied are events which have cl->pq_key <= q->now. 682 * Note: Applied are events which have cl->pq_key <= q->now.
666 */ 683 */
667static s64 htb_do_events(struct htb_sched *q, int level, 684static s64 htb_do_events(struct htb_sched *q, const int level,
668 unsigned long start) 685 unsigned long start)
669{ 686{
670 /* don't run for longer than 2 jiffies; 2 is used instead of 687 /* don't run for longer than 2 jiffies; 2 is used instead of
@@ -672,10 +689,12 @@ static s64 htb_do_events(struct htb_sched *q, int level,
672 * too soon 689 * too soon
673 */ 690 */
674 unsigned long stop_at = start + 2; 691 unsigned long stop_at = start + 2;
692 struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
693
675 while (time_before(jiffies, stop_at)) { 694 while (time_before(jiffies, stop_at)) {
676 struct htb_class *cl; 695 struct htb_class *cl;
677 s64 diff; 696 s64 diff;
678 struct rb_node *p = rb_first(&q->wait_pq[level]); 697 struct rb_node *p = rb_first(wait_pq);
679 698
680 if (!p) 699 if (!p)
681 return 0; 700 return 0;
@@ -684,7 +703,7 @@ static s64 htb_do_events(struct htb_sched *q, int level,
684 if (cl->pq_key > q->now) 703 if (cl->pq_key > q->now)
685 return cl->pq_key; 704 return cl->pq_key;
686 705
687 htb_safe_rb_erase(p, q->wait_pq + level); 706 htb_safe_rb_erase(p, wait_pq);
688 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer); 707 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
689 htb_change_class_mode(q, cl, &diff); 708 htb_change_class_mode(q, cl, &diff);
690 if (cl->cmode != HTB_CAN_SEND) 709 if (cl->cmode != HTB_CAN_SEND)
@@ -728,8 +747,7 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
728 * 747 *
729 * Find leaf where current feed pointer points to. 748 * Find leaf where current feed pointer points to.
730 */ 749 */
731static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, 750static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
732 struct rb_node **pptr, u32 * pid)
733{ 751{
734 int i; 752 int i;
735 struct { 753 struct {
@@ -738,10 +756,10 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
738 u32 *pid; 756 u32 *pid;
739 } stk[TC_HTB_MAXDEPTH], *sp = stk; 757 } stk[TC_HTB_MAXDEPTH], *sp = stk;
740 758
741 BUG_ON(!tree->rb_node); 759 BUG_ON(!hprio->row.rb_node);
742 sp->root = tree->rb_node; 760 sp->root = hprio->row.rb_node;
743 sp->pptr = pptr; 761 sp->pptr = &hprio->ptr;
744 sp->pid = pid; 762 sp->pid = &hprio->last_ptr_id;
745 763
746 for (i = 0; i < 65535; i++) { 764 for (i = 0; i < 65535; i++) {
747 if (!*sp->pptr && *sp->pid) { 765 if (!*sp->pptr && *sp->pid) {
@@ -768,12 +786,15 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
768 } 786 }
769 } else { 787 } else {
770 struct htb_class *cl; 788 struct htb_class *cl;
789 struct htb_prio *clp;
790
771 cl = rb_entry(*sp->pptr, struct htb_class, node[prio]); 791 cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
772 if (!cl->level) 792 if (!cl->level)
773 return cl; 793 return cl;
774 (++sp)->root = cl->un.inner.feed[prio].rb_node; 794 clp = &cl->un.inner.clprio[prio];
775 sp->pptr = cl->un.inner.ptr + prio; 795 (++sp)->root = clp->feed.rb_node;
776 sp->pid = cl->un.inner.last_ptr_id + prio; 796 sp->pptr = &clp->ptr;
797 sp->pid = &clp->last_ptr_id;
777 } 798 }
778 } 799 }
779 WARN_ON(1); 800 WARN_ON(1);
@@ -783,15 +804,16 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
783/* dequeues packet at given priority and level; call only if 804/* dequeues packet at given priority and level; call only if
784 * you are sure that there is active class at prio/level 805 * you are sure that there is active class at prio/level
785 */ 806 */
786static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, 807static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
787 int level) 808 const int level)
788{ 809{
789 struct sk_buff *skb = NULL; 810 struct sk_buff *skb = NULL;
790 struct htb_class *cl, *start; 811 struct htb_class *cl, *start;
812 struct htb_level *hlevel = &q->hlevel[level];
813 struct htb_prio *hprio = &hlevel->hprio[prio];
814
791 /* look initial class up in the row */ 815 /* look initial class up in the row */
792 start = cl = htb_lookup_leaf(q->row[level] + prio, prio, 816 start = cl = htb_lookup_leaf(hprio, prio);
793 q->ptr[level] + prio,
794 q->last_ptr_id[level] + prio);
795 817
796 do { 818 do {
797next: 819next:
@@ -811,9 +833,7 @@ next:
811 if ((q->row_mask[level] & (1 << prio)) == 0) 833 if ((q->row_mask[level] & (1 << prio)) == 0)
812 return NULL; 834 return NULL;
813 835
814 next = htb_lookup_leaf(q->row[level] + prio, 836 next = htb_lookup_leaf(hprio, prio);
815 prio, q->ptr[level] + prio,
816 q->last_ptr_id[level] + prio);
817 837
818 if (cl == start) /* fix start if we just deleted it */ 838 if (cl == start) /* fix start if we just deleted it */
819 start = next; 839 start = next;
@@ -826,11 +846,9 @@ next:
826 break; 846 break;
827 847
828 qdisc_warn_nonwc("htb", cl->un.leaf.q); 848 qdisc_warn_nonwc("htb", cl->un.leaf.q);
829 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> 849 htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr:
830 ptr[0]) + prio); 850 &q->hlevel[0].hprio[prio].ptr);
831 cl = htb_lookup_leaf(q->row[level] + prio, prio, 851 cl = htb_lookup_leaf(hprio, prio);
832 q->ptr[level] + prio,
833 q->last_ptr_id[level] + prio);
834 852
835 } while (cl != start); 853 } while (cl != start);
836 854
@@ -839,8 +857,8 @@ next:
839 cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb); 857 cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
840 if (cl->un.leaf.deficit[level] < 0) { 858 if (cl->un.leaf.deficit[level] < 0) {
841 cl->un.leaf.deficit[level] += cl->quantum; 859 cl->un.leaf.deficit[level] += cl->quantum;
842 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> 860 htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr :
843 ptr[0]) + prio); 861 &q->hlevel[0].hprio[prio].ptr);
844 } 862 }
845 /* this used to be after charge_class but this constellation 863 /* this used to be after charge_class but this constellation
846 * gives us slightly better performance 864 * gives us slightly better performance
@@ -880,15 +898,14 @@ ok:
880 for (level = 0; level < TC_HTB_MAXDEPTH; level++) { 898 for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
881 /* common case optimization - skip event handler quickly */ 899 /* common case optimization - skip event handler quickly */
882 int m; 900 int m;
883 s64 event; 901 s64 event = q->near_ev_cache[level];
884 902
885 if (q->now >= q->near_ev_cache[level]) { 903 if (q->now >= event) {
886 event = htb_do_events(q, level, start_at); 904 event = htb_do_events(q, level, start_at);
887 if (!event) 905 if (!event)
888 event = q->now + NSEC_PER_SEC; 906 event = q->now + NSEC_PER_SEC;
889 q->near_ev_cache[level] = event; 907 q->near_ev_cache[level] = event;
890 } else 908 }
891 event = q->near_ev_cache[level];
892 909
893 if (next_event > event) 910 if (next_event > event)
894 next_event = event; 911 next_event = event;
@@ -968,10 +985,8 @@ static void htb_reset(struct Qdisc *sch)
968 qdisc_watchdog_cancel(&q->watchdog); 985 qdisc_watchdog_cancel(&q->watchdog);
969 __skb_queue_purge(&q->direct_queue); 986 __skb_queue_purge(&q->direct_queue);
970 sch->q.qlen = 0; 987 sch->q.qlen = 0;
971 memset(q->row, 0, sizeof(q->row)); 988 memset(q->hlevel, 0, sizeof(q->hlevel));
972 memset(q->row_mask, 0, sizeof(q->row_mask)); 989 memset(q->row_mask, 0, sizeof(q->row_mask));
973 memset(q->wait_pq, 0, sizeof(q->wait_pq));
974 memset(q->ptr, 0, sizeof(q->ptr));
975 for (i = 0; i < TC_HTB_NUMPRIO; i++) 990 for (i = 0; i < TC_HTB_NUMPRIO; i++)
976 INIT_LIST_HEAD(q->drops + i); 991 INIT_LIST_HEAD(q->drops + i);
977} 992}
@@ -1192,7 +1207,8 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1192 WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity); 1207 WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
1193 1208
1194 if (parent->cmode != HTB_CAN_SEND) 1209 if (parent->cmode != HTB_CAN_SEND)
1195 htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level); 1210 htb_safe_rb_erase(&parent->pq_node,
1211 &q->hlevel[parent->level].wait_pq);
1196 1212
1197 parent->level = 0; 1213 parent->level = 0;
1198 memset(&parent->un.inner, 0, sizeof(parent->un.inner)); 1214 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
@@ -1281,7 +1297,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
1281 htb_deactivate(q, cl); 1297 htb_deactivate(q, cl);
1282 1298
1283 if (cl->cmode != HTB_CAN_SEND) 1299 if (cl->cmode != HTB_CAN_SEND)
1284 htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); 1300 htb_safe_rb_erase(&cl->pq_node,
1301 &q->hlevel[cl->level].wait_pq);
1285 1302
1286 if (last_child) 1303 if (last_child)
1287 htb_parent_to_leaf(q, cl, new_q); 1304 htb_parent_to_leaf(q, cl, new_q);
@@ -1312,6 +1329,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1312 struct htb_sched *q = qdisc_priv(sch); 1329 struct htb_sched *q = qdisc_priv(sch);
1313 struct htb_class *cl = (struct htb_class *)*arg, *parent; 1330 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1314 struct nlattr *opt = tca[TCA_OPTIONS]; 1331 struct nlattr *opt = tca[TCA_OPTIONS];
1332 struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1315 struct nlattr *tb[TCA_HTB_MAX + 1]; 1333 struct nlattr *tb[TCA_HTB_MAX + 1];
1316 struct tc_htb_opt *hopt; 1334 struct tc_htb_opt *hopt;
1317 1335
@@ -1333,6 +1351,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1333 if (!hopt->rate.rate || !hopt->ceil.rate) 1351 if (!hopt->rate.rate || !hopt->ceil.rate)
1334 goto failure; 1352 goto failure;
1335 1353
1354 /* Keep backward compatibility with rate_table based iproute2 tc */
1355 if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) {
1356 rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
1357 if (rtab)
1358 qdisc_put_rtab(rtab);
1359 }
1360 if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) {
1361 ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
1362 if (ctab)
1363 qdisc_put_rtab(ctab);
1364 }
1365
1336 if (!cl) { /* new class */ 1366 if (!cl) { /* new class */
1337 struct Qdisc *new_q; 1367 struct Qdisc *new_q;
1338 int prio; 1368 int prio;
@@ -1366,12 +1396,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1366 if (!cl) 1396 if (!cl)
1367 goto failure; 1397 goto failure;
1368 1398
1369 err = gen_new_estimator(&cl->bstats, &cl->rate_est, 1399 if (htb_rate_est || tca[TCA_RATE]) {
1370 qdisc_root_sleeping_lock(sch), 1400 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1371 tca[TCA_RATE] ? : &est.nla); 1401 qdisc_root_sleeping_lock(sch),
1372 if (err) { 1402 tca[TCA_RATE] ? : &est.nla);
1373 kfree(cl); 1403 if (err) {
1374 goto failure; 1404 kfree(cl);
1405 goto failure;
1406 }
1375 } 1407 }
1376 1408
1377 cl->refcnt = 1; 1409 cl->refcnt = 1;
@@ -1401,7 +1433,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1401 1433
1402 /* remove from evt list because of level change */ 1434 /* remove from evt list because of level change */
1403 if (parent->cmode != HTB_CAN_SEND) { 1435 if (parent->cmode != HTB_CAN_SEND) {
1404 htb_safe_rb_erase(&parent->pq_node, q->wait_pq); 1436 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1405 parent->cmode = HTB_CAN_SEND; 1437 parent->cmode = HTB_CAN_SEND;
1406 } 1438 }
1407 parent->level = (parent->parent ? parent->parent->level 1439 parent->level = (parent->parent ? parent->parent->level
@@ -1463,7 +1495,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1463 psched_ratecfg_precompute(&cl->ceil, &hopt->ceil); 1495 psched_ratecfg_precompute(&cl->ceil, &hopt->ceil);
1464 1496
1465 cl->buffer = PSCHED_TICKS2NS(hopt->buffer); 1497 cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
1466 cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer); 1498 cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
1467 1499
1468 sch_tree_unlock(sch); 1500 sch_tree_unlock(sch);
1469 1501
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 5da78a19ac9a..2e56185736d6 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -57,7 +57,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
57 57
58 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { 58 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
59 dev_queue = netdev_get_tx_queue(dev, ntx); 59 dev_queue = netdev_get_tx_queue(dev, ntx);
60 qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, 60 qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops,
61 TC_H_MAKE(TC_H_MAJ(sch->handle), 61 TC_H_MAKE(TC_H_MAJ(sch->handle),
62 TC_H_MIN(ntx + 1))); 62 TC_H_MIN(ntx + 1)));
63 if (qdisc == NULL) 63 if (qdisc == NULL)
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index accec33c454c..d44c868cb537 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -124,7 +124,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
124 124
125 for (i = 0; i < dev->num_tx_queues; i++) { 125 for (i = 0; i < dev->num_tx_queues; i++) {
126 dev_queue = netdev_get_tx_queue(dev, i); 126 dev_queue = netdev_get_tx_queue(dev, i);
127 qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, 127 qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops,
128 TC_H_MAKE(TC_H_MAJ(sch->handle), 128 TC_H_MAKE(TC_H_MAJ(sch->handle),
129 TC_H_MIN(i + 1))); 129 TC_H_MIN(i + 1)));
130 if (qdisc == NULL) { 130 if (qdisc == NULL) {
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 3d2acc7a9c80..a6d788d45216 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -23,6 +23,7 @@
23#include <linux/vmalloc.h> 23#include <linux/vmalloc.h>
24#include <linux/rtnetlink.h> 24#include <linux/rtnetlink.h>
25#include <linux/reciprocal_div.h> 25#include <linux/reciprocal_div.h>
26#include <linux/rbtree.h>
26 27
27#include <net/netlink.h> 28#include <net/netlink.h>
28#include <net/pkt_sched.h> 29#include <net/pkt_sched.h>
@@ -68,7 +69,8 @@
68*/ 69*/
69 70
70struct netem_sched_data { 71struct netem_sched_data {
71 /* internal t(ime)fifo qdisc uses sch->q and sch->limit */ 72 /* internal t(ime)fifo qdisc uses t_root and sch->limit */
73 struct rb_root t_root;
72 74
73 /* optional qdisc for classful handling (NULL at netem init) */ 75 /* optional qdisc for classful handling (NULL at netem init) */
74 struct Qdisc *qdisc; 76 struct Qdisc *qdisc;
@@ -128,10 +130,35 @@ struct netem_sched_data {
128 */ 130 */
129struct netem_skb_cb { 131struct netem_skb_cb {
130 psched_time_t time_to_send; 132 psched_time_t time_to_send;
133 ktime_t tstamp_save;
131}; 134};
132 135
136/* Because space in skb->cb[] is tight, netem overloads skb->next/prev/tstamp
137 * to hold a rb_node structure.
138 *
139 * If struct sk_buff layout is changed, the following checks will complain.
140 */
141static struct rb_node *netem_rb_node(struct sk_buff *skb)
142{
143 BUILD_BUG_ON(offsetof(struct sk_buff, next) != 0);
144 BUILD_BUG_ON(offsetof(struct sk_buff, prev) !=
145 offsetof(struct sk_buff, next) + sizeof(skb->next));
146 BUILD_BUG_ON(offsetof(struct sk_buff, tstamp) !=
147 offsetof(struct sk_buff, prev) + sizeof(skb->prev));
148 BUILD_BUG_ON(sizeof(struct rb_node) > sizeof(skb->next) +
149 sizeof(skb->prev) +
150 sizeof(skb->tstamp));
151 return (struct rb_node *)&skb->next;
152}
153
154static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
155{
156 return (struct sk_buff *)rb;
157}
158
133static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) 159static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
134{ 160{
161 /* we assume we can use skb next/prev/tstamp as storage for rb_node */
135 qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb)); 162 qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
136 return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; 163 return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
137} 164}
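
The BUILD_BUG_ON() checks above are compile-time layout assertions: if a future sk_buff change moved next/prev/tstamp apart or left them smaller than a struct rb_node, the build fails instead of silently corrupting queued packets. A minimal user-space analogue of the idiom, with hypothetical stand-in types, looks like this:

	#include <stddef.h>

	struct node {				/* stand-in for struct rb_node */
		struct node *left, *right;
		unsigned long color;
	};

	struct pkt {				/* stand-in for struct sk_buff */
		struct pkt *next;
		struct pkt *prev;
		long long tstamp;
	};

	/* Refuse to compile unless next/prev/tstamp are contiguous and
	 * large enough to be reused as raw storage for a struct node.
	 */
	_Static_assert(offsetof(struct pkt, next) == 0, "next must come first");
	_Static_assert(offsetof(struct pkt, prev) ==
		       offsetof(struct pkt, next) + sizeof(void *),
		       "prev must follow next");
	_Static_assert(sizeof(struct node) <=
		       2 * sizeof(void *) + sizeof(long long),
		       "node must fit in the reused fields");

	static struct node *pkt_node(struct pkt *p)
	{
		return (struct node *)&p->next;	/* reuse the fields as a node */
	}

	int main(void)
	{
		struct pkt p = { 0 };
		return pkt_node(&p) == (struct node *)&p ? 0 : 1;
	}

Since skb->tstamp is clobbered by the reuse, the new tstamp_save field in netem_skb_cb preserves it at enqueue and the dequeue path restores it, as the hunks below show.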
@@ -333,20 +360,23 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
333 360
334static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) 361static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
335{ 362{
336 struct sk_buff_head *list = &sch->q; 363 struct netem_sched_data *q = qdisc_priv(sch);
337 psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; 364 psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
338 struct sk_buff *skb = skb_peek_tail(list); 365 struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
339 366
340 /* Optimize for add at tail */ 367 while (*p) {
341 if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send)) 368 struct sk_buff *skb;
342 return __skb_queue_tail(list, nskb);
343 369
344 skb_queue_reverse_walk(list, skb) { 370 parent = *p;
371 skb = netem_rb_to_skb(parent);
345 if (tnext >= netem_skb_cb(skb)->time_to_send) 372 if (tnext >= netem_skb_cb(skb)->time_to_send)
346 break; 373 p = &parent->rb_right;
374 else
375 p = &parent->rb_left;
347 } 376 }
348 377 rb_link_node(netem_rb_node(nskb), parent, p);
349 __skb_queue_after(list, skb, nskb); 378 rb_insert_color(netem_rb_node(nskb), &q->t_root);
379 sch->q.qlen++;
350} 380}
351 381
352/* 382/*
@@ -382,12 +412,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
382 412
383 /* If a delay is expected, orphan the skb. (orphaning usually takes 413 /* If a delay is expected, orphan the skb. (orphaning usually takes
384 * place at TX completion time, so _before_ the link transit delay) 414 * place at TX completion time, so _before_ the link transit delay)
385 * Ideally, this orphaning should be done after the rate limiting
386 * module, because this breaks TCP Small Queue, and other mechanisms
387 * based on socket sk_wmem_alloc.
388 */ 415 */
389 if (q->latency || q->jitter) 416 if (q->latency || q->jitter)
390 skb_orphan(skb); 417 skb_orphan_partial(skb);
391 418
392 /* 419 /*
393 * If we need to duplicate packet, then re-insert at top of the 420 * If we need to duplicate packet, then re-insert at top of the
@@ -436,23 +463,28 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
436 now = psched_get_time(); 463 now = psched_get_time();
437 464
438 if (q->rate) { 465 if (q->rate) {
439 struct sk_buff_head *list = &sch->q; 466 struct sk_buff *last;
440 467
441 if (!skb_queue_empty(list)) { 468 if (!skb_queue_empty(&sch->q))
469 last = skb_peek_tail(&sch->q);
470 else
471 last = netem_rb_to_skb(rb_last(&q->t_root));
472 if (last) {
442 /* 473 /*
443 * Last packet in queue is reference point (now), 474 * Last packet in queue is reference point (now),
444 * calculate this time bonus and subtract 475 * calculate this time bonus and subtract
445 * from delay. 476 * from delay.
446 */ 477 */
447 delay -= netem_skb_cb(skb_peek_tail(list))->time_to_send - now; 478 delay -= netem_skb_cb(last)->time_to_send - now;
448 delay = max_t(psched_tdiff_t, 0, delay); 479 delay = max_t(psched_tdiff_t, 0, delay);
449 now = netem_skb_cb(skb_peek_tail(list))->time_to_send; 480 now = netem_skb_cb(last)->time_to_send;
450 } 481 }
451 482
452 delay += packet_len_2_sched_time(skb->len, q); 483 delay += packet_len_2_sched_time(skb->len, q);
453 } 484 }
454 485
455 cb->time_to_send = now + delay; 486 cb->time_to_send = now + delay;
487 cb->tstamp_save = skb->tstamp;
456 ++q->counter; 488 ++q->counter;
457 tfifo_enqueue(skb, sch); 489 tfifo_enqueue(skb, sch);
458 } else { 490 } else {
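
A worked example of the reference-point arithmetic above, with assumed figures: under rate-only shaping (latency and jitter both 0), suppose a packet arrives at now = 0 while the queue tail is already scheduled for time_to_send = 40us, and the packet's own transmit time at the configured rate is 10us. Then

	delay  = max(0, 0 - (40 - 0)) = 0
	now    = 40us
	delay += 10us
	time_to_send = now + delay = 50us

so back-to-back packets serialize at the configured rate instead of all being stamped arrival_time + transmit_time.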
@@ -476,6 +508,21 @@ static unsigned int netem_drop(struct Qdisc *sch)
476 unsigned int len; 508 unsigned int len;
477 509
478 len = qdisc_queue_drop(sch); 510 len = qdisc_queue_drop(sch);
511
512 if (!len) {
513 struct rb_node *p = rb_first(&q->t_root);
514
515 if (p) {
516 struct sk_buff *skb = netem_rb_to_skb(p);
517
518 rb_erase(p, &q->t_root);
519 sch->q.qlen--;
520 skb->next = NULL;
521 skb->prev = NULL;
522 len = qdisc_pkt_len(skb);
523 kfree_skb(skb);
524 }
525 }
479 if (!len && q->qdisc && q->qdisc->ops->drop) 526 if (!len && q->qdisc && q->qdisc->ops->drop)
480 len = q->qdisc->ops->drop(q->qdisc); 527 len = q->qdisc->ops->drop(q->qdisc);
481 if (len) 528 if (len)
@@ -488,19 +535,35 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
488{ 535{
489 struct netem_sched_data *q = qdisc_priv(sch); 536 struct netem_sched_data *q = qdisc_priv(sch);
490 struct sk_buff *skb; 537 struct sk_buff *skb;
538 struct rb_node *p;
491 539
492 if (qdisc_is_throttled(sch)) 540 if (qdisc_is_throttled(sch))
493 return NULL; 541 return NULL;
494 542
495tfifo_dequeue: 543tfifo_dequeue:
496 skb = qdisc_peek_head(sch); 544 skb = __skb_dequeue(&sch->q);
497 if (skb) { 545 if (skb) {
498 const struct netem_skb_cb *cb = netem_skb_cb(skb); 546deliver:
547 sch->qstats.backlog -= qdisc_pkt_len(skb);
548 qdisc_unthrottled(sch);
549 qdisc_bstats_update(sch, skb);
550 return skb;
551 }
552 p = rb_first(&q->t_root);
553 if (p) {
554 psched_time_t time_to_send;
555
556 skb = netem_rb_to_skb(p);
499 557
500 /* if more time remaining? */ 558 /* if more time remaining? */
501 if (cb->time_to_send <= psched_get_time()) { 559 time_to_send = netem_skb_cb(skb)->time_to_send;
502 __skb_unlink(skb, &sch->q); 560 if (time_to_send <= psched_get_time()) {
503 sch->qstats.backlog -= qdisc_pkt_len(skb); 561 rb_erase(p, &q->t_root);
562
563 sch->q.qlen--;
564 skb->next = NULL;
565 skb->prev = NULL;
566 skb->tstamp = netem_skb_cb(skb)->tstamp_save;
504 567
505#ifdef CONFIG_NET_CLS_ACT 568#ifdef CONFIG_NET_CLS_ACT
506 /* 569 /*
@@ -522,10 +585,7 @@ tfifo_dequeue:
522 } 585 }
523 goto tfifo_dequeue; 586 goto tfifo_dequeue;
524 } 587 }
525deliver: 588 goto deliver;
526 qdisc_unthrottled(sch);
527 qdisc_bstats_update(sch, skb);
528 return skb;
529 } 589 }
530 590
531 if (q->qdisc) { 591 if (q->qdisc) {
@@ -533,7 +593,7 @@ deliver:
533 if (skb) 593 if (skb)
534 goto deliver; 594 goto deliver;
535 } 595 }
536 qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send); 596 qdisc_watchdog_schedule(&q->watchdog, time_to_send);
537 } 597 }
538 598
539 if (q->qdisc) { 599 if (q->qdisc) {
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index d51852bba01c..8056fb4e618a 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -113,7 +113,6 @@
113 113
114#define FRAC_BITS 30 /* fixed point arithmetic */ 114#define FRAC_BITS 30 /* fixed point arithmetic */
115#define ONE_FP (1UL << FRAC_BITS) 115#define ONE_FP (1UL << FRAC_BITS)
116#define IWSUM (ONE_FP/QFQ_MAX_WSUM)
117 116
118#define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */ 117#define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */
119#define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */ 118#define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */
@@ -138,7 +137,7 @@ struct qfq_class {
138 137
139 struct gnet_stats_basic_packed bstats; 138 struct gnet_stats_basic_packed bstats;
140 struct gnet_stats_queue qstats; 139 struct gnet_stats_queue qstats;
141 struct gnet_stats_rate_est rate_est; 140 struct gnet_stats_rate_est64 rate_est;
142 struct Qdisc *qdisc; 141 struct Qdisc *qdisc;
143 struct list_head alist; /* Link for active-classes list. */ 142 struct list_head alist; /* Link for active-classes list. */
144 struct qfq_aggregate *agg; /* Parent aggregate. */ 143 struct qfq_aggregate *agg; /* Parent aggregate. */
@@ -189,6 +188,7 @@ struct qfq_sched {
189 struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */ 188 struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */
190 u32 num_active_agg; /* Num. of active aggregates */ 189 u32 num_active_agg; /* Num. of active aggregates */
191 u32 wsum; /* weight sum */ 190 u32 wsum; /* weight sum */
191 u32 iwsum; /* inverse weight sum */
192 192
193 unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */ 193 unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */
194 struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */ 194 struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
@@ -314,6 +314,7 @@ static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
314 314
315 q->wsum += 315 q->wsum +=
316 (int) agg->class_weight * (new_num_classes - agg->num_classes); 316 (int) agg->class_weight * (new_num_classes - agg->num_classes);
317 q->iwsum = ONE_FP / q->wsum;
317 318
318 agg->num_classes = new_num_classes; 319 agg->num_classes = new_num_classes;
319} 320}
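
The cached inverse keeps the dequeue fast path divide-free while tracking the real weight sum. With FRAC_BITS = 30 the arithmetic, for an assumed weight sum of 64, is

	ONE_FP = 1 << 30
	iwsum  = ONE_FP / wsum = 2^30 / 64 = 2^24
	V     += len * iwsum		/* per dequeued packet, multiply only */

and iwsum is recomputed only when wsum actually changes, here in qfq_update_agg() and (below) in qfq_destroy_agg(). The old compile-time IWSUM divided by QFQ_MAX_WSUM, so virtual time advanced as if the maximum possible weight sum were always allocated, regardless of the actual configuration.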
@@ -340,6 +341,10 @@ static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
340{ 341{
341 if (!hlist_unhashed(&agg->nonfull_next)) 342 if (!hlist_unhashed(&agg->nonfull_next))
342 hlist_del_init(&agg->nonfull_next); 343 hlist_del_init(&agg->nonfull_next);
344 q->wsum -= agg->class_weight;
345 if (q->wsum != 0)
346 q->iwsum = ONE_FP / q->wsum;
347
343 if (q->in_serv_agg == agg) 348 if (q->in_serv_agg == agg)
344 q->in_serv_agg = qfq_choose_next_agg(q); 349 q->in_serv_agg = qfq_choose_next_agg(q);
345 kfree(agg); 350 kfree(agg);
@@ -821,44 +826,73 @@ static void qfq_make_eligible(struct qfq_sched *q)
821 unsigned long old_vslot = q->oldV >> q->min_slot_shift; 826 unsigned long old_vslot = q->oldV >> q->min_slot_shift;
822 827
823 if (vslot != old_vslot) { 828 if (vslot != old_vslot) {
824 unsigned long mask = (1ULL << fls(vslot ^ old_vslot)) - 1; 829 unsigned long mask;
830 int last_flip_pos = fls(vslot ^ old_vslot);
831
832 if (last_flip_pos > 31) /* higher than the number of groups */
833 mask = ~0UL; /* make all groups eligible */
834 else
835 mask = (1UL << last_flip_pos) - 1;
836
825 qfq_move_groups(q, mask, IR, ER); 837 qfq_move_groups(q, mask, IR, ER);
826 qfq_move_groups(q, mask, IB, EB); 838 qfq_move_groups(q, mask, IB, EB);
827 } 839 }
828} 840}
829 841
830
831/* 842/*
832 * The index of the slot in which the aggregate is to be inserted must 843 * The index of the slot in which the input aggregate agg is to be
833 * not be higher than QFQ_MAX_SLOTS-2. There is a '-2' and not a '-1' 844 * inserted must not be higher than QFQ_MAX_SLOTS-2. There is a '-2'
834 * because the start time of the group may be moved backward by one 845 * and not a '-1' because the start time of the group may be moved
835 * slot after the aggregate has been inserted, and this would cause 846 * backward by one slot after the aggregate has been inserted, and
836 * non-empty slots to be right-shifted by one position. 847 * this would cause non-empty slots to be right-shifted by one
848 * position.
837 * 849 *
838 * If the weight and lmax (max_pkt_size) of the classes do not change, 850 * QFQ+ fully satisfies this bound to the slot index if the parameters
839 * then QFQ+ does meet the above contraint according to the current 851 * of the classes are not changed dynamically, and if QFQ+ never
840 * values of its parameters. In fact, if the weight and lmax of the 852 * happens to postpone the service of agg unjustly, i.e., it never
841 * classes do not change, then, from the theory, QFQ+ guarantees that 853 * happens that the aggregate becomes backlogged and eligible, or just
842 * the slot index is never higher than 854 * eligible, while an aggregate with a higher approximated finish time
843 * 2 + QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) * 855 * is being served. In particular, in this case QFQ+ guarantees that
844 * (QFQ_MAX_WEIGHT/QFQ_MAX_WSUM) = 2 + 8 * 128 * (1 / 64) = 18 856 * the timestamps of agg are low enough that the slot index is never
857 * higher than 2. Unfortunately, QFQ+ cannot provide the same
858 * guarantee if it happens to unjustly postpone the service of agg, or
859 * if the parameters of some class are changed.
845 * 860 *
846 * When the weight of a class is increased or the lmax of the class is 861 * As for the first event, i.e., an out-of-order service, the
847 * decreased, a new aggregate with smaller slot size than the original 862 * upper bound to the slot index guaranteed by QFQ+ grows to
848 * parent aggregate of the class may happen to be activated. The 863 * 2 +
849 * activation of this aggregate should be properly delayed to when the 864 * QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) *
850 * service of the class has finished in the ideal system tracked by 865 * (current_max_weight/current_wsum) <= 2 + 8 * 128 * 1.
851 * QFQ+. If the activation of the aggregate is not delayed to this 866 *
852 * reference time instant, then this aggregate may be unjustly served 867 * The following function deals with this problem by backward-shifting
853 * before other aggregates waiting for service. This may cause the 868 * the timestamps of agg, if needed, so as to guarantee that the slot
854 * above bound to the slot index to be violated for some of these 869 * index is never higher than QFQ_MAX_SLOTS-2. This backward-shift may
855 * unlucky aggregates. 870 * cause the service of other aggregates to be postponed, yet the
871 * worst-case guarantees of these aggregates are not violated. In
872 * fact, in case of no out-of-order service, the timestamps of agg
873 * would have been even lower than they are after the backward shift,
874 * because QFQ+ would have guaranteed a maximum value equal to 2 for
875 * the slot index, and 2 < QFQ_MAX_SLOTS-2. Hence the aggregates whose
876 * service is postponed because of the backward-shift would have
877 * however waited for the service of agg before being served.
878 *
879 * The other event that may cause the slot index to be higher than 2
880 * for agg is a recent change of the parameters of some class. If the
881 * weight of a class is increased or the lmax (max_pkt_size) of the
882 * class is decreased, then a new aggregate with smaller slot size
883 * than the original parent aggregate of the class may happen to be
884 * activated. The activation of this aggregate should be properly
885 * delayed to when the service of the class has finished in the ideal
886 * system tracked by QFQ+. If the activation of the aggregate is not
887 * delayed to this reference time instant, then this aggregate may be
888 * unjustly served before other aggregates waiting for service. This
889 * may cause the above bound to the slot index to be violated for some
890 * of these unlucky aggregates.
856 * 891 *
857 * Instead of delaying the activation of the new aggregate, which is 892 * Instead of delaying the activation of the new aggregate, which is
858 * quite complex, the following inaccurate but simple solution is used: 893 * quite complex, the above-discussed capping of the slot index is
859 * if the slot index is higher than QFQ_MAX_SLOTS-2, then the 894 * used to handle also the consequences of a change of the parameters
860 * timestamps of the aggregate are shifted backward so as to let the 895 * of a class.
861 * slot index become equal to QFQ_MAX_SLOTS-2.
862 */ 896 */
863static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg, 897static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg,
864 u64 roundedS) 898 u64 roundedS)
@@ -1003,9 +1037,61 @@ static inline void charge_actual_service(struct qfq_aggregate *agg)
1003 agg->F = agg->S + (u64)service_received * agg->inv_w; 1037 agg->F = agg->S + (u64)service_received * agg->inv_w;
1004} 1038}
1005 1039
1006static inline void qfq_update_agg_ts(struct qfq_sched *q, 1040/* Assign a reasonable start time for a new aggregate in group i.
1007 struct qfq_aggregate *agg, 1041 * Admissible values for \hat(F) are multiples of \sigma_i
1008 enum update_reason reason); 1042 * no greater than V+\sigma_i . Larger values mean that
1043 * we had a wraparound so we consider the timestamp to be stale.
1044 *
1045 * If F is not stale and F >= V then we set S = F.
1046 * Otherwise we should assign S = V, but this may violate
1047 * the ordering in EB (see [2]). So, if we have groups in ER,
1048 * set S to the F_j of the first group j which would be blocking us.
1049 * We are guaranteed not to move S backward because
1050 * otherwise our group i would still be blocked.
1051 */
1052static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg)
1053{
1054 unsigned long mask;
1055 u64 limit, roundedF;
1056 int slot_shift = agg->grp->slot_shift;
1057
1058 roundedF = qfq_round_down(agg->F, slot_shift);
1059 limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
1060
1061 if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) {
1062 /* timestamp was stale */
1063 mask = mask_from(q->bitmaps[ER], agg->grp->index);
1064 if (mask) {
1065 struct qfq_group *next = qfq_ffs(q, mask);
1066 if (qfq_gt(roundedF, next->F)) {
1067 if (qfq_gt(limit, next->F))
1068 agg->S = next->F;
1069 else /* preserve timestamp correctness */
1070 agg->S = limit;
1071 return;
1072 }
1073 }
1074 agg->S = q->V;
1075 } else /* timestamp is not stale */
1076 agg->S = agg->F;
1077}
1078
1079/* Update the timestamps of agg before scheduling/rescheduling it for
1080 * service. In particular, assign to agg->F its maximum possible
1081 * value, i.e., the virtual finish time with which the aggregate
1082 * should be labeled if it used all its budget once in service.
1083 */
1084static inline void
1085qfq_update_agg_ts(struct qfq_sched *q,
1086 struct qfq_aggregate *agg, enum update_reason reason)
1087{
1088 if (reason != requeue)
1089 qfq_update_start(q, agg);
1090 else /* just charge agg for the service received */
1091 agg->S = agg->F;
1092
1093 agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w;
1094}
1009 1095
1010static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg); 1096static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg);
1011 1097
@@ -1077,7 +1163,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
1077 else 1163 else
1078 in_serv_agg->budget -= len; 1164 in_serv_agg->budget -= len;
1079 1165
1080 q->V += (u64)len * IWSUM; 1166 q->V += (u64)len * q->iwsum;
1081 pr_debug("qfq dequeue: len %u F %lld now %lld\n", 1167 pr_debug("qfq dequeue: len %u F %lld now %lld\n",
1082 len, (unsigned long long) in_serv_agg->F, 1168 len, (unsigned long long) in_serv_agg->F,
1083 (unsigned long long) q->V); 1169 (unsigned long long) q->V);
@@ -1128,66 +1214,6 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q)
1128 return agg; 1214 return agg;
1129} 1215}
1130 1216
1131/*
1132 * Assign a reasonable start time for a new aggregate in group i.
1133 * Admissible values for \hat(F) are multiples of \sigma_i
1134 * no greater than V+\sigma_i . Larger values mean that
1135 * we had a wraparound so we consider the timestamp to be stale.
1136 *
1137 * If F is not stale and F >= V then we set S = F.
1138 * Otherwise we should assign S = V, but this may violate
1139 * the ordering in EB (see [2]). So, if we have groups in ER,
1140 * set S to the F_j of the first group j which would be blocking us.
1141 * We are guaranteed not to move S backward because
1142 * otherwise our group i would still be blocked.
1143 */
1144static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg)
1145{
1146 unsigned long mask;
1147 u64 limit, roundedF;
1148 int slot_shift = agg->grp->slot_shift;
1149
1150 roundedF = qfq_round_down(agg->F, slot_shift);
1151 limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
1152
1153 if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) {
1154 /* timestamp was stale */
1155 mask = mask_from(q->bitmaps[ER], agg->grp->index);
1156 if (mask) {
1157 struct qfq_group *next = qfq_ffs(q, mask);
1158 if (qfq_gt(roundedF, next->F)) {
1159 if (qfq_gt(limit, next->F))
1160 agg->S = next->F;
1161 else /* preserve timestamp correctness */
1162 agg->S = limit;
1163 return;
1164 }
1165 }
1166 agg->S = q->V;
1167 } else /* timestamp is not stale */
1168 agg->S = agg->F;
1169}
1170
1171/*
1172 * Update the timestamps of agg before scheduling/rescheduling it for
1173 * service. In particular, assign to agg->F its maximum possible
1174 * value, i.e., the virtual finish time with which the aggregate
1175 * should be labeled if it used all its budget once in service.
1176 */
1177static inline void
1178qfq_update_agg_ts(struct qfq_sched *q,
1179 struct qfq_aggregate *agg, enum update_reason reason)
1180{
1181 if (reason != requeue)
1182 qfq_update_start(q, agg);
1183 else /* just charge agg for the service received */
1184 agg->S = agg->F;
1185
1186 agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w;
1187}
1188
1189static void qfq_schedule_agg(struct qfq_sched *, struct qfq_aggregate *);
1190
1191static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) 1217static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
1192{ 1218{
1193 struct qfq_sched *q = qdisc_priv(sch); 1219 struct qfq_sched *q = qdisc_priv(sch);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index e478d316602b..1aaf1b6e51a2 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -116,14 +116,57 @@ struct tbf_sched_data {
116 struct qdisc_watchdog watchdog; /* Watchdog timer */ 116 struct qdisc_watchdog watchdog; /* Watchdog timer */
117}; 117};
118 118
119
120/* GSO packet is too big, segment it so that tbf can transmit
121 * each segment in time
122 */
123static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
124{
125 struct tbf_sched_data *q = qdisc_priv(sch);
126 struct sk_buff *segs, *nskb;
127 netdev_features_t features = netif_skb_features(skb);
128 int ret, nb;
129
130 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
131
132 if (IS_ERR_OR_NULL(segs))
133 return qdisc_reshape_fail(skb, sch);
134
135 nb = 0;
136 while (segs) {
137 nskb = segs->next;
138 segs->next = NULL;
139 if (likely(segs->len <= q->max_size)) {
140 qdisc_skb_cb(segs)->pkt_len = segs->len;
141 ret = qdisc_enqueue(segs, q->qdisc);
142 } else {
143 ret = qdisc_reshape_fail(skb, sch);
144 }
145 if (ret != NET_XMIT_SUCCESS) {
146 if (net_xmit_drop_count(ret))
147 sch->qstats.drops++;
148 } else {
149 nb++;
150 }
151 segs = nskb;
152 }
153 sch->q.qlen += nb;
154 if (nb > 1)
155 qdisc_tree_decrease_qlen(sch, 1 - nb);
156 consume_skb(skb);
157 return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
158}
159
119static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) 160static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
120{ 161{
121 struct tbf_sched_data *q = qdisc_priv(sch); 162 struct tbf_sched_data *q = qdisc_priv(sch);
122 int ret; 163 int ret;
123 164
124 if (qdisc_pkt_len(skb) > q->max_size) 165 if (qdisc_pkt_len(skb) > q->max_size) {
166 if (skb_is_gso(skb))
167 return tbf_segment(skb, sch);
125 return qdisc_reshape_fail(skb, sch); 168 return qdisc_reshape_fail(skb, sch);
126 169 }
127 ret = qdisc_enqueue(skb, q->qdisc); 170 ret = qdisc_enqueue(skb, q->qdisc);
128 if (ret != NET_XMIT_SUCCESS) { 171 if (ret != NET_XMIT_SUCCESS) {
129 if (net_xmit_drop_count(ret)) 172 if (net_xmit_drop_count(ret))
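
The queue-length bookkeeping in tbf_segment() above is the subtle part. The parent qdisc tree accounted the original GSO skb as a single packet, but nb segments were actually enqueued, so qdisc_tree_decrease_qlen(sch, 1 - nb) — a negative decrement — credits the ancestors with the nb - 1 extra packets. With assumed figures, a GSO skb split into nb = 4 segments bumps sch->q.qlen by 4 locally and adjusts the tree by -(1 - 4) = +3, on top of the single packet the parents already counted, so every level ends up seeing exactly 4.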
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index cf4852814e0c..71c1a598d9bc 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -30,7 +30,8 @@ menuconfig IP_SCTP
30 homing at either or both ends of an association." 30 homing at either or both ends of an association."
31 31
32 To compile this protocol support as a module, choose M here: the 32 To compile this protocol support as a module, choose M here: the
33 module will be called sctp. 33 module will be called sctp. Debug messages are handled by the
34 kernel's dynamic debugging framework.
34 35
35 If in doubt, say N. 36 If in doubt, say N.
36 37
@@ -48,14 +49,6 @@ config NET_SCTPPROBE
48 To compile this code as a module, choose M here: the 49 To compile this code as a module, choose M here: the
49 module will be called sctp_probe. 50 module will be called sctp_probe.
50 51
51config SCTP_DBG_MSG
52 bool "SCTP: Debug messages"
53 help
54 If you say Y, this will enable verbose debugging messages.
55
56 If unsure, say N. However, if you are running into problems, use
57 this option to gather detailed trace information
58
59config SCTP_DBG_OBJCNT 52config SCTP_DBG_OBJCNT
60 bool "SCTP: Debug object counts" 53 bool "SCTP: Debug object counts"
61 depends on PROC_FS 54 depends on PROC_FS
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 91cfd8f94a19..cef509985192 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -28,10 +28,7 @@
28 * 28 *
29 * Please send any bug reports or fixes you make to the 29 * Please send any bug reports or fixes you make to the
30 * email address(es): 30 * email address(es):
31 * lksctp developers <lksctp-developers@lists.sourceforge.net> 31 * lksctp developers <linux-sctp@vger.kernel.org>
32 *
33 * Or submit a bug report through the following website:
34 * http://www.sf.net/projects/lksctp
35 * 32 *
36 * Written or modified by: 33 * Written or modified by:
37 * La Monte H.P. Yarroll <piggy@acm.org> 34 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -43,9 +40,6 @@
43 * Daisy Chang <daisyc@us.ibm.com> 40 * Daisy Chang <daisyc@us.ibm.com>
44 * Ryan Layer <rmlayer@us.ibm.com> 41 * Ryan Layer <rmlayer@us.ibm.com>
45 * Kevin Gao <kevin.gao@intel.com> 42 * Kevin Gao <kevin.gao@intel.com>
46 *
47 * Any bugs reported given to us we will try to fix... any fixes shared will
48 * be incorporated into the next SCTP release.
49 */ 43 */
50 44
51#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 45#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -86,10 +80,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
86 80
87 /* Discarding const is appropriate here. */ 81 /* Discarding const is appropriate here. */
88 asoc->ep = (struct sctp_endpoint *)ep; 82 asoc->ep = (struct sctp_endpoint *)ep;
89 sctp_endpoint_hold(asoc->ep);
90
91 /* Hold the sock. */
92 asoc->base.sk = (struct sock *)sk; 83 asoc->base.sk = (struct sock *)sk;
84
85 sctp_endpoint_hold(asoc->ep);
93 sock_hold(asoc->base.sk); 86 sock_hold(asoc->base.sk);
94 87
95 /* Initialize the common base substructure. */ 88 /* Initialize the common base substructure. */
@@ -103,13 +96,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
103 sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); 96 sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port);
104 97
105 asoc->state = SCTP_STATE_CLOSED; 98 asoc->state = SCTP_STATE_CLOSED;
106 99 asoc->cookie_life = ms_to_ktime(sp->assocparams.sasoc_cookie_life);
107 /* Set these values from the socket values, a conversion between
108 * millsecons to seconds/microseconds must also be done.
109 */
110 asoc->cookie_life.tv_sec = sp->assocparams.sasoc_cookie_life / 1000;
111 asoc->cookie_life.tv_usec = (sp->assocparams.sasoc_cookie_life % 1000)
112 * 1000;
113 asoc->frag_point = 0; 100 asoc->frag_point = 0;
114 asoc->user_frag = sp->user_frag; 101 asoc->user_frag = sp->user_frag;
115 102
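The cookie_life conversion above replaces the hand-rolled millisecond-to-timeval split with ms_to_ktime(), which stores the same duration as a single nanosecond count. A userspace model of the equivalence; the literal constants stand in for the kernel's USEC_PER_MSEC and NSEC_PER_MSEC:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t ms = 60000;            /* e.g. sasoc_cookie_life */

        /* Old representation: a struct timeval split by hand */
        long tv_sec  = ms / 1000;
        long tv_usec = (ms % 1000) * 1000;

        /* New representation: ktime_t, one nanosecond count */
        int64_t kt = (int64_t)ms * 1000000;

        /* Both encode the same duration:
         * tv_sec * 1e9 + tv_usec * 1e3 == kt
         */
        printf("timeval: %lds %ldus, ktime: %lldns\n",
               tv_sec, tv_usec, (long long)kt);
        return 0;
}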
@@ -343,8 +330,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
343 return asoc; 330 return asoc;
344 331
345fail_init: 332fail_init:
346 sctp_endpoint_put(asoc->ep);
347 sock_put(asoc->base.sk); 333 sock_put(asoc->base.sk);
334 sctp_endpoint_put(asoc->ep);
348 return NULL; 335 return NULL;
349} 336}
350 337
@@ -356,7 +343,7 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,
356{ 343{
357 struct sctp_association *asoc; 344 struct sctp_association *asoc;
358 345
359 asoc = t_new(struct sctp_association, gfp); 346 asoc = kzalloc(sizeof(*asoc), gfp);
360 if (!asoc) 347 if (!asoc)
361 goto fail; 348 goto fail;
362 349
@@ -364,7 +351,8 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,
364 goto fail_init; 351 goto fail_init;
365 352
366 SCTP_DBG_OBJCNT_INC(assoc); 353 SCTP_DBG_OBJCNT_INC(assoc);
367 SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc); 354
355 pr_debug("Created asoc %p\n", asoc);
368 356
369 return asoc; 357 return asoc;
370 358
@@ -462,7 +450,10 @@ void sctp_association_free(struct sctp_association *asoc)
462/* Cleanup and free up an association. */ 450/* Cleanup and free up an association. */
463static void sctp_association_destroy(struct sctp_association *asoc) 451static void sctp_association_destroy(struct sctp_association *asoc)
464{ 452{
465 SCTP_ASSERT(asoc->base.dead, "Assoc is not dead", return); 453 if (unlikely(!asoc->base.dead)) {
454 WARN(1, "Attempt to destroy undead association %p!\n", asoc);
455 return;
456 }
466 457
467 sctp_endpoint_put(asoc->ep); 458 sctp_endpoint_put(asoc->ep);
468 sock_put(asoc->base.sk); 459 sock_put(asoc->base.sk);
@@ -543,11 +534,8 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
543 struct list_head *pos; 534 struct list_head *pos;
544 struct sctp_transport *transport; 535 struct sctp_transport *transport;
545 536
546 SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_rm_peer:association %p addr: ", 537 pr_debug("%s: association:%p addr:%pISpc\n",
547 " port: %d\n", 538 __func__, asoc, &peer->ipaddr.sa);
548 asoc,
549 (&peer->ipaddr),
550 ntohs(peer->ipaddr.v4.sin_port));
551 539
552 /* If we are to remove the current retran_path, update it 540 /* If we are to remove the current retran_path, update it
553 * to the next peer before removing this peer from the list. 541 * to the next peer before removing this peer from the list.
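The pr_debug() above leans on the kernel's %pISpc format extension, which takes a struct sockaddr and prints the address together with its port, replacing the old macro that extracted and ntohs()-converted the port by hand. A rough userspace approximation of the output %pISpc produces; the helper name and test address are made up for illustration:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>

/* Print a sockaddr roughly the way %pISpc does: "addr:port" for v4,
 * "[addr]:port" for v6.
 */
static void print_addr_port(const struct sockaddr *sa)
{
        char buf[INET6_ADDRSTRLEN];

        if (sa->sa_family == AF_INET) {
                const struct sockaddr_in *in = (const void *)sa;

                inet_ntop(AF_INET, &in->sin_addr, buf, sizeof(buf));
                printf("%s:%u\n", buf, ntohs(in->sin_port));
        } else if (sa->sa_family == AF_INET6) {
                const struct sockaddr_in6 *in6 = (const void *)sa;

                inet_ntop(AF_INET6, &in6->sin6_addr, buf, sizeof(buf));
                printf("[%s]:%u\n", buf, ntohs(in6->sin6_port));
        }
}

int main(void)
{
        struct sockaddr_in sin;

        memset(&sin, 0, sizeof(sin));
        sin.sin_family = AF_INET;
        sin.sin_port = htons(9899);
        inet_pton(AF_INET, "192.0.2.1", &sin.sin_addr);
        print_addr_port((struct sockaddr *)&sin);
        return 0;
}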
@@ -643,12 +631,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
643 /* AF_INET and AF_INET6 share common port field. */ 631 /* AF_INET and AF_INET6 share common port field. */
644 port = ntohs(addr->v4.sin_port); 632 port = ntohs(addr->v4.sin_port);
645 633
646 SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_add_peer:association %p addr: ", 634 pr_debug("%s: association:%p addr:%pISpc state:%d\n", __func__,
647 " port: %d state:%d\n", 635 asoc, &addr->sa, peer_state);
648 asoc,
649 addr,
650 port,
651 peer_state);
652 636
653 /* Set the port if it has not been set yet. */ 637 /* Set the port if it has not been set yet. */
654 if (0 == asoc->peer.port) 638 if (0 == asoc->peer.port)
@@ -715,8 +699,9 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
715 else 699 else
716 asoc->pathmtu = peer->pathmtu; 700 asoc->pathmtu = peer->pathmtu;
717 701
718 SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to " 702 pr_debug("%s: association:%p PMTU set to %d\n", __func__, asoc,
719 "%d\n", asoc, asoc->pathmtu); 703 asoc->pathmtu);
704
720 peer->pmtu_pending = 0; 705 peer->pmtu_pending = 0;
721 706
722 asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu); 707 asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
@@ -855,12 +840,12 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
855 else 840 else
856 spc_state = SCTP_ADDR_AVAILABLE; 841 spc_state = SCTP_ADDR_AVAILABLE;
857 /* Don't inform ULP about transition from PF to 842 /* Don't inform ULP about transition from PF to
858 * active state and set cwnd to 1, see SCTP 843 * active state and set cwnd to 1 MTU, see SCTP
859 * Quick failover draft section 5.1, point 5 844 * Quick failover draft section 5.1, point 5
860 */ 845 */
861 if (transport->state == SCTP_PF) { 846 if (transport->state == SCTP_PF) {
862 ulp_notify = false; 847 ulp_notify = false;
863 transport->cwnd = 1; 848 transport->cwnd = asoc->pathmtu;
864 } 849 }
865 transport->state = SCTP_ACTIVE; 850 transport->state = SCTP_ACTIVE;
866 break; 851 break;
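Note the bug fix folded into this hunk: cwnd is measured in bytes, so the old transport->cwnd = 1 left a transport recovering from the PF state unable to send even one full packet, while asoc->pathmtu permits exactly one MTU in flight, as the quick-failover draft intends. A two-line illustration of the integer arithmetic:

#include <stdio.h>

int main(void)
{
        unsigned int pmtu = 1500;

        /* packets allowed in flight = cwnd / pmtu */
        printf("cwnd=1    -> %u packets in flight\n", 1 / pmtu);
        printf("cwnd=pmtu -> %u packets in flight\n", pmtu / pmtu);
        return 0;
}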
@@ -1356,12 +1341,8 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
1356 else 1341 else
1357 t = asoc->peer.retran_path; 1342 t = asoc->peer.retran_path;
1358 1343
1359 SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" 1344 pr_debug("%s: association:%p addr:%pISpc\n", __func__, asoc,
1360 " %p addr: ", 1345 &t->ipaddr.sa);
1361 " port: %d\n",
1362 asoc,
1363 (&t->ipaddr),
1364 ntohs(t->ipaddr.v4.sin_port));
1365} 1346}
1366 1347
1367/* Choose the transport for sending retransmit packet. */ 1348/* Choose the transport for sending retransmit packet. */
@@ -1408,8 +1389,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
1408 asoc->frag_point = sctp_frag_point(asoc, pmtu); 1389 asoc->frag_point = sctp_frag_point(asoc, pmtu);
1409 } 1390 }
1410 1391
1411 SCTP_DEBUG_PRINTK("%s: asoc:%p, pmtu:%d, frag_point:%d\n", 1392 pr_debug("%s: asoc:%p, pmtu:%d, frag_point:%d\n", __func__, asoc,
1412 __func__, asoc, asoc->pathmtu, asoc->frag_point); 1393 asoc->pathmtu, asoc->frag_point);
1413} 1394}
1414 1395
1415/* Should we send a SACK to update our peer? */ 1396/* Should we send a SACK to update our peer? */
@@ -1461,9 +1442,9 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
1461 asoc->rwnd_press -= change; 1442 asoc->rwnd_press -= change;
1462 } 1443 }
1463 1444
1464 SCTP_DEBUG_PRINTK("%s: asoc %p rwnd increased by %d to (%u, %u) " 1445 pr_debug("%s: asoc:%p rwnd increased by %d to (%u, %u) - %u\n",
1465 "- %u\n", __func__, asoc, len, asoc->rwnd, 1446 __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
1466 asoc->rwnd_over, asoc->a_rwnd); 1447 asoc->a_rwnd);
1467 1448
1468 /* Send a window update SACK if the rwnd has increased by at least the 1449 /* Send a window update SACK if the rwnd has increased by at least the
1469 * minimum of the association's PMTU and half of the receive buffer. 1450 * minimum of the association's PMTU and half of the receive buffer.
@@ -1472,9 +1453,11 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
1472 */ 1453 */
1473 if (sctp_peer_needs_update(asoc)) { 1454 if (sctp_peer_needs_update(asoc)) {
1474 asoc->a_rwnd = asoc->rwnd; 1455 asoc->a_rwnd = asoc->rwnd;
1475 SCTP_DEBUG_PRINTK("%s: Sending window update SACK- asoc: %p " 1456
1476 "rwnd: %u a_rwnd: %u\n", __func__, 1457 pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u "
1477 asoc, asoc->rwnd, asoc->a_rwnd); 1458 "a_rwnd:%u\n", __func__, asoc, asoc->rwnd,
1459 asoc->a_rwnd);
1460
1478 sack = sctp_make_sack(asoc); 1461 sack = sctp_make_sack(asoc);
1479 if (!sack) 1462 if (!sack)
1480 return; 1463 return;
@@ -1496,8 +1479,10 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
1496 int rx_count; 1479 int rx_count;
1497 int over = 0; 1480 int over = 0;
1498 1481
1499 SCTP_ASSERT(asoc->rwnd, "rwnd zero", return); 1482 if (unlikely(!asoc->rwnd || asoc->rwnd_over))
1500 SCTP_ASSERT(!asoc->rwnd_over, "rwnd_over not zero", return); 1483 pr_debug("%s: association:%p has asoc->rwnd:%u, "
1484 "asoc->rwnd_over:%u!\n", __func__, asoc,
1485 asoc->rwnd, asoc->rwnd_over);
1501 1486
1502 if (asoc->ep->rcvbuf_policy) 1487 if (asoc->ep->rcvbuf_policy)
1503 rx_count = atomic_read(&asoc->rmem_alloc); 1488 rx_count = atomic_read(&asoc->rmem_alloc);
@@ -1522,9 +1507,10 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
1522 asoc->rwnd_over = len - asoc->rwnd; 1507 asoc->rwnd_over = len - asoc->rwnd;
1523 asoc->rwnd = 0; 1508 asoc->rwnd = 0;
1524 } 1509 }
1525 SCTP_DEBUG_PRINTK("%s: asoc %p rwnd decreased by %d to (%u, %u, %u)\n", 1510
1526 __func__, asoc, len, asoc->rwnd, 1511 pr_debug("%s: asoc:%p rwnd decreased by %d to (%u, %u, %u)\n",
1527 asoc->rwnd_over, asoc->rwnd_press); 1512 __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
1513 asoc->rwnd_press);
1528} 1514}
1529 1515
1530/* Build the bind address list for the association based on info from the 1516/* Build the bind address list for the association based on info from the
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index ba1dfc3f8def..8c4fa5dec824 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -22,16 +22,10 @@
22 * 22 *
23 * Please send any bug reports or fixes you make to the 23 * Please send any bug reports or fixes you make to the
24 * email address(es): 24 * email address(es):
25 * lksctp developers <lksctp-developers@lists.sourceforge.net> 25 * lksctp developers <linux-sctp@vger.kernel.org>
26 *
27 * Or submit a bug report through the following website:
28 * http://www.sf.net/projects/lksctp
29 * 26 *
30 * Written or modified by: 27 * Written or modified by:
31 * Vlad Yasevich <vladislav.yasevich@hp.com> 28 * Vlad Yasevich <vladislav.yasevich@hp.com>
32 *
33 * Any bugs reported given to us we will try to fix... any fixes shared will
34 * be incorporated into the next SCTP release.
35 */ 29 */
36 30
37#include <linux/slab.h> 31#include <linux/slab.h>
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 41145fe31813..077bb070052b 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -27,19 +27,13 @@
27 * 27 *
28 * Please send any bug reports or fixes you make to the 28 * Please send any bug reports or fixes you make to the
29 * email address(es): 29 * email address(es):
30 * lksctp developers <lksctp-developers@lists.sourceforge.net> 30 * lksctp developers <linux-sctp@vger.kernel.org>
31 *
32 * Or submit a bug report through the following website:
33 * http://www.sf.net/projects/lksctp
34 * 31 *
35 * Written or modified by: 32 * Written or modified by:
36 * La Monte H.P. Yarroll <piggy@acm.org> 33 * La Monte H.P. Yarroll <piggy@acm.org>
37 * Karl Knutson <karl@athena.chicago.il.us> 34 * Karl Knutson <karl@athena.chicago.il.us>
38 * Jon Grimm <jgrimm@us.ibm.com> 35 * Jon Grimm <jgrimm@us.ibm.com>
39 * Daisy Chang <daisyc@us.ibm.com> 36 * Daisy Chang <daisyc@us.ibm.com>
40 *
41 * Any bugs reported given to us we will try to fix... any fixes shared will
42 * be incorporated into the next SCTP release.
43 */ 37 */
44 38
45#include <linux/types.h> 39#include <linux/types.h>
@@ -162,7 +156,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
162 struct sctp_sockaddr_entry *addr; 156 struct sctp_sockaddr_entry *addr;
163 157
164 /* Add the address to the bind address list. */ 158 /* Add the address to the bind address list. */
165 addr = t_new(struct sctp_sockaddr_entry, gfp); 159 addr = kzalloc(sizeof(*addr), gfp);
166 if (!addr) 160 if (!addr)
167 return -ENOMEM; 161 return -ENOMEM;
168 162
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 69ce21e3716f..7bd5ed4a8657 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -24,17 +24,11 @@
24 * 24 *
25 * Please send any bug reports or fixes you make to the 25 * Please send any bug reports or fixes you make to the
26 * email address(es): 26 * email address(es):
27 * lksctp developers <lksctp-developers@lists.sourceforge.net> 27 * lksctp developers <linux-sctp@vger.kernel.org>
28 *
29 * Or submit a bug report through the following website:
30 * http://www.sf.net/projects/lksctp
31 * 28 *
32 * Written or modified by: 29 * Written or modified by:
33 * Jon Grimm <jgrimm@us.ibm.com> 30 * Jon Grimm <jgrimm@us.ibm.com>
34 * Sridhar Samudrala <sri@us.ibm.com> 31 * Sridhar Samudrala <sri@us.ibm.com>
35 *
36 * Any bugs reported given to us we will try to fix... any fixes shared will
37 * be incorporated into the next SCTP release.
38 */ 32 */
39 33
40#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 34#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -66,7 +60,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
66} 60}
67 61
68/* Allocate and initialize datamsg. */ 62/* Allocate and initialize datamsg. */
69SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) 63static struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp)
70{ 64{
71 struct sctp_datamsg *msg; 65 struct sctp_datamsg *msg;
72 msg = kmalloc(sizeof(struct sctp_datamsg), gfp); 66 msg = kmalloc(sizeof(struct sctp_datamsg), gfp);
@@ -193,16 +187,17 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
193 msg->expires_at = jiffies + 187 msg->expires_at = jiffies +
194 msecs_to_jiffies(sinfo->sinfo_timetolive); 188 msecs_to_jiffies(sinfo->sinfo_timetolive);
195 msg->can_abandon = 1; 189 msg->can_abandon = 1;
196 SCTP_DEBUG_PRINTK("%s: msg:%p expires_at: %ld jiffies:%ld\n", 190
197 __func__, msg, msg->expires_at, jiffies); 191 pr_debug("%s: msg:%p expires_at:%ld jiffies:%ld\n", __func__,
192 msg, msg->expires_at, jiffies);
198 } 193 }
199 194
200 /* This is the biggest possible DATA chunk that can fit into 195 /* This is the biggest possible DATA chunk that can fit into
201 * the packet 196 * the packet
202 */ 197 */
203 max_data = asoc->pathmtu - 198 max_data = (asoc->pathmtu -
204 sctp_sk(asoc->base.sk)->pf->af->net_header_len - 199 sctp_sk(asoc->base.sk)->pf->af->net_header_len -
205 sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk); 200 sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk)) & ~3;
206 201
207 max = asoc->frag_point; 202 max = asoc->frag_point;
208 /* If the the peer requested that we authenticate DATA chunks 203 /* If the the peer requested that we authenticate DATA chunks
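Besides the added parentheses, the max_data computation above now rounds the result down to a 4-byte multiple with & ~3, matching the word alignment SCTP chunks are padded to, so fragment sizing and padding stay consistent. A quick demonstration of the rounding:

#include <stdio.h>

int main(void)
{
        unsigned int lens[] = { 1452, 1453, 1454, 1455, 1456 };
        unsigned int i;

        /* x & ~3 clears the two low bits: round down to 4n */
        for (i = 0; i < sizeof(lens) / sizeof(lens[0]); i++)
                printf("%u -> %u\n", lens[i], lens[i] & ~3u);
        return 0;
}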
diff --git a/net/sctp/command.c b/net/sctp/command.c
index c0044019db9e..3d9a9ff69c03 100644
--- a/net/sctp/command.c
+++ b/net/sctp/command.c
@@ -25,17 +25,11 @@
25 * 25 *
26 * Please send any bug reports or fixes you make to the 26 * Please send any bug reports or fixes you make to the
27 * email address(es): 27 * email address(es):
28 * lksctp developers <lksctp-developers@lists.sourceforge.net> 28 * lksctp developers <linux-sctp@vger.kernel.org>
29 *
30 * Or submit a bug report through the following website:
31 * http://www.sf.net/projects/lksctp
32 * 29 *
33 * Written or modified by: 30 * Written or modified by:
34 * La Monte H.P. Yarroll <piggy@acm.org> 31 * La Monte H.P. Yarroll <piggy@acm.org>
35 * Karl Knutson <karl@athena.chicago.il.us> 32 * Karl Knutson <karl@athena.chicago.il.us>
36 *
37 * Any bugs reported given to us we will try to fix... any fixes shared will
38 * be incorporated into the next SCTP release.
39 */ 33 */
40 34
41#include <linux/types.h> 35#include <linux/types.h>
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index ec997cfe0a7e..e89015d8935a 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -28,10 +28,7 @@
28 * 28 *
29 * Please send any bug reports or fixes you make to the 29 * Please send any bug reports or fixes you make to the
30 * email address(es): 30 * email address(es):
31 * lksctp developers <lksctp-developers@lists.sourceforge.net> 31 * lksctp developers <linux-sctp@vger.kernel.org>
32 *
33 * Or submit a bug report through the following website:
34 * http://www.sf.net/projects/lksctp
35 * 32 *
36 * Written or modified by: 33 * Written or modified by:
37 * La Monte H.P. Yarroll <piggy@acm.org> 34 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -40,17 +37,10 @@
40 * Jon Grimm <jgrimm@us.ibm.com> 37 * Jon Grimm <jgrimm@us.ibm.com>
41 * Daisy Chang <daisyc@us.ibm.com> 38 * Daisy Chang <daisyc@us.ibm.com>
42 * Sridhar Samudrala <sri@us.ibm.com> 39 * Sridhar Samudrala <sri@us.ibm.com>
43 *
44 * Any bugs reported given to us we will try to fix... any fixes shared will
45 * be incorporated into the next SCTP release.
46 */ 40 */
47 41
48#include <net/sctp/sctp.h> 42#include <net/sctp/sctp.h>
49 43
50#if SCTP_DEBUG
51int sctp_debug_flag = 1; /* Initially enable DEBUG */
52#endif /* SCTP_DEBUG */
53
54/* These are printable forms of Chunk ID's from section 3.1. */ 44/* These are printable forms of Chunk ID's from section 3.1. */
55static const char *const sctp_cid_tbl[SCTP_NUM_BASE_CHUNK_TYPES] = { 45static const char *const sctp_cid_tbl[SCTP_NUM_BASE_CHUNK_TYPES] = {
56 "DATA", 46 "DATA",
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 5fbd7bc6bb11..09b8daac87c8 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -29,10 +29,7 @@
29 * 29 *
30 * Please send any bug reports or fixes you make to the 30 * Please send any bug reports or fixes you make to the
31 * email address(es): 31 * email address(es):
32 * lksctp developers <lksctp-developers@lists.sourceforge.net> 32 * lksctp developers <linux-sctp@vger.kernel.org>
33 *
34 * Or submit a bug report through the following website:
35 * http://www.sf.net/projects/lksctp
36 * 33 *
37 * Written or modified by: 34 * Written or modified by:
38 * La Monte H.P. Yarroll <piggy@acm.org> 35 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -40,9 +37,6 @@
40 * Jon Grimm <jgrimm@austin.ibm.com> 37 * Jon Grimm <jgrimm@austin.ibm.com>
41 * Daisy Chang <daisyc@us.ibm.com> 38 * Daisy Chang <daisyc@us.ibm.com>
42 * Dajiang Zhang <dajiang.zhang@nokia.com> 39 * Dajiang Zhang <dajiang.zhang@nokia.com>
43 *
44 * Any bugs reported given to us we will try to fix... any fixes shared will
45 * be incorporated into the next SCTP release.
46 */ 40 */
47 41
48#include <linux/types.h> 42#include <linux/types.h>
@@ -192,9 +186,10 @@ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp)
192 struct sctp_endpoint *ep; 186 struct sctp_endpoint *ep;
193 187
194 /* Build a local endpoint. */ 188 /* Build a local endpoint. */
195 ep = t_new(struct sctp_endpoint, gfp); 189 ep = kzalloc(sizeof(*ep), gfp);
196 if (!ep) 190 if (!ep)
197 goto fail; 191 goto fail;
192
198 if (!sctp_endpoint_init(ep, sk, gfp)) 193 if (!sctp_endpoint_init(ep, sk, gfp))
199 goto fail_init; 194 goto fail_init;
200 195
@@ -246,10 +241,12 @@ void sctp_endpoint_free(struct sctp_endpoint *ep)
246/* Final destructor for endpoint. */ 241/* Final destructor for endpoint. */
247static void sctp_endpoint_destroy(struct sctp_endpoint *ep) 242static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
248{ 243{
249 SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return); 244 struct sock *sk;
250 245
251 /* Free up the HMAC transform. */ 246 if (unlikely(!ep->base.dead)) {
252 crypto_free_hash(sctp_sk(ep->base.sk)->hmac); 247 WARN(1, "Attempt to destroy undead endpoint %p!\n", ep);
248 return;
249 }
253 250
254 /* Free the digest buffer */ 251 /* Free the digest buffer */
255 kfree(ep->digest); 252 kfree(ep->digest);
@@ -270,13 +267,15 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
270 267
271 memset(ep->secret_key, 0, sizeof(ep->secret_key)); 268 memset(ep->secret_key, 0, sizeof(ep->secret_key));
272 269
273 /* Remove and free the port */
274 if (sctp_sk(ep->base.sk)->bind_hash)
275 sctp_put_port(ep->base.sk);
276
277 /* Give up our hold on the sock. */ 270 /* Give up our hold on the sock. */
278 if (ep->base.sk) 271 sk = ep->base.sk;
279 sock_put(ep->base.sk); 272 if (sk != NULL) {
273 /* Remove and free the port */
274 if (sctp_sk(sk)->bind_hash)
275 sctp_put_port(sk);
276
277 sock_put(sk);
278 }
280 279
281 kfree(ep); 280 kfree(ep);
282 SCTP_DBG_OBJCNT_DEC(ep); 281 SCTP_DBG_OBJCNT_DEC(ep);
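Both destructors in this series (association and endpoint) trade SCTP_ASSERT() for an explicit WARN-and-return, which still flags the bug loudly on production kernels, where the old assert compiled away, without freeing an object that was never marked dead. A userspace model of the guard; the struct and function names are illustrative only:

#include <stdio.h>
#include <stdlib.h>

struct obj {
        int dead;
};

static void obj_destroy(struct obj *o)
{
        if (!o->dead) {
                /* kernel: WARN(1, "Attempt to destroy undead ...") */
                fprintf(stderr, "attempt to destroy undead object %p!\n",
                        (void *)o);
                return;         /* leak rather than corrupt */
        }
        free(o);
}

int main(void)
{
        struct obj *o = calloc(1, sizeof(*o));

        obj_destroy(o);         /* refused: not marked dead */
        o->dead = 1;
        obj_destroy(o);         /* now actually freed */
        return 0;
}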
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 4b2c83146aa7..98b69bbecdd9 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -29,10 +29,7 @@
29 * 29 *
30 * Please send any bug reports or fixes you make to the 30 * Please send any bug reports or fixes you make to the
31 * email address(es): 31 * email address(es):
32 * lksctp developers <lksctp-developers@lists.sourceforge.net> 32 * lksctp developers <linux-sctp@vger.kernel.org>
33 *
34 * Or submit a bug report through the following website:
35 * http://www.sf.net/projects/lksctp
36 * 33 *
37 * Written or modified by: 34 * Written or modified by:
38 * La Monte H.P. Yarroll <piggy@acm.org> 35 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -43,9 +40,6 @@
43 * Daisy Chang <daisyc@us.ibm.com> 40 * Daisy Chang <daisyc@us.ibm.com>
44 * Sridhar Samudrala <sri@us.ibm.com> 41 * Sridhar Samudrala <sri@us.ibm.com>
45 * Ardelle Fan <ardelle.fan@intel.com> 42 * Ardelle Fan <ardelle.fan@intel.com>
46 *
47 * Any bugs reported given to us we will try to fix... any fixes shared will
48 * be incorporated into the next SCTP release.
49 */ 43 */
50 44
51#include <linux/types.h> 45#include <linux/types.h>
@@ -87,15 +81,7 @@ static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb)
87{ 81{
88 struct sctphdr *sh = sctp_hdr(skb); 82 struct sctphdr *sh = sctp_hdr(skb);
89 __le32 cmp = sh->checksum; 83 __le32 cmp = sh->checksum;
90 struct sk_buff *list; 84 __le32 val = sctp_compute_cksum(skb, 0);
91 __le32 val;
92 __u32 tmp = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
93
94 skb_walk_frags(skb, list)
95 tmp = sctp_update_cksum((__u8 *)list->data, skb_headlen(list),
96 tmp);
97
98 val = sctp_end_cksum(tmp);
99 85
100 if (val != cmp) { 86 if (val != cmp) {
101 /* CRC failure, dump it. */ 87 /* CRC failure, dump it. */
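The rewritten helper delegates to sctp_compute_cksum(), which walks the header and all fragments internally; the underlying checksum is CRC32c (Castagnoli). A self-contained bitwise reference in userspace, using the reflected polynomial 0x82F63B78 and verified against the standard check value, as a sketch rather than the kernel's optimized implementation:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Bit-at-a-time CRC32c: init ~0, reflected polynomial, final invert */
static uint32_t crc32c(const uint8_t *buf, size_t len)
{
        uint32_t crc = 0xFFFFFFFFu;
        size_t i;
        int bit;

        for (i = 0; i < len; i++) {
                crc ^= buf[i];
                for (bit = 0; bit < 8; bit++)
                        crc = (crc >> 1) ^ (0x82F63B78u & -(crc & 1));
        }
        return ~crc;
}

int main(void)
{
        const char *msg = "123456789";

        /* Well-known CRC32c check value for "123456789" */
        printf("crc32c = 0x%08X (expect 0xE3069283)\n",
               crc32c((const uint8_t *)msg, strlen(msg)));
        return 0;
}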
@@ -454,8 +440,6 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
454 struct sctp_association *asoc, 440 struct sctp_association *asoc,
455 struct sctp_transport *t) 441 struct sctp_transport *t)
456{ 442{
457 SCTP_DEBUG_PRINTK("%s\n", __func__);
458
459 if (sock_owned_by_user(sk)) { 443 if (sock_owned_by_user(sk)) {
460 if (timer_pending(&t->proto_unreach_timer)) 444 if (timer_pending(&t->proto_unreach_timer))
461 return; 445 return;
@@ -464,10 +448,12 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
464 jiffies + (HZ/20))) 448 jiffies + (HZ/20)))
465 sctp_association_hold(asoc); 449 sctp_association_hold(asoc);
466 } 450 }
467
468 } else { 451 } else {
469 struct net *net = sock_net(sk); 452 struct net *net = sock_net(sk);
470 453
454 pr_debug("%s: unrecognized next header type "
455 "encountered!\n", __func__);
456
471 if (del_timer(&t->proto_unreach_timer)) 457 if (del_timer(&t->proto_unreach_timer))
472 sctp_association_put(asoc); 458 sctp_association_put(asoc);
473 459
@@ -589,7 +575,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
589 struct sctp_association *asoc = NULL; 575 struct sctp_association *asoc = NULL;
590 struct sctp_transport *transport; 576 struct sctp_transport *transport;
591 struct inet_sock *inet; 577 struct inet_sock *inet;
592 sk_buff_data_t saveip, savesctp; 578 __u16 saveip, savesctp;
593 int err; 579 int err;
594 struct net *net = dev_net(skb->dev); 580 struct net *net = dev_net(skb->dev);
595 581
@@ -648,8 +634,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
648 break; 634 break;
649 case ICMP_REDIRECT: 635 case ICMP_REDIRECT:
650 sctp_icmp_redirect(sk, transport, skb); 636 sctp_icmp_redirect(sk, transport, skb);
651 err = 0; 637 /* Fall through to out_unlock. */
652 break;
653 default: 638 default:
654 goto out_unlock; 639 goto out_unlock;
655 } 640 }
@@ -903,11 +888,11 @@ hit:
903} 888}
904 889
905/* Look up an association. BH-safe. */ 890/* Look up an association. BH-safe. */
906SCTP_STATIC 891static
907struct sctp_association *sctp_lookup_association(struct net *net, 892struct sctp_association *sctp_lookup_association(struct net *net,
908 const union sctp_addr *laddr, 893 const union sctp_addr *laddr,
909 const union sctp_addr *paddr, 894 const union sctp_addr *paddr,
910 struct sctp_transport **transportp) 895 struct sctp_transport **transportp)
911{ 896{
912 struct sctp_association *asoc; 897 struct sctp_association *asoc;
913 898
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 3221d073448c..5856932fdc38 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -30,17 +30,11 @@
30 * 30 *
31 * Please send any bug reports or fixes you make to the 31 * Please send any bug reports or fixes you make to the
32 * email address(es): 32 * email address(es):
33 * lksctp developers <lksctp-developers@lists.sourceforge.net> 33 * lksctp developers <linux-sctp@vger.kernel.org>
34 *
35 * Or submit a bug report through the following website:
36 * http://www.sf.net/projects/lksctp
37 * 34 *
38 * Written or modified by: 35 * Written or modified by:
39 * La Monte H.P. Yarroll <piggy@acm.org> 36 * La Monte H.P. Yarroll <piggy@acm.org>
40 * Karl Knutson <karl@athena.chicago.il.us> 37 * Karl Knutson <karl@athena.chicago.il.us>
41 *
42 * Any bugs reported given to us we will try to fix... any fixes shared will
43 * be incorporated into the next SCTP release.
44 */ 38 */
45 39
46#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 40#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -219,10 +213,10 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
219 chunk->end_of_packet = 1; 213 chunk->end_of_packet = 1;
220 } 214 }
221 215
222 SCTP_DEBUG_PRINTK("+++sctp_inq_pop+++ chunk %p[%s]," 216 pr_debug("+++sctp_inq_pop+++ chunk:%p[%s], length:%d, skb->len:%d\n",
223 " length %d, skb->len %d\n",chunk, 217 chunk, sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)),
224 sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), 218 ntohs(chunk->chunk_hdr->length), chunk->skb->len);
225 ntohs(chunk->chunk_hdr->length), chunk->skb->len); 219
226 return chunk; 220 return chunk;
227} 221}
228 222
@@ -238,4 +232,3 @@ void sctp_inq_set_th_handler(struct sctp_inq *q, work_func_t callback)
238{ 232{
239 INIT_WORK(&q->immediate, callback); 233 INIT_WORK(&q->immediate, callback);
240} 234}
241
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 391a245d5203..e7b2d4fe2b6a 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -27,10 +27,7 @@
27 * 27 *
28 * Please send any bug reports or fixes you make to the 28 * Please send any bug reports or fixes you make to the
29 * email address(es): 29 * email address(es):
30 * lksctp developers <lksctp-developers@lists.sourceforge.net> 30 * lksctp developers <linux-sctp@vger.kernel.org>
31 *
32 * Or submit a bug report through the following website:
33 * http://www.sf.net/projects/lksctp
34 * 31 *
35 * Written or modified by: 32 * Written or modified by:
36 * Le Yanqun <yanqun.le@nokia.com> 33 * Le Yanqun <yanqun.le@nokia.com>
@@ -42,9 +39,6 @@
42 * 39 *
43 * Based on: 40 * Based on:
44 * linux/net/ipv6/tcp_ipv6.c 41 * linux/net/ipv6/tcp_ipv6.c
45 *
46 * Any bugs reported given to us we will try to fix... any fixes shared will
47 * be incorporated into the next SCTP release.
48 */ 42 */
49 43
50#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 44#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -145,15 +139,15 @@ static struct notifier_block sctp_inet6addr_notifier = {
145}; 139};
146 140
147/* ICMP error handler. */ 141/* ICMP error handler. */
148SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 142static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
149 u8 type, u8 code, int offset, __be32 info) 143 u8 type, u8 code, int offset, __be32 info)
150{ 144{
151 struct inet6_dev *idev; 145 struct inet6_dev *idev;
152 struct sock *sk; 146 struct sock *sk;
153 struct sctp_association *asoc; 147 struct sctp_association *asoc;
154 struct sctp_transport *transport; 148 struct sctp_transport *transport;
155 struct ipv6_pinfo *np; 149 struct ipv6_pinfo *np;
156 sk_buff_data_t saveip, savesctp; 150 __u16 saveip, savesctp;
157 int err; 151 int err;
158 struct net *net = dev_net(skb->dev); 152 struct net *net = dev_net(skb->dev);
159 153
@@ -189,7 +183,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
189 break; 183 break;
190 case NDISC_REDIRECT: 184 case NDISC_REDIRECT:
191 sctp_icmp_redirect(sk, transport, skb); 185 sctp_icmp_redirect(sk, transport, skb);
192 break; 186 goto out_unlock;
193 default: 187 default:
194 break; 188 break;
195 } 189 }
@@ -210,45 +204,23 @@ out:
210 in6_dev_put(idev); 204 in6_dev_put(idev);
211} 205}
212 206
213/* Based on tcp_v6_xmit() in tcp_ipv6.c. */
214static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) 207static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
215{ 208{
216 struct sock *sk = skb->sk; 209 struct sock *sk = skb->sk;
217 struct ipv6_pinfo *np = inet6_sk(sk); 210 struct ipv6_pinfo *np = inet6_sk(sk);
218 struct flowi6 fl6; 211 struct flowi6 *fl6 = &transport->fl.u.ip6;
219 212
220 memset(&fl6, 0, sizeof(fl6)); 213 pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb,
214 skb->len, &fl6->saddr, &fl6->daddr);
221 215
222 fl6.flowi6_proto = sk->sk_protocol; 216 IP6_ECN_flow_xmit(sk, fl6->flowlabel);
223
224 /* Fill in the dest address from the route entry passed with the skb
225 * and the source address from the transport.
226 */
227 fl6.daddr = transport->ipaddr.v6.sin6_addr;
228 fl6.saddr = transport->saddr.v6.sin6_addr;
229
230 fl6.flowlabel = np->flow_label;
231 IP6_ECN_flow_xmit(sk, fl6.flowlabel);
232 if (ipv6_addr_type(&fl6.saddr) & IPV6_ADDR_LINKLOCAL)
233 fl6.flowi6_oif = transport->saddr.v6.sin6_scope_id;
234 else
235 fl6.flowi6_oif = sk->sk_bound_dev_if;
236
237 if (np->opt && np->opt->srcrt) {
238 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
239 fl6.daddr = *rt0->addr;
240 }
241
242 SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n",
243 __func__, skb, skb->len,
244 &fl6.saddr, &fl6.daddr);
245
246 SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
247 217
248 if (!(transport->param_flags & SPP_PMTUD_ENABLE)) 218 if (!(transport->param_flags & SPP_PMTUD_ENABLE))
249 skb->local_df = 1; 219 skb->local_df = 1;
250 220
251 return ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); 221 SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
222
223 return ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
252} 224}
253 225
254/* Returns the dst cache entry for the given source and destination ip 226/* Returns the dst cache entry for the given source and destination ip
@@ -261,10 +233,12 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
261 struct dst_entry *dst = NULL; 233 struct dst_entry *dst = NULL;
262 struct flowi6 *fl6 = &fl->u.ip6; 234 struct flowi6 *fl6 = &fl->u.ip6;
263 struct sctp_bind_addr *bp; 235 struct sctp_bind_addr *bp;
236 struct ipv6_pinfo *np = inet6_sk(sk);
264 struct sctp_sockaddr_entry *laddr; 237 struct sctp_sockaddr_entry *laddr;
265 union sctp_addr *baddr = NULL; 238 union sctp_addr *baddr = NULL;
266 union sctp_addr *daddr = &t->ipaddr; 239 union sctp_addr *daddr = &t->ipaddr;
267 union sctp_addr dst_saddr; 240 union sctp_addr dst_saddr;
241 struct in6_addr *final_p, final;
268 __u8 matchlen = 0; 242 __u8 matchlen = 0;
269 __u8 bmatchlen; 243 __u8 bmatchlen;
270 sctp_scope_t scope; 244 sctp_scope_t scope;
@@ -276,7 +250,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
276 if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) 250 if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
277 fl6->flowi6_oif = daddr->v6.sin6_scope_id; 251 fl6->flowi6_oif = daddr->v6.sin6_scope_id;
278 252
279 SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl6->daddr); 253 pr_debug("%s: dst=%pI6 ", __func__, &fl6->daddr);
280 254
281 if (asoc) 255 if (asoc)
282 fl6->fl6_sport = htons(asoc->base.bind_addr.port); 256 fl6->fl6_sport = htons(asoc->base.bind_addr.port);
@@ -284,10 +258,12 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
284 if (saddr) { 258 if (saddr) {
285 fl6->saddr = saddr->v6.sin6_addr; 259 fl6->saddr = saddr->v6.sin6_addr;
286 fl6->fl6_sport = saddr->v6.sin6_port; 260 fl6->fl6_sport = saddr->v6.sin6_port;
287 SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6->saddr); 261
262 pr_debug("src=%pI6 - ", &fl6->saddr);
288 } 263 }
289 264
290 dst = ip6_dst_lookup_flow(sk, fl6, NULL, false); 265 final_p = fl6_update_dst(fl6, np->opt, &final);
266 dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
291 if (!asoc || saddr) 267 if (!asoc || saddr)
292 goto out; 268 goto out;
293 269
@@ -339,22 +315,27 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
339 } 315 }
340 } 316 }
341 rcu_read_unlock(); 317 rcu_read_unlock();
318
342 if (baddr) { 319 if (baddr) {
343 fl6->saddr = baddr->v6.sin6_addr; 320 fl6->saddr = baddr->v6.sin6_addr;
344 fl6->fl6_sport = baddr->v6.sin6_port; 321 fl6->fl6_sport = baddr->v6.sin6_port;
345 dst = ip6_dst_lookup_flow(sk, fl6, NULL, false); 322 final_p = fl6_update_dst(fl6, np->opt, &final);
323 dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
346 } 324 }
347 325
348out: 326out:
349 if (!IS_ERR_OR_NULL(dst)) { 327 if (!IS_ERR_OR_NULL(dst)) {
350 struct rt6_info *rt; 328 struct rt6_info *rt;
329
351 rt = (struct rt6_info *)dst; 330 rt = (struct rt6_info *)dst;
352 t->dst = dst; 331 t->dst = dst;
353 SCTP_DEBUG_PRINTK("rt6_dst:%pI6 rt6_src:%pI6\n", 332 t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
354 &rt->rt6i_dst.addr, &fl6->saddr); 333 pr_debug("rt6_dst:%pI6 rt6_src:%pI6\n", &rt->rt6i_dst.addr,
334 &fl6->saddr);
355 } else { 335 } else {
356 t->dst = NULL; 336 t->dst = NULL;
357 SCTP_DEBUG_PRINTK("NO ROUTE\n"); 337
338 pr_debug("no route\n");
358 } 339 }
359} 340}
360 341
@@ -377,7 +358,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk,
377 struct flowi6 *fl6 = &fl->u.ip6; 358 struct flowi6 *fl6 = &fl->u.ip6;
378 union sctp_addr *saddr = &t->saddr; 359 union sctp_addr *saddr = &t->saddr;
379 360
380 SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p\n", __func__, t->asoc, t->dst); 361 pr_debug("%s: asoc:%p dst:%p\n", __func__, t->asoc, t->dst);
381 362
382 if (t->dst) { 363 if (t->dst) {
383 saddr->v6.sin6_family = AF_INET6; 364 saddr->v6.sin6_family = AF_INET6;
@@ -402,7 +383,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
402 read_lock_bh(&in6_dev->lock); 383 read_lock_bh(&in6_dev->lock);
403 list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { 384 list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
404 /* Add the address to the local list. */ 385 /* Add the address to the local list. */
405 addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC); 386 addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
406 if (addr) { 387 if (addr) {
407 addr->a.v6.sin6_family = AF_INET6; 388 addr->a.v6.sin6_family = AF_INET6;
408 addr->a.v6.sin6_port = 0; 389 addr->a.v6.sin6_port = 0;
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index fe012c44f8df..5ea573b37648 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -26,16 +26,10 @@
26 * 26 *
27 * Please send any bug reports or fixes you make to the 27 * Please send any bug reports or fixes you make to the
28 * email address(es): 28 * email address(es):
29 * lksctp developers <lksctp-developers@lists.sourceforge.net> 29 * lksctp developers <linux-sctp@vger.kernel.org>
30 *
31 * Or submit a bug report through the following website:
32 * http://www.sf.net/projects/lksctp
33 * 30 *
34 * Written or modified by: 31 * Written or modified by:
35 * Jon Grimm <jgrimm@us.ibm.com> 32 * Jon Grimm <jgrimm@us.ibm.com>
36 *
37 * Any bugs reported given to us we will try to fix... any fixes shared will
38 * be incorporated into the next SCTP release.
39 */ 33 */
40 34
41#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 35#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/output.c b/net/sctp/output.c
index bbef4a7a9b56..0ac3a65daccb 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -26,19 +26,13 @@
26 * 26 *
27 * Please send any bug reports or fixes you make to the 27 * Please send any bug reports or fixes you make to the
28 * email address(es): 28 * email address(es):
29 * lksctp developers <lksctp-developers@lists.sourceforge.net> 29 * lksctp developers <linux-sctp@vger.kernel.org>
30 *
31 * Or submit a bug report through the following website:
32 * http://www.sf.net/projects/lksctp
33 * 30 *
34 * Written or modified by: 31 * Written or modified by:
35 * La Monte H.P. Yarroll <piggy@acm.org> 32 * La Monte H.P. Yarroll <piggy@acm.org>
36 * Karl Knutson <karl@athena.chicago.il.us> 33 * Karl Knutson <karl@athena.chicago.il.us>
37 * Jon Grimm <jgrimm@austin.ibm.com> 34 * Jon Grimm <jgrimm@austin.ibm.com>
38 * Sridhar Samudrala <sri@us.ibm.com> 35 * Sridhar Samudrala <sri@us.ibm.com>
39 *
40 * Any bugs reported given to us we will try to fix... any fixes shared will
41 * be incorporated into the next SCTP release.
42 */ 36 */
43 37
44#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 38#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -93,8 +87,7 @@ struct sctp_packet *sctp_packet_config(struct sctp_packet *packet,
93{ 87{
94 struct sctp_chunk *chunk = NULL; 88 struct sctp_chunk *chunk = NULL;
95 89
96 SCTP_DEBUG_PRINTK("%s: packet:%p vtag:0x%x\n", __func__, 90 pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag);
97 packet, vtag);
98 91
99 packet->vtag = vtag; 92 packet->vtag = vtag;
100 93
@@ -119,8 +112,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet,
119 struct sctp_association *asoc = transport->asoc; 112 struct sctp_association *asoc = transport->asoc;
120 size_t overhead; 113 size_t overhead;
121 114
122 SCTP_DEBUG_PRINTK("%s: packet:%p transport:%p\n", __func__, 115 pr_debug("%s: packet:%p transport:%p\n", __func__, packet, transport);
123 packet, transport);
124 116
125 packet->transport = transport; 117 packet->transport = transport;
126 packet->source_port = sport; 118 packet->source_port = sport;
@@ -145,7 +137,7 @@ void sctp_packet_free(struct sctp_packet *packet)
145{ 137{
146 struct sctp_chunk *chunk, *tmp; 138 struct sctp_chunk *chunk, *tmp;
147 139
148 SCTP_DEBUG_PRINTK("%s: packet:%p\n", __func__, packet); 140 pr_debug("%s: packet:%p\n", __func__, packet);
149 141
150 list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { 142 list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
151 list_del_init(&chunk->list); 143 list_del_init(&chunk->list);
@@ -167,8 +159,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
167 sctp_xmit_t retval; 159 sctp_xmit_t retval;
168 int error = 0; 160 int error = 0;
169 161
170 SCTP_DEBUG_PRINTK("%s: packet:%p chunk:%p\n", __func__, 162 pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk);
171 packet, chunk);
172 163
173 switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) { 164 switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) {
174 case SCTP_XMIT_PMTU_FULL: 165 case SCTP_XMIT_PMTU_FULL:
@@ -334,8 +325,7 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet,
334{ 325{
335 sctp_xmit_t retval = SCTP_XMIT_OK; 326 sctp_xmit_t retval = SCTP_XMIT_OK;
336 327
337 SCTP_DEBUG_PRINTK("%s: packet:%p chunk:%p\n", __func__, packet, 328 pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk);
338 chunk);
339 329
340 /* Data chunks are special. Before seeing what else we can 330 /* Data chunks are special. Before seeing what else we can
341 * bundle into this packet, check to see if we are allowed to 331 * bundle into this packet, check to see if we are allowed to
@@ -402,7 +392,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
402 unsigned char *auth = NULL; /* pointer to auth in skb data */ 392 unsigned char *auth = NULL; /* pointer to auth in skb data */
403 __u32 cksum_buf_len = sizeof(struct sctphdr); 393 __u32 cksum_buf_len = sizeof(struct sctphdr);
404 394
405 SCTP_DEBUG_PRINTK("%s: packet:%p\n", __func__, packet); 395 pr_debug("%s: packet:%p\n", __func__, packet);
406 396
407 /* Do NOT generate a chunkless packet. */ 397 /* Do NOT generate a chunkless packet. */
408 if (list_empty(&packet->chunk_list)) 398 if (list_empty(&packet->chunk_list))
@@ -472,7 +462,9 @@ int sctp_packet_transmit(struct sctp_packet *packet)
472 * 462 *
473 * [This whole comment explains WORD_ROUND() below.] 463 * [This whole comment explains WORD_ROUND() below.]
474 */ 464 */
475 SCTP_DEBUG_PRINTK("***sctp_transmit_packet***\n"); 465
466 pr_debug("***sctp_transmit_packet***\n");
467
476 list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { 468 list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
477 list_del_init(&chunk->list); 469 list_del_init(&chunk->list);
478 if (sctp_chunk_is_data(chunk)) { 470 if (sctp_chunk_is_data(chunk)) {
@@ -505,16 +497,13 @@ int sctp_packet_transmit(struct sctp_packet *packet)
505 memcpy(skb_put(nskb, chunk->skb->len), 497 memcpy(skb_put(nskb, chunk->skb->len),
506 chunk->skb->data, chunk->skb->len); 498 chunk->skb->data, chunk->skb->len);
507 499
508 SCTP_DEBUG_PRINTK("%s %p[%s] %s 0x%x, %s %d, %s %d, %s %d\n", 500 pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, "
509 "*** Chunk", chunk, 501 "rtt_in_progress:%d\n", chunk,
510 sctp_cname(SCTP_ST_CHUNK( 502 sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)),
511 chunk->chunk_hdr->type)), 503 chunk->has_tsn ? "TSN" : "No TSN",
512 chunk->has_tsn ? "TSN" : "No TSN", 504 chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0,
513 chunk->has_tsn ? 505 ntohs(chunk->chunk_hdr->length), chunk->skb->len,
514 ntohl(chunk->subh.data_hdr->tsn) : 0, 506 chunk->rtt_in_progress);
515 "length", ntohs(chunk->chunk_hdr->length),
516 "chunk->skb->len", chunk->skb->len,
517 "rtt_in_progress", chunk->rtt_in_progress);
518 507
519 /* 508 /*
520 * If this is a control chunk, this is our last 509 * If this is a control chunk, this is our last
@@ -606,8 +595,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
606 } 595 }
607 } 596 }
608 597
609 SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n", 598 pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len);
610 nskb->len);
611 599
612 nskb->local_df = packet->ipfragok; 600 nskb->local_df = packet->ipfragok;
613 (*tp->af_specific->sctp_xmit)(nskb, tp); 601 (*tp->af_specific->sctp_xmit)(nskb, tp);
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 32a4625fef77..94df75877869 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -28,10 +28,7 @@
28 * 28 *
29 * Please send any bug reports or fixes you make to the 29 * Please send any bug reports or fixes you make to the
30 * email address(es): 30 * email address(es):
31 * lksctp developers <lksctp-developers@lists.sourceforge.net> 31 * lksctp developers <linux-sctp@vger.kernel.org>
32 *
33 * Or submit a bug report through the following website:
34 * http://www.sf.net/projects/lksctp
35 * 32 *
36 * Written or modified by: 33 * Written or modified by:
37 * La Monte H.P. Yarroll <piggy@acm.org> 34 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -41,9 +38,6 @@
41 * Hui Huang <hui.huang@nokia.com> 38 * Hui Huang <hui.huang@nokia.com>
42 * Sridhar Samudrala <sri@us.ibm.com> 39 * Sridhar Samudrala <sri@us.ibm.com>
43 * Jon Grimm <jgrimm@us.ibm.com> 40 * Jon Grimm <jgrimm@us.ibm.com>
44 *
45 * Any bugs reported given to us we will try to fix... any fixes shared will
46 * be incorporated into the next SCTP release.
47 */ 41 */
48 42
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 43#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -206,6 +200,8 @@ static inline int sctp_cacc_skip(struct sctp_transport *primary,
206 */ 200 */
207void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) 201void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
208{ 202{
203 memset(q, 0, sizeof(struct sctp_outq));
204
209 q->asoc = asoc; 205 q->asoc = asoc;
210 INIT_LIST_HEAD(&q->out_chunk_list); 206 INIT_LIST_HEAD(&q->out_chunk_list);
211 INIT_LIST_HEAD(&q->control_chunk_list); 207 INIT_LIST_HEAD(&q->control_chunk_list);
@@ -213,11 +209,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
213 INIT_LIST_HEAD(&q->sacked); 209 INIT_LIST_HEAD(&q->sacked);
214 INIT_LIST_HEAD(&q->abandoned); 210 INIT_LIST_HEAD(&q->abandoned);
215 211
216 q->fast_rtx = 0;
217 q->outstanding_bytes = 0;
218 q->empty = 1; 212 q->empty = 1;
219 q->cork = 0;
220 q->out_qlen = 0;
221} 213}
222 214
223/* Free the outqueue structure and any related pending chunks. 215/* Free the outqueue structure and any related pending chunks.
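sctp_outq_init() now zeroes the whole structure up front and keeps only the one non-zero default, instead of clearing selected members individually; a field added to struct sctp_outq later starts initialized for free. The pattern, modeled on a hypothetical stand-in struct:

#include <string.h>

struct outq {
        void *asoc;
        int fast_rtx;
        int outstanding_bytes;
        int empty;
        int cork;
        int out_qlen;
};

static void outq_init(struct outq *q, void *asoc)
{
        memset(q, 0, sizeof(*q));       /* everything starts at zero */
        q->asoc = asoc;
        q->empty = 1;                   /* the only non-zero default */
}

int main(void)
{
        struct outq q;

        outq_init(&q, 0);
        return q.empty == 1 ? 0 : 1;
}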
@@ -301,10 +293,10 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
301 struct net *net = sock_net(q->asoc->base.sk); 293 struct net *net = sock_net(q->asoc->base.sk);
302 int error = 0; 294 int error = 0;
303 295
304 SCTP_DEBUG_PRINTK("sctp_outq_tail(%p, %p[%s])\n", 296 pr_debug("%s: outq:%p, chunk:%p[%s]\n", __func__, q, chunk,
305 q, chunk, chunk && chunk->chunk_hdr ? 297 chunk && chunk->chunk_hdr ?
306 sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) 298 sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
307 : "Illegal Chunk"); 299 "illegal chunk");
308 300
309 /* If it is data, queue it up, otherwise, send it 301 /* If it is data, queue it up, otherwise, send it
310 * immediately. 302 * immediately.
@@ -330,10 +322,10 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
330 break; 322 break;
331 323
332 default: 324 default:
333 SCTP_DEBUG_PRINTK("outqueueing (%p, %p[%s])\n", 325 pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n",
334 q, chunk, chunk && chunk->chunk_hdr ? 326 __func__, q, chunk, chunk && chunk->chunk_hdr ?
335 sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) 327 sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
336 : "Illegal Chunk"); 328 "illegal chunk");
337 329
338 sctp_outq_tail_data(q, chunk); 330 sctp_outq_tail_data(q, chunk);
339 if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) 331 if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
@@ -462,14 +454,10 @@ void sctp_retransmit_mark(struct sctp_outq *q,
462 } 454 }
463 } 455 }
464 456
465 SCTP_DEBUG_PRINTK("%s: transport: %p, reason: %d, " 457 pr_debug("%s: transport:%p, reason:%d, cwnd:%d, ssthresh:%d, "
466 "cwnd: %d, ssthresh: %d, flight_size: %d, " 458 "flight_size:%d, pba:%d\n", __func__, transport, reason,
467 "pba: %d\n", __func__, 459 transport->cwnd, transport->ssthresh, transport->flight_size,
468 transport, reason, 460 transport->partial_bytes_acked);
469 transport->cwnd, transport->ssthresh,
470 transport->flight_size,
471 transport->partial_bytes_acked);
472
473} 461}
474 462
475/* Mark all the eligible packets on a transport for retransmission and force 463/* Mark all the eligible packets on a transport for retransmission and force
@@ -1016,19 +1004,13 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
1016 sctp_transport_burst_limited(transport); 1004 sctp_transport_burst_limited(transport);
1017 } 1005 }
1018 1006
1019 SCTP_DEBUG_PRINTK("sctp_outq_flush(%p, %p[%s]), ", 1007 pr_debug("%s: outq:%p, chunk:%p[%s], tx-tsn:0x%x skb->head:%p "
1020 q, chunk, 1008 "skb->users:%d\n",
1021 chunk && chunk->chunk_hdr ? 1009 __func__, q, chunk, chunk && chunk->chunk_hdr ?
1022 sctp_cname(SCTP_ST_CHUNK( 1010 sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
1023 chunk->chunk_hdr->type)) 1011 "illegal chunk", ntohl(chunk->subh.data_hdr->tsn),
1024 : "Illegal Chunk"); 1012 chunk->skb ? chunk->skb->head : NULL, chunk->skb ?
1025 1013 atomic_read(&chunk->skb->users) : -1);
1026 SCTP_DEBUG_PRINTK("TX TSN 0x%x skb->head "
1027 "%p skb->users %d.\n",
1028 ntohl(chunk->subh.data_hdr->tsn),
1029 chunk->skb ?chunk->skb->head : NULL,
1030 chunk->skb ?
1031 atomic_read(&chunk->skb->users) : -1);
1032 1014
1033 /* Add the chunk to the packet. */ 1015 /* Add the chunk to the packet. */
1034 status = sctp_packet_transmit_chunk(packet, chunk, 0); 1016 status = sctp_packet_transmit_chunk(packet, chunk, 0);
@@ -1040,10 +1022,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
1040 /* We could not append this chunk, so put 1022 /* We could not append this chunk, so put
1041 * the chunk back on the output queue. 1023 * the chunk back on the output queue.
1042 */ 1024 */
1043 SCTP_DEBUG_PRINTK("sctp_outq_flush: could " 1025 pr_debug("%s: could not transmit tsn:0x%x, status:%d\n",
1044 "not transmit TSN: 0x%x, status: %d\n", 1026 __func__, ntohl(chunk->subh.data_hdr->tsn),
1045 ntohl(chunk->subh.data_hdr->tsn), 1027 status);
1046 status); 1028
1047 sctp_outq_head_data(q, chunk); 1029 sctp_outq_head_data(q, chunk);
1048 goto sctp_flush_out; 1030 goto sctp_flush_out;
1049 break; 1031 break;
@@ -1286,11 +1268,10 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
1286 1268
1287 sctp_generate_fwdtsn(q, sack_ctsn); 1269 sctp_generate_fwdtsn(q, sack_ctsn);
1288 1270
1289 SCTP_DEBUG_PRINTK("%s: sack Cumulative TSN Ack is 0x%x.\n", 1271 pr_debug("%s: sack cumulative tsn ack:0x%x\n", __func__, sack_ctsn);
1290 __func__, sack_ctsn); 1272 pr_debug("%s: cumulative tsn ack of assoc:%p is 0x%x, "
1291 SCTP_DEBUG_PRINTK("%s: Cumulative TSN Ack of association, " 1273 "advertised peer ack point:0x%x\n", __func__, asoc, ctsn,
1292 "%p is 0x%x. Adv peer ack point: 0x%x\n", 1274 asoc->adv_peer_ack_point);
1293 __func__, asoc, ctsn, asoc->adv_peer_ack_point);
1294 1275
1295 /* See if all chunks are acked. 1276 /* See if all chunks are acked.
1296 * Make sure the empty queue handler will get run later. 1277 * Make sure the empty queue handler will get run later.
@@ -1306,7 +1287,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
1306 goto finish; 1287 goto finish;
1307 } 1288 }
1308 1289
1309 SCTP_DEBUG_PRINTK("sack queue is empty.\n"); 1290 pr_debug("%s: sack queue is empty\n", __func__);
1310finish: 1291finish:
1311 return q->empty; 1292 return q->empty;
1312} 1293}
@@ -1347,21 +1328,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1347 __u8 restart_timer = 0; 1328 __u8 restart_timer = 0;
1348 int bytes_acked = 0; 1329 int bytes_acked = 0;
1349 int migrate_bytes = 0; 1330 int migrate_bytes = 0;
1350 1331 bool forward_progress = false;
1351 /* These state variables are for coherent debug output. --xguo */
1352
1353#if SCTP_DEBUG
1354 __u32 dbg_ack_tsn = 0; /* An ACKed TSN range starts here... */
1355 __u32 dbg_last_ack_tsn = 0; /* ...and finishes here. */
1356 __u32 dbg_kept_tsn = 0; /* An un-ACKed range starts here... */
1357 __u32 dbg_last_kept_tsn = 0; /* ...and finishes here. */
1358
1359 /* 0 : The last TSN was ACKed.
1360 * 1 : The last TSN was NOT ACKed (i.e. KEPT).
1361 * -1: We need to initialize.
1362 */
1363 int dbg_prt_state = -1;
1364#endif /* SCTP_DEBUG */
1365 1332
1366 sack_ctsn = ntohl(sack->cum_tsn_ack); 1333 sack_ctsn = ntohl(sack->cum_tsn_ack);
1367 1334
@@ -1428,6 +1395,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1428 bytes_acked += sctp_data_size(tchunk); 1395 bytes_acked += sctp_data_size(tchunk);
1429 if (!tchunk->transport) 1396 if (!tchunk->transport)
1430 migrate_bytes += sctp_data_size(tchunk); 1397 migrate_bytes += sctp_data_size(tchunk);
1398 forward_progress = true;
1431 } 1399 }
1432 1400
1433 if (TSN_lte(tsn, sack_ctsn)) { 1401 if (TSN_lte(tsn, sack_ctsn)) {
@@ -1441,6 +1409,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1441 * current RTO. 1409 * current RTO.
1442 */ 1410 */
1443 restart_timer = 1; 1411 restart_timer = 1;
1412 forward_progress = true;
1444 1413
1445 if (!tchunk->tsn_gap_acked) { 1414 if (!tchunk->tsn_gap_acked) {
1446 /* 1415 /*
@@ -1484,57 +1453,11 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1484 */ 1453 */
1485 list_add_tail(lchunk, &tlist); 1454 list_add_tail(lchunk, &tlist);
1486 } 1455 }
1487
1488#if SCTP_DEBUG
1489 switch (dbg_prt_state) {
1490 case 0: /* last TSN was ACKed */
1491 if (dbg_last_ack_tsn + 1 == tsn) {
1492 /* This TSN belongs to the
1493 * current ACK range.
1494 */
1495 break;
1496 }
1497
1498 if (dbg_last_ack_tsn != dbg_ack_tsn) {
1499 /* Display the end of the
1500 * current range.
1501 */
1502 SCTP_DEBUG_PRINTK_CONT("-%08x",
1503 dbg_last_ack_tsn);
1504 }
1505
1506 /* Start a new range. */
1507 SCTP_DEBUG_PRINTK_CONT(",%08x", tsn);
1508 dbg_ack_tsn = tsn;
1509 break;
1510
1511 case 1: /* The last TSN was NOT ACKed. */
1512 if (dbg_last_kept_tsn != dbg_kept_tsn) {
1513 /* Display the end of current range. */
1514 SCTP_DEBUG_PRINTK_CONT("-%08x",
1515 dbg_last_kept_tsn);
1516 }
1517
1518 SCTP_DEBUG_PRINTK_CONT("\n");
1519
1520 /* FALL THROUGH... */
1521 default:
1522 /* This is the first-ever TSN we examined. */
1523 /* Start a new range of ACK-ed TSNs. */
1524 SCTP_DEBUG_PRINTK("ACKed: %08x", tsn);
1525 dbg_prt_state = 0;
1526 dbg_ack_tsn = tsn;
1527 }
1528
1529 dbg_last_ack_tsn = tsn;
1530#endif /* SCTP_DEBUG */
1531
1532 } else { 1456 } else {
1533 if (tchunk->tsn_gap_acked) { 1457 if (tchunk->tsn_gap_acked) {
1534 SCTP_DEBUG_PRINTK("%s: Receiver reneged on " 1458 pr_debug("%s: receiver reneged on data TSN:0x%x\n",
1535 "data TSN: 0x%x\n", 1459 __func__, tsn);
1536 __func__, 1460
1537 tsn);
1538 tchunk->tsn_gap_acked = 0; 1461 tchunk->tsn_gap_acked = 0;
1539 1462
1540 if (tchunk->transport) 1463 if (tchunk->transport)
@@ -1553,59 +1476,9 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1553 } 1476 }
1554 1477
1555 list_add_tail(lchunk, &tlist); 1478 list_add_tail(lchunk, &tlist);
1556
1557#if SCTP_DEBUG
1558 /* See the above comments on ACK-ed TSNs. */
1559 switch (dbg_prt_state) {
1560 case 1:
1561 if (dbg_last_kept_tsn + 1 == tsn)
1562 break;
1563
1564 if (dbg_last_kept_tsn != dbg_kept_tsn)
1565 SCTP_DEBUG_PRINTK_CONT("-%08x",
1566 dbg_last_kept_tsn);
1567
1568 SCTP_DEBUG_PRINTK_CONT(",%08x", tsn);
1569 dbg_kept_tsn = tsn;
1570 break;
1571
1572 case 0:
1573 if (dbg_last_ack_tsn != dbg_ack_tsn)
1574 SCTP_DEBUG_PRINTK_CONT("-%08x",
1575 dbg_last_ack_tsn);
1576 SCTP_DEBUG_PRINTK_CONT("\n");
1577
1578 /* FALL THROUGH... */
1579 default:
1580 SCTP_DEBUG_PRINTK("KEPT: %08x",tsn);
1581 dbg_prt_state = 1;
1582 dbg_kept_tsn = tsn;
1583 }
1584
1585 dbg_last_kept_tsn = tsn;
1586#endif /* SCTP_DEBUG */
1587 } 1479 }
1588 } 1480 }
1589 1481
1590#if SCTP_DEBUG
1591 /* Finish off the last range, displaying its ending TSN. */
1592 switch (dbg_prt_state) {
1593 case 0:
1594 if (dbg_last_ack_tsn != dbg_ack_tsn) {
1595 SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_ack_tsn);
1596 } else {
1597 SCTP_DEBUG_PRINTK_CONT("\n");
1598 }
1599 break;
1600
1601 case 1:
1602 if (dbg_last_kept_tsn != dbg_kept_tsn) {
1603 SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_kept_tsn);
1604 } else {
1605 SCTP_DEBUG_PRINTK_CONT("\n");
1606 }
1607 }
1608#endif /* SCTP_DEBUG */
1609 if (transport) { 1482 if (transport) {
1610 if (bytes_acked) { 1483 if (bytes_acked) {
1611 struct sctp_association *asoc = transport->asoc; 1484 struct sctp_association *asoc = transport->asoc;
@@ -1627,6 +1500,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 			 */
 			transport->error_count = 0;
 			transport->asoc->overall_error_count = 0;
+			forward_progress = true;
 
 			/*
 			 * While in SHUTDOWN PENDING, we may have started
@@ -1678,9 +1552,9 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 		    !list_empty(&tlist) &&
 		    (sack_ctsn+2 == q->asoc->next_tsn) &&
 		    q->asoc->state < SCTP_STATE_SHUTDOWN_PENDING) {
-			SCTP_DEBUG_PRINTK("%s: SACK received for zero "
-					  "window probe: %u\n",
-					  __func__, sack_ctsn);
+			pr_debug("%s: sack received for zero window "
+				 "probe:%u\n", __func__, sack_ctsn);
+
 			q->asoc->overall_error_count = 0;
 			transport->error_count = 0;
 		}
@@ -1700,6 +1574,11 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 					    jiffies + transport->rto))
 				sctp_transport_hold(transport);
 		}
+
+		if (forward_progress) {
+			if (transport->dst)
+				dst_confirm(transport->dst);
+		}
 	}
 
 	list_splice(&tlist, transmitted_queue);
@@ -1741,10 +1620,8 @@ static void sctp_mark_missing(struct sctp_outq *q,
 				       count_of_newacks, tsn)) {
 			chunk->tsn_missing_report++;
 
-			SCTP_DEBUG_PRINTK(
-				"%s: TSN 0x%x missing counter: %d\n",
-				__func__, tsn,
-				chunk->tsn_missing_report);
+			pr_debug("%s: tsn:0x%x missing counter:%d\n",
+				 __func__, tsn, chunk->tsn_missing_report);
 		}
 	}
 	/*
@@ -1764,11 +1641,10 @@ static void sctp_mark_missing(struct sctp_outq *q,
 		if (do_fast_retransmit)
 			sctp_retransmit(q, transport, SCTP_RTXR_FAST_RTX);
 
-		SCTP_DEBUG_PRINTK("%s: transport: %p, cwnd: %d, "
-				  "ssthresh: %d, flight_size: %d, pba: %d\n",
-				  __func__, transport, transport->cwnd,
-				  transport->ssthresh, transport->flight_size,
-				  transport->partial_bytes_acked);
+		pr_debug("%s: transport:%p, cwnd:%d, ssthresh:%d, "
+			 "flight_size:%d, pba:%d\n", __func__, transport,
+			 transport->cwnd, transport->ssthresh,
+			 transport->flight_size, transport->partial_bytes_acked);
 	}
 }
 
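Note on the outqueue.c hunks above: besides removing the SCTP_DEBUG range printer, they thread a forward_progress flag through sctp_check_transmitted(), so that a SACK which acknowledges new data (or resets the error counters) also revalidates the cached route. A hedged, minimal sketch of that idea with an illustrative function name — the real logic sits inline in sctp_check_transmitted():

/* Sketch only: confirm the neighbour behind the cached route once a
 * SACK proves the peer made forward progress. Illustrative, not the
 * patch's exact code.
 */
static void example_confirm_on_progress(struct sctp_transport *transport,
					bool forward_progress)
{
	if (forward_progress && transport->dst)
		dst_confirm(transport->dst);
}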
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
index 794bb14decde..ce1ffd811775 100644
--- a/net/sctp/primitive.c
+++ b/net/sctp/primitive.c
@@ -29,10 +29,7 @@
  *
  * Please send any bug reports or fixes you make to the
  * email address(es):
- *    lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- *    http://www.sf.net/projects/lksctp
+ *    lksctp developers <linux-sctp@vger.kernel.org>
  *
  * Written or modified by:
  *    La Monte H.P. Yarroll <piggy@acm.org>
@@ -40,9 +37,6 @@
  *    Karl Knutson <karl@athena.chicago.il.us>
  *    Ardelle Fan <ardelle.fan@intel.com>
  *    Kevin Gao <kevin.gao@intel.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
  */
 
 #include <linux/types.h>
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index e62c22535be4..53c452efb40b 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -46,6 +46,10 @@ static int port __read_mostly = 0;
 MODULE_PARM_DESC(port, "Port to match (0=all)");
 module_param(port, int, 0);
 
+static unsigned int fwmark __read_mostly = 0;
+MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");
+module_param(fwmark, uint, 0);
+
 static int bufsize __read_mostly = 64 * 1024;
 MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
 module_param(bufsize, int, 0);
@@ -129,15 +133,19 @@ static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
+	struct sctp_chunk *chunk = arg;
+	struct sk_buff *skb = chunk->skb;
 	struct sctp_transport *sp;
 	static __u32 lcwnd = 0;
 	struct timespec now;
 
 	sp = asoc->peer.primary_path;
 
-	if ((full || sp->cwnd != lcwnd) &&
-	    (!port || asoc->peer.port == port ||
-	     ep->base.bind_addr.port == port)) {
+	if (((port == 0 && fwmark == 0) ||
+	     asoc->peer.port == port ||
+	     ep->base.bind_addr.port == port ||
+	     (fwmark > 0 && skb->mark == fwmark)) &&
+	    (full || sp->cwnd != lcwnd)) {
 		lcwnd = sp->cwnd;
 
 		getnstimeofday(&now);
@@ -155,13 +163,8 @@ static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
 			if (sp == asoc->peer.primary_path)
 				printl("*");
 
-			if (sp->ipaddr.sa.sa_family == AF_INET)
-				printl("%pI4 ", &sp->ipaddr.v4.sin_addr);
-			else
-				printl("%pI6 ", &sp->ipaddr.v6.sin6_addr);
-
-			printl("%2u %8u %8u %8u %8u %8u ",
-			       sp->state, sp->cwnd, sp->ssthresh,
+			printl("%pISc %2u %8u %8u %8u %8u %8u ",
+			       &sp->ipaddr, sp->state, sp->cwnd, sp->ssthresh,
 			       sp->flight_size, sp->partial_bytes_acked,
 			       sp->pathmtu);
 		}
@@ -203,8 +206,8 @@ static __init int sctpprobe_init(void)
 	if (ret)
 		goto remove_proc;
 
-	pr_info("probe registered (port=%d)\n", port);
-
+	pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n",
+		port, fwmark, bufsize);
 	return 0;
 
 remove_proc:
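The reworked filter in jsctp_sf_eat_sack() above is easiest to read as a predicate: with neither port nor fwmark configured everything matches; otherwise a SACK is logged when either filter hits; and in all cases output happens only if full is set or cwnd changed. A sketch with hypothetical names (not part of the patch):

#include <linux/types.h>

/* Sketch of the match logic above; function and parameter names are
 * illustrative only.
 */
static bool example_sack_matches(__u16 peer_port, __u16 bind_port,
				 __u32 skb_mark, int port, unsigned int fwmark)
{
	if (port == 0 && fwmark == 0)	/* no filter configured: match all */
		return true;

	return peer_port == port || bind_port == port ||
	       (fwmark > 0 && skb_mark == fwmark);
}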
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 4e45ee35d0db..0c0642156842 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -22,16 +22,10 @@
  *
  * Please send any bug reports or fixes you make to the
  * email address(es):
- *    lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- *    http://www.sf.net/projects/lksctp
+ *    lksctp developers <linux-sctp@vger.kernel.org>
  *
  * Written or modified by:
  *    Sridhar Samudrala <sri@us.ibm.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
  */
 
 #include <linux/types.h>
@@ -134,9 +128,15 @@ static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_commo
 	struct sctp_af *af;
 
 	if (epb->type == SCTP_EP_TYPE_ASSOCIATION) {
 		asoc = sctp_assoc(epb);
-		peer = asoc->peer.primary_path;
-		primary = &peer->saddr;
+
+		peer = asoc->peer.primary_path;
+		if (unlikely(peer == NULL)) {
+			WARN(1, "Association %p with NULL primary path!\n", asoc);
+			return;
+		}
+
+		primary = &peer->saddr;
 	}
 
 	rcu_read_lock();
@@ -226,7 +226,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
 		sk = epb->sk;
 		if (!net_eq(sock_net(sk), seq_file_net(seq)))
 			continue;
-		seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk,
+		seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5u %5lu ", ep, sk,
 			   sctp_sk(sk)->type, sk->sk_state, hash,
 			   epb->bind_addr.port,
 			   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
@@ -336,7 +336,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 			continue;
 		seq_printf(seq,
 			   "%8pK %8pK %-3d %-3d %-2d %-4d "
-			   "%4d %8d %8d %7d %5lu %-5d %5d ",
+			   "%4d %8d %8d %7u %5lu %-5d %5d ",
 			   assoc, sk, sctp_sk(sk)->type, sk->sk_state,
 			   assoc->state, hash,
 			   assoc->assoc_id,
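The guard added to sctp_seq_dump_local_addrs() above follows a common /proc idiom: in a seq_file show path, WARN once and skip the entry rather than dereference a NULL pointer and oops whoever reads the file. A hedged sketch of the pattern (hypothetical function name; WARN() returns its condition, so it doubles as the test):

/* Sketch only, not the patch's code. */
static void example_dump_assoc(struct seq_file *seq,
			       struct sctp_association *asoc)
{
	struct sctp_transport *peer = asoc->peer.primary_path;

	if (WARN(!peer, "Association %p with NULL primary path!\n", asoc))
		return;		/* skip this entry instead of crashing */

	/* ... print peer->saddr and friends here ... */
}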
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index eaee00c61139..5e17092f4ada 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -29,10 +29,7 @@
  *
  * Please send any bug reports or fixes you make to the
  * email address(es):
- *    lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- *    http://www.sf.net/projects/lksctp
+ *    lksctp developers <linux-sctp@vger.kernel.org>
  *
  * Written or modified by:
  *    La Monte H.P. Yarroll <piggy@acm.org>
@@ -41,9 +38,6 @@
  *    Sridhar Samudrala <sri@us.ibm.com>
  *    Daisy Chang <daisyc@us.ibm.com>
  *    Ardelle Fan <ardelle.fan@intel.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -153,7 +147,7 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist,
 
 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
 		/* Add the address to the local list. */
-		addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC);
+		addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
 		if (addr) {
 			addr->a.v4.sin_family = AF_INET;
 			addr->a.v4.sin_port = 0;
@@ -178,7 +172,7 @@ static void sctp_get_local_addr_list(struct net *net)
 
 	rcu_read_lock();
 	for_each_netdev_rcu(net, dev) {
-		__list_for_each(pos, &sctp_address_families) {
+		list_for_each(pos, &sctp_address_families) {
 			af = list_entry(pos, struct sctp_af, list);
 			af->copy_addrlist(&net->sctp.local_addr_list, dev);
 		}
@@ -451,8 +445,8 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 		fl4->fl4_sport = saddr->v4.sin_port;
 	}
 
-	SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ",
-			  __func__, &fl4->daddr, &fl4->saddr);
+	pr_debug("%s: dst:%pI4, src:%pI4 - ", __func__, &fl4->daddr,
+		 &fl4->saddr);
 
 	rt = ip_route_output_key(sock_net(sk), fl4);
 	if (!IS_ERR(rt))
@@ -513,10 +507,10 @@ out_unlock:
 out:
 	t->dst = dst;
 	if (dst)
-		SCTP_DEBUG_PRINTK("rt_dst:%pI4, rt_src:%pI4\n",
-				  &fl4->daddr, &fl4->saddr);
+		pr_debug("rt_dst:%pI4, rt_src:%pI4\n",
+			 &fl4->daddr, &fl4->saddr);
 	else
-		SCTP_DEBUG_PRINTK("NO ROUTE\n");
+		pr_debug("no route\n");
 }
 
 /* For v4, the source address is cached in the route entry(dst). So no need
@@ -604,9 +598,9 @@ static void sctp_addr_wq_timeout_handler(unsigned long arg)
 	spin_lock_bh(&net->sctp.addr_wq_lock);
 
 	list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) {
-		SCTP_DEBUG_PRINTK_IPADDR("sctp_addrwq_timo_handler: the first ent in wq %p is ",
-		    " for cmd %d at entry %p\n", &net->sctp.addr_waitq, &addrw->a, addrw->state,
-		    addrw);
+		pr_debug("%s: the first ent in wq:%p is addr:%pISc for cmd:%d at "
+			 "entry:%p\n", __func__, &net->sctp.addr_waitq, &addrw->a.sa,
+			 addrw->state, addrw);
 
 #if IS_ENABLED(CONFIG_IPV6)
 		/* Now we send an ASCONF for each association */
@@ -623,8 +617,10 @@ static void sctp_addr_wq_timeout_handler(unsigned long arg)
 			    addrw->state == SCTP_ADDR_NEW) {
 				unsigned long timeo_val;
 
-				SCTP_DEBUG_PRINTK("sctp_timo_handler: this is on DAD, trying %d sec later\n",
-				    SCTP_ADDRESS_TICK_DELAY);
+				pr_debug("%s: this is on DAD, trying %d sec "
+					 "later\n", __func__,
+					 SCTP_ADDRESS_TICK_DELAY);
+
 				timeo_val = jiffies;
 				timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY);
 				mod_timer(&net->sctp.addr_wq_timer, timeo_val);
@@ -641,7 +637,7 @@ static void sctp_addr_wq_timeout_handler(unsigned long arg)
 				continue;
 			sctp_bh_lock_sock(sk);
 			if (sctp_asconf_mgmt(sp, addrw) < 0)
-				SCTP_DEBUG_PRINTK("sctp_addrwq_timo_handler: sctp_asconf_mgmt failed\n");
+				pr_debug("%s: sctp_asconf_mgmt failed\n", __func__);
 			sctp_bh_unlock_sock(sk);
 		}
 #if IS_ENABLED(CONFIG_IPV6)
@@ -707,9 +703,10 @@ void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cm
 	addrw = sctp_addr_wq_lookup(net, addr);
 	if (addrw) {
 		if (addrw->state != cmd) {
-			SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt offsets existing entry for %d ",
-			    " in wq %p\n", addrw->state, &addrw->a,
-			    &net->sctp.addr_waitq);
+			pr_debug("%s: offsets existing entry for %d, addr:%pISc "
+				 "in wq:%p\n", __func__, addrw->state, &addrw->a.sa,
+				 &net->sctp.addr_waitq);
+
 			list_del(&addrw->list);
 			kfree(addrw);
 		}
@@ -725,8 +722,9 @@ void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cm
 	}
 	addrw->state = cmd;
 	list_add_tail(&addrw->list, &net->sctp.addr_waitq);
-	SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt add new entry for cmd:%d ",
-	    " in wq %p\n", addrw->state, &addrw->a, &net->sctp.addr_waitq);
+
+	pr_debug("%s: add new entry for cmd:%d, addr:%pISc in wq:%p\n",
+		 __func__, addrw->state, &addrw->a.sa, &net->sctp.addr_waitq);
 
 	if (!timer_pending(&net->sctp.addr_wq_timer)) {
 		timeo_val = jiffies;
@@ -952,15 +950,14 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
 {
 	struct inet_sock *inet = inet_sk(skb->sk);
 
-	SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n",
-			  __func__, skb, skb->len,
-			  &transport->fl.u.ip4.saddr,
-			  &transport->fl.u.ip4.daddr);
+	pr_debug("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb,
+		 skb->len, &transport->fl.u.ip4.saddr, &transport->fl.u.ip4.daddr);
 
 	inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ?
 			 IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
 
 	SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS);
+
 	return ip_queue_xmit(skb, &transport->fl);
 }
 
@@ -1312,7 +1309,7 @@ static struct pernet_operations sctp_net_ops = {
 };
 
 /* Initialize the universe into something sensible. */
-SCTP_STATIC __init int sctp_init(void)
+static __init int sctp_init(void)
 {
 	int i;
 	int status = -EINVAL;
@@ -1321,9 +1318,8 @@ SCTP_STATIC __init int sctp_init(void)
 	int max_share;
 	int order;
 
-	/* SCTP_DEBUG sanity check. */
-	if (!sctp_sanity_check())
-		goto out;
+	BUILD_BUG_ON(sizeof(struct sctp_ulpevent) >
+		     sizeof(((struct sk_buff *) 0)->cb));
 
 	/* Allocate bind_bucket and chunk caches. */
 	status = -ENOBUFS;
@@ -1499,7 +1495,7 @@ err_chunk_cachep:
 }
 
 /* Exit handler for the SCTP protocol. */
-SCTP_STATIC __exit void sctp_exit(void)
+static __exit void sctp_exit(void)
 {
 	/* BUG.  This should probably do something useful like clean
 	 * up all the remaining associations and all that memory.
@@ -1545,7 +1541,7 @@ module_exit(sctp_exit);
  */
 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-132");
 MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-132");
-MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>");
+MODULE_AUTHOR("Linux Kernel SCTP developers <linux-sctp@vger.kernel.org>");
MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)");
 module_param_named(no_checksums, sctp_checksum_disable, bool, 0644);
 MODULE_PARM_DESC(no_checksums, "Disable checksums computing and verification");
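The sctp_init() change above trades the runtime sctp_sanity_check() for a compile-time assertion that SCTP's per-skb state still fits into skb->cb (48 bytes). The idiom generalizes to any subsystem that parks private state in the control buffer; a hedged sketch with a hypothetical structure:

#include <linux/bug.h>
#include <linux/skbuff.h>

struct example_skb_state {		/* hypothetical private state */
	u32 a, b, c;
};

static int __init example_init(void)
{
	/* Fails the build, not the boot, if the state outgrows skb->cb. */
	BUILD_BUG_ON(sizeof(struct example_skb_state) >
		     sizeof(((struct sk_buff *)0)->cb));
	return 0;
}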
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index cf579e71cff0..d244a23ab8d3 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -29,10 +29,7 @@
  *
  * Please send any bug reports or fixes you make to the
  * email address(es):
- *    lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- *    http://www.sf.net/projects/lksctp
+ *    lksctp developers <linux-sctp@vger.kernel.org>
  *
  * Written or modified by:
  *    La Monte H.P. Yarroll <piggy@acm.org>
@@ -45,9 +42,6 @@
  *    Daisy Chang <daisyc@us.ibm.com>
  *    Ardelle Fan <ardelle.fan@intel.com>
  *    Kevin Gao <kevin.gao@intel.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -68,9 +62,12 @@
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
 
-SCTP_STATIC
-struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc,
-				   __u8 type, __u8 flags, int paylen);
+static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc,
+					    __u8 type, __u8 flags, int paylen);
+static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
+					 __u8 flags, int paylen);
+static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
+					   __u8 type, __u8 flags, int paylen);
 static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
 					     const struct sctp_association *asoc,
 					     const struct sctp_chunk *init_chunk,
@@ -83,6 +80,28 @@ static int sctp_process_param(struct sctp_association *asoc,
 static void *sctp_addto_param(struct sctp_chunk *chunk, int len,
 			      const void *data);
 
+/* Control chunk destructor */
+static void sctp_control_release_owner(struct sk_buff *skb)
+{
+	/*TODO: do memory release */
+}
+
+static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
+{
+	struct sctp_association *asoc = chunk->asoc;
+	struct sk_buff *skb = chunk->skb;
+
+	/* TODO: properly account for control chunks.
+	 * To do it right we'll need:
+	 *  1) endpoint if association isn't known.
+	 *  2) proper memory accounting.
+	 *
+	 *  For now don't do anything for now.
+	 */
+	skb->sk = asoc ? asoc->base.sk : NULL;
+	skb->destructor = sctp_control_release_owner;
+}
+
 /* What was the inbound interface for this chunk? */
 int sctp_chunk_iif(const struct sctp_chunk *chunk)
 {
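The destructor added above is deliberately a stub (see its TODOs): it only ties control-chunk skbs to a socket, without charging write memory. For background, the full write-owner pattern it stops short of — modeled loosely on skb_set_owner_w() — looks roughly like this sketch. The names and the explicit reference-plus-charge scheme here are illustrative assumptions, not what the patch does:

static void example_release_owner(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* undo the charge and the reference taken at set-owner time */
	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
	sock_put(sk);
}

static void example_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
	sock_hold(sk);			/* the destructor holds a reference */
	skb->sk = sk;
	skb->destructor = example_release_owner;
	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
}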
@@ -297,7 +316,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 	 * PLEASE DO NOT FIXME [This version does not support Host Name.]
 	 */
 
-	retval = sctp_make_chunk(asoc, SCTP_CID_INIT, 0, chunksize);
+	retval = sctp_make_control(asoc, SCTP_CID_INIT, 0, chunksize);
 	if (!retval)
 		goto nodata;
 
@@ -444,7 +463,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 			num_ext);
 
 	/* Now allocate and fill out the chunk.  */
-	retval = sctp_make_chunk(asoc, SCTP_CID_INIT_ACK, 0, chunksize);
+	retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize);
 	if (!retval)
 		goto nomem_chunk;
 
@@ -549,7 +568,7 @@ struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc,
 	cookie_len = asoc->peer.cookie_len;
 
 	/* Build a cookie echo chunk.  */
-	retval = sctp_make_chunk(asoc, SCTP_CID_COOKIE_ECHO, 0, cookie_len);
+	retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ECHO, 0, cookie_len);
 	if (!retval)
 		goto nodata;
 	retval->subh.cookie_hdr =
@@ -594,7 +613,7 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
 {
 	struct sctp_chunk *retval;
 
-	retval = sctp_make_chunk(asoc, SCTP_CID_COOKIE_ACK, 0, 0);
+	retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ACK, 0, 0);
 
 	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
 	 *
@@ -642,8 +661,8 @@ struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc,
 	sctp_cwrhdr_t cwr;
 
 	cwr.lowest_tsn = htonl(lowest_tsn);
-	retval = sctp_make_chunk(asoc, SCTP_CID_ECN_CWR, 0,
-				 sizeof(sctp_cwrhdr_t));
+	retval = sctp_make_control(asoc, SCTP_CID_ECN_CWR, 0,
+				   sizeof(sctp_cwrhdr_t));
 
 	if (!retval)
 		goto nodata;
@@ -676,8 +695,8 @@ struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
 	sctp_ecnehdr_t ecne;
 
 	ecne.lowest_tsn = htonl(lowest_tsn);
-	retval = sctp_make_chunk(asoc, SCTP_CID_ECN_ECNE, 0,
-				 sizeof(sctp_ecnehdr_t));
+	retval = sctp_make_control(asoc, SCTP_CID_ECN_ECNE, 0,
+				   sizeof(sctp_ecnehdr_t));
 	if (!retval)
 		goto nodata;
 	retval->subh.ecne_hdr =
@@ -713,7 +732,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
 	dp.ssn = htons(ssn);
 
 	chunk_len = sizeof(dp) + data_len;
-	retval = sctp_make_chunk(asoc, SCTP_CID_DATA, flags, chunk_len);
+	retval = sctp_make_data(asoc, flags, chunk_len);
 	if (!retval)
 		goto nodata;
 
@@ -742,7 +761,8 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
 
 	memset(gabs, 0, sizeof(gabs));
 	ctsn = sctp_tsnmap_get_ctsn(map);
-	SCTP_DEBUG_PRINTK("sackCTSNAck sent: 0x%x.\n", ctsn);
+
+	pr_debug("%s: sackCTSNAck sent:0x%x\n", __func__, ctsn);
 
 	/* How much room is needed in the chunk? */
 	num_gabs = sctp_tsnmap_num_gabs(map, gabs);
@@ -759,7 +779,7 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
 		+ sizeof(__u32) * num_dup_tsns;
 
 	/* Create the chunk.  */
-	retval = sctp_make_chunk(asoc, SCTP_CID_SACK, 0, len);
+	retval = sctp_make_control(asoc, SCTP_CID_SACK, 0, len);
 	if (!retval)
 		goto nodata;
 
@@ -838,8 +858,8 @@ struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
 	ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
 	shut.cum_tsn_ack = htonl(ctsn);
 
-	retval = sctp_make_chunk(asoc, SCTP_CID_SHUTDOWN, 0,
-				 sizeof(sctp_shutdownhdr_t));
+	retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0,
+				   sizeof(sctp_shutdownhdr_t));
 	if (!retval)
 		goto nodata;
 
@@ -857,7 +877,7 @@ struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
 {
 	struct sctp_chunk *retval;
 
-	retval = sctp_make_chunk(asoc, SCTP_CID_SHUTDOWN_ACK, 0, 0);
+	retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_ACK, 0, 0);
 
 	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
 	 *
@@ -886,7 +906,7 @@ struct sctp_chunk *sctp_make_shutdown_complete(
 	 */
 	flags |= asoc ? 0 : SCTP_CHUNK_FLAG_T;
 
-	retval = sctp_make_chunk(asoc, SCTP_CID_SHUTDOWN_COMPLETE, flags, 0);
+	retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_COMPLETE, flags, 0);
 
 	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
 	 *
@@ -925,7 +945,7 @@ struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
 		flags = SCTP_CHUNK_FLAG_T;
 	}
 
-	retval = sctp_make_chunk(asoc, SCTP_CID_ABORT, flags, hint);
+	retval = sctp_make_control(asoc, SCTP_CID_ABORT, flags, hint);
 
 	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
 	 *
@@ -1117,7 +1137,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
 	struct sctp_chunk *retval;
 	sctp_sender_hb_info_t hbinfo;
 
-	retval = sctp_make_chunk(asoc, SCTP_CID_HEARTBEAT, 0, sizeof(hbinfo));
+	retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0, sizeof(hbinfo));
 
 	if (!retval)
 		goto nodata;
@@ -1145,7 +1165,7 @@ struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc,
 {
 	struct sctp_chunk *retval;
 
-	retval = sctp_make_chunk(asoc, SCTP_CID_HEARTBEAT_ACK, 0, paylen);
+	retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT_ACK, 0, paylen);
 	if (!retval)
 		goto nodata;
 
@@ -1177,8 +1197,8 @@ static struct sctp_chunk *sctp_make_op_error_space(
 {
 	struct sctp_chunk *retval;
 
-	retval = sctp_make_chunk(asoc, SCTP_CID_ERROR, 0,
-				 sizeof(sctp_errhdr_t) + size);
+	retval = sctp_make_control(asoc, SCTP_CID_ERROR, 0,
+				   sizeof(sctp_errhdr_t) + size);
 	if (!retval)
 		goto nodata;
 
@@ -1248,7 +1268,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
 	if (unlikely(!hmac_desc))
 		return NULL;
 
-	retval = sctp_make_chunk(asoc, SCTP_CID_AUTH, 0,
+	retval = sctp_make_control(asoc, SCTP_CID_AUTH, 0,
 			hmac_desc->hmac_len + sizeof(sctp_authhdr_t));
 	if (!retval)
 		return NULL;
@@ -1288,10 +1308,8 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
 
 	if (!retval)
 		goto nodata;
-
-	if (!sk) {
-		SCTP_DEBUG_PRINTK("chunkifying skb %p w/o an sk\n", skb);
-	}
+	if (!sk)
+		pr_debug("%s: chunkifying skb:%p w/o an sk\n", __func__, skb);
 
 	INIT_LIST_HEAD(&retval->list);
 	retval->skb = skb;
@@ -1353,9 +1371,8 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk)
 /* Create a new chunk, setting the type and flags headers from the
  * arguments, reserving enough space for a 'paylen' byte payload.
  */
-SCTP_STATIC
-struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc,
-				   __u8 type, __u8 flags, int paylen)
+static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
+					   __u8 type, __u8 flags, int paylen)
 {
 	struct sctp_chunk *retval;
 	sctp_chunkhdr_t *chunk_hdr;
@@ -1388,14 +1405,27 @@ struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc,
 	if (sctp_auth_send_cid(type, asoc))
 		retval->auth = 1;
 
-	/* Set the skb to the belonging sock for accounting.  */
-	skb->sk = sk;
-
 	return retval;
 nodata:
 	return NULL;
 }
 
+static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
+					 __u8 flags, int paylen)
+{
+	return _sctp_make_chunk(asoc, SCTP_CID_DATA, flags, paylen);
+}
+
+static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc,
+					    __u8 type, __u8 flags, int paylen)
+{
+	struct sctp_chunk *chunk = _sctp_make_chunk(asoc, type, flags, paylen);
+
+	if (chunk)
+		sctp_control_set_owner_w(chunk);
+
+	return chunk;
+}
 
 /* Release the memory occupied by a chunk.  */
 static void sctp_chunk_destroy(struct sctp_chunk *chunk)
@@ -1632,8 +1662,8 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
 	cookie->c.adaptation_ind = asoc->peer.adaptation_ind;
 
 	/* Set an expiration time for the cookie.  */
-	do_gettimeofday(&cookie->c.expiration);
-	TIMEVAL_ADD(asoc->cookie_life, cookie->c.expiration);
+	cookie->c.expiration = ktime_add(asoc->cookie_life,
+					 ktime_get());
 
 	/* Copy the peer's init packet.  */
 	memcpy(&cookie->c.peer_init[0], init_chunk->chunk_hdr,
@@ -1682,7 +1712,7 @@ struct sctp_association *sctp_unpack_cookie(
 	unsigned int len;
 	sctp_scope_t scope;
 	struct sk_buff *skb = chunk->skb;
-	struct timeval tv;
+	ktime_t kt;
 	struct hash_desc desc;
 
 	/* Header size is static data prior to the actual cookie, including
@@ -1759,11 +1789,11 @@ no_hmac:
 	 * down the new association establishment instead of every packet.
 	 */
 	if (sock_flag(ep->base.sk, SOCK_TIMESTAMP))
-		skb_get_timestamp(skb, &tv);
+		kt = skb_get_ktime(skb);
 	else
-		do_gettimeofday(&tv);
+		kt = ktime_get();
 
-	if (!asoc && tv_lt(bear_cookie->expiration, tv)) {
+	if (!asoc && ktime_compare(bear_cookie->expiration, kt) < 0) {
 		/*
 		 * Section 3.3.10.3 Stale Cookie Error (3)
 		 *
@@ -1775,9 +1805,7 @@ no_hmac:
 		len = ntohs(chunk->chunk_hdr->length);
 		*errp = sctp_make_op_error_space(asoc, chunk, len);
 		if (*errp) {
-			suseconds_t usecs = (tv.tv_sec -
-				bear_cookie->expiration.tv_sec) * 1000000L +
-				tv.tv_usec - bear_cookie->expiration.tv_usec;
+			suseconds_t usecs = ktime_to_us(ktime_sub(kt, bear_cookie->expiration));
 			__be32 n = htonl(usecs);
 
 			sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE,
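The timeval-to-ktime conversion above touches three operations: stamping the expiration, testing staleness, and reporting how far a cookie overshot. Collected as a hedged sketch, with illustrative names rather than the patch's own:

#include <linux/ktime.h>

static ktime_t example_stamp_expiration(ktime_t lifetime)
{
	return ktime_add(lifetime, ktime_get());	/* now + lifetime */
}

static bool example_is_stale(ktime_t expiration, ktime_t now)
{
	return ktime_compare(expiration, now) < 0;	/* already expired? */
}

static s64 example_staleness_us(ktime_t expiration, ktime_t now)
{
	return ktime_to_us(ktime_sub(now, expiration));
}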
@@ -2195,8 +2223,9 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
 		break;
 fallthrough:
 	default:
-		SCTP_DEBUG_PRINTK("Unrecognized param: %d for chunk %d.\n",
-				  ntohs(param.p->type), cid);
+		pr_debug("%s: unrecognized param:%d for chunk:%d\n",
+			 __func__, ntohs(param.p->type), cid);
+
 		retval = sctp_process_unk_param(asoc, param, chunk, err_chunk);
 		break;
 	}
@@ -2211,25 +2240,23 @@ int sctp_verify_init(struct net *net, const struct sctp_association *asoc,
 			 struct sctp_chunk **errp)
 {
 	union sctp_params param;
-	int has_cookie = 0;
+	bool has_cookie = false;
 	int result;
 
-	/* Verify stream values are non-zero. */
-	if ((0 == peer_init->init_hdr.num_outbound_streams) ||
-	    (0 == peer_init->init_hdr.num_inbound_streams) ||
-	    (0 == peer_init->init_hdr.init_tag) ||
-	    (SCTP_DEFAULT_MINWINDOW > ntohl(peer_init->init_hdr.a_rwnd))) {
-
+	/* Check for missing mandatory parameters. Note: Initial TSN is
+	 * also mandatory, but is not checked here since the valid range
+	 * is 0..2**32-1. RFC4960, section 3.3.3.
+	 */
+	if (peer_init->init_hdr.num_outbound_streams == 0 ||
+	    peer_init->init_hdr.num_inbound_streams == 0 ||
+	    peer_init->init_hdr.init_tag == 0 ||
+	    ntohl(peer_init->init_hdr.a_rwnd) < SCTP_DEFAULT_MINWINDOW)
 		return sctp_process_inv_mandatory(asoc, chunk, errp);
-	}
 
-	/* Check for missing mandatory parameters. */
 	sctp_walk_params(param, peer_init, init_hdr.params) {
-
-		if (SCTP_PARAM_STATE_COOKIE == param.p->type)
-			has_cookie = 1;
-
-	} /* for (loop through all parameters) */
+		if (param.p->type == SCTP_PARAM_STATE_COOKIE)
+			has_cookie = true;
+	}
 
 	/* There is a possibility that a parameter length was bad and
 	 * in that case we would have stoped walking the parameters.
@@ -2516,12 +2543,11 @@ do_addr_param:
 		/* Suggested Cookie Life span increment's unit is msec,
 		 * (1/1000sec).
 		 */
-		asoc->cookie_life.tv_sec += stale / 1000;
-		asoc->cookie_life.tv_usec += (stale % 1000) * 1000;
+		asoc->cookie_life = ktime_add_ms(asoc->cookie_life, stale);
 		break;
 
 	case SCTP_PARAM_HOST_NAME_ADDRESS:
-		SCTP_DEBUG_PRINTK("unimplemented SCTP_HOST_NAME_ADDRESS\n");
+		pr_debug("%s: unimplemented SCTP_HOST_NAME_ADDRESS\n", __func__);
 		break;
 
 	case SCTP_PARAM_SUPPORTED_ADDRESS_TYPES:
@@ -2667,8 +2693,8 @@ fall_through:
 		 * called prior to this routine.  Simply log the error
 		 * here.
 		 */
-		SCTP_DEBUG_PRINTK("Ignoring param: %d for association %p.\n",
-				  ntohs(param.p->type), asoc);
+		pr_debug("%s: ignoring param:%d for association:%p.\n",
+			 __func__, ntohs(param.p->type), asoc);
 		break;
 	}
 
@@ -2738,7 +2764,7 @@ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc,
 	length += addrlen;
 
 	/* Create the chunk.  */
-	retval = sctp_make_chunk(asoc, SCTP_CID_ASCONF, 0, length);
+	retval = sctp_make_control(asoc, SCTP_CID_ASCONF, 0, length);
 	if (!retval)
 		return NULL;
 
@@ -2810,7 +2836,10 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
 			totallen += paramlen;
 			totallen += addr_param_len;
 			del_pickup = 1;
-			SCTP_DEBUG_PRINTK("mkasconf_update_ip: picked same-scope del_pending addr, totallen for all addresses is %d\n", totallen);
+
+			pr_debug("%s: picked same-scope del_pending addr, "
+				 "totallen for all addresses is %d\n",
+				 __func__, totallen);
 		}
 	}
 
@@ -2919,7 +2948,7 @@ static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *as
 	int length = sizeof(asconf) + vparam_len;
 
 	/* Create the chunk.  */
-	retval = sctp_make_chunk(asoc, SCTP_CID_ASCONF_ACK, 0, length);
+	retval = sctp_make_control(asoc, SCTP_CID_ASCONF_ACK, 0, length);
 	if (!retval)
 		return NULL;
 
@@ -3450,7 +3479,7 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
 
 	hint = (nstreams + 1) * sizeof(__u32);
 
-	retval = sctp_make_chunk(asoc, SCTP_CID_FWD_TSN, 0, hint);
+	retval = sctp_make_control(asoc, SCTP_CID_FWD_TSN, 0, hint);
 
 	if (!retval)
 		return NULL;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 8aab894aeabe..666c66842799 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -28,10 +28,7 @@
  *
  * Please send any bug reports or fixes you make to the
  * email address(es):
- *    lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- *    http://www.sf.net/projects/lksctp
+ *    lksctp developers <linux-sctp@vger.kernel.org>
  *
  * Written or modified by:
  *    La Monte H.P. Yarroll <piggy@acm.org>
@@ -42,9 +39,6 @@
  *    Daisy Chang <daisyc@us.ibm.com>
  *    Sridhar Samudrala <sri@us.ibm.com>
  *    Ardelle Fan <ardelle.fan@intel.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -257,7 +251,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
 
 	sctp_bh_lock_sock(asoc->base.sk);
 	if (sock_owned_by_user(asoc->base.sk)) {
-		SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__);
+		pr_debug("%s: sock is busy\n", __func__);
 
 		/* Try again later.  */
 		if (!mod_timer(&transport->T3_rtx_timer, jiffies + (HZ/20)))
@@ -297,9 +291,8 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc,
 
 	sctp_bh_lock_sock(asoc->base.sk);
 	if (sock_owned_by_user(asoc->base.sk)) {
-		SCTP_DEBUG_PRINTK("%s:Sock is busy: timer %d\n",
-				  __func__,
-				  timeout_type);
+		pr_debug("%s: sock is busy: timer %d\n", __func__,
+			 timeout_type);
 
 		/* Try again later.  */
 		if (!mod_timer(&asoc->timers[timeout_type], jiffies + (HZ/20)))
@@ -377,7 +370,7 @@ void sctp_generate_heartbeat_event(unsigned long data)
 
 	sctp_bh_lock_sock(asoc->base.sk);
 	if (sock_owned_by_user(asoc->base.sk)) {
-		SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__);
+		pr_debug("%s: sock is busy\n", __func__);
 
 		/* Try again later.  */
 		if (!mod_timer(&transport->hb_timer, jiffies + (HZ/20)))
@@ -415,7 +408,7 @@ void sctp_generate_proto_unreach_event(unsigned long data)
 
 	sctp_bh_lock_sock(asoc->base.sk);
 	if (sock_owned_by_user(asoc->base.sk)) {
-		SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__);
+		pr_debug("%s: sock is busy\n", __func__);
 
 		/* Try again later.  */
 		if (!mod_timer(&transport->proto_unreach_timer,
@@ -521,11 +514,9 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
 
 	if (transport->state != SCTP_INACTIVE &&
 	    (transport->error_count > transport->pathmaxrxt)) {
-		SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p",
-					 " transport IP: port:%d failed.\n",
-					 asoc,
-					 (&transport->ipaddr),
-					 ntohs(transport->ipaddr.v4.sin_port));
+		pr_debug("%s: association:%p transport addr:%pISpc failed\n",
+			 __func__, asoc, &transport->ipaddr.sa);
+
 		sctp_assoc_control_transport(asoc, transport,
 					     SCTP_TRANSPORT_DOWN,
 					     SCTP_FAILED_THRESHOLD);
@@ -733,6 +724,12 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
 	sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP,
 				     SCTP_HEARTBEAT_SUCCESS);
 
+	/* HB-ACK was received for a the proper HB. Consider this
+	 * forward progress.
+	 */
+	if (t->dst)
+		dst_confirm(t->dst);
+
 	/* The receiver of the HEARTBEAT ACK should also perform an
 	 * RTT measurement for that destination transport address
 	 * using the time value carried in the HEARTBEAT ACK chunk.
@@ -804,8 +801,7 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
 
 	asoc->state = state;
 
-	SCTP_DEBUG_PRINTK("sctp_cmd_new_state: asoc %p[%s]\n",
-			  asoc, sctp_state_tbl[state]);
+	pr_debug("%s: asoc:%p[%s]\n", __func__, asoc, sctp_state_tbl[state]);
 
 	if (sctp_style(sk, TCP)) {
 		/* Change the sk->sk_state of a TCP-style socket that has
@@ -864,6 +860,7 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds,
 	    (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK))
 		return;
 
+	BUG_ON(asoc->peer.primary_path == NULL);
 	sctp_unhash_established(asoc);
 	sctp_association_free(asoc);
 }
@@ -1016,15 +1013,11 @@ static void sctp_cmd_t1_timer_update(struct sctp_association *asoc,
 			asoc->timeouts[timer] = asoc->max_init_timeo;
 		}
 		asoc->init_cycle++;
-		SCTP_DEBUG_PRINTK(
-			"T1 %s Timeout adjustment"
-			" init_err_counter: %d"
-			" cycle: %d"
-			" timeout: %ld\n",
-			name,
-			asoc->init_err_counter,
-			asoc->init_cycle,
-			asoc->timeouts[timer]);
+
+		pr_debug("%s: T1[%s] timeout adjustment init_err_counter:%d"
+			 " cycle:%d timeout:%ld\n", __func__, name,
+			 asoc->init_err_counter, asoc->init_cycle,
+			 asoc->timeouts[timer]);
 	}
 
 }
@@ -1079,23 +1072,19 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc)
  * main flow of sctp_do_sm() to keep attention focused on the real
  * functionality there.
  */
-#define DEBUG_PRE \
-	SCTP_DEBUG_PRINTK("sctp_do_sm prefn: " \
-			  "ep %p, %s, %s, asoc %p[%s], %s\n", \
-			  ep, sctp_evttype_tbl[event_type], \
-			  (*debug_fn)(subtype), asoc, \
-			  sctp_state_tbl[state], state_fn->name)
-
-#define DEBUG_POST \
-	SCTP_DEBUG_PRINTK("sctp_do_sm postfn: " \
-			  "asoc %p, status: %s\n", \
-			  asoc, sctp_status_tbl[status])
-
-#define DEBUG_POST_SFX \
-	SCTP_DEBUG_PRINTK("sctp_do_sm post sfx: error %d, asoc %p[%s]\n", \
-			  error, asoc, \
-			  sctp_state_tbl[(asoc && sctp_id2assoc(ep->base.sk, \
-			  sctp_assoc2id(asoc)))?asoc->state:SCTP_STATE_CLOSED])
+#define debug_pre_sfn() \
+	pr_debug("%s[pre-fn]: ep:%p, %s, %s, asoc:%p[%s], %s\n", __func__, \
+		 ep, sctp_evttype_tbl[event_type], (*debug_fn)(subtype), \
+		 asoc, sctp_state_tbl[state], state_fn->name)
+
+#define debug_post_sfn() \
+	pr_debug("%s[post-fn]: asoc:%p, status:%s\n", __func__, asoc, \
+		 sctp_status_tbl[status])
+
+#define debug_post_sfx() \
+	pr_debug("%s[post-sfx]: error:%d, asoc:%p[%s]\n", __func__, error, \
+		 asoc, sctp_state_tbl[(asoc && sctp_id2assoc(ep->base.sk, \
+		 sctp_assoc2id(asoc))) ? asoc->state : SCTP_STATE_CLOSED])
 
 /*
  * This is the master state machine processing function.
@@ -1115,7 +1104,6 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
 	sctp_disposition_t status;
 	int error = 0;
 	typedef const char *(printfn_t)(sctp_subtype_t);
-
 	static printfn_t *table[] = {
 		NULL, sctp_cname, sctp_tname, sctp_oname, sctp_pname,
 	};
@@ -1128,21 +1116,18 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
 
 	sctp_init_cmd_seq(&commands);
 
-	DEBUG_PRE;
+	debug_pre_sfn();
 	status = (*state_fn->fn)(net, ep, asoc, subtype, event_arg, &commands);
-	DEBUG_POST;
+	debug_post_sfn();
 
 	error = sctp_side_effects(event_type, subtype, state,
 				  ep, asoc, event_arg, status,
 				  &commands, gfp);
-	DEBUG_POST_SFX;
+	debug_post_sfx();
 
 	return error;
 }
 
-#undef DEBUG_PRE
-#undef DEBUG_POST
-
 /*****************************************************************
  * This the master state function side effect processing function.
  *****************************************************************/
@@ -1171,9 +1156,9 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
 
 	switch (status) {
 	case SCTP_DISPOSITION_DISCARD:
-		SCTP_DEBUG_PRINTK("Ignored sctp protocol event - state %d, "
-				  "event_type %d, event_id %d\n",
-				  state, event_type, subtype.chunk);
+		pr_debug("%s: ignored sctp protocol event - state:%d, "
+			 "event_type:%d, event_id:%d\n", __func__, state,
+			 event_type, subtype.chunk);
 		break;
 
 	case SCTP_DISPOSITION_NOMEM:
@@ -1274,8 +1259,10 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 				sctp_outq_uncork(&asoc->outqueue);
 				local_cork = 0;
 			}
-			asoc = cmd->obj.asoc;
+
 			/* Register with the endpoint.  */
+			asoc = cmd->obj.asoc;
+			BUG_ON(asoc->peer.primary_path == NULL);
 			sctp_endpoint_add_asoc(ep, asoc);
 			sctp_hash_established(asoc);
 			break;
@@ -1422,18 +1409,18 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 
 		case SCTP_CMD_CHUNK_ULP:
 			/* Send a chunk to the sockets layer.  */
-			SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n",
-					  "chunk_up:", cmd->obj.chunk,
-					  "ulpq:", &asoc->ulpq);
+			pr_debug("%s: sm_sideff: chunk_up:%p, ulpq:%p\n",
+				 __func__, cmd->obj.chunk, &asoc->ulpq);
+
 			sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.chunk,
 					    GFP_ATOMIC);
 			break;
 
 		case SCTP_CMD_EVENT_ULP:
 			/* Send a notification to the sockets layer.  */
-			SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n",
-					  "event_up:",cmd->obj.ulpevent,
-					  "ulpq:",&asoc->ulpq);
+			pr_debug("%s: sm_sideff: event_up:%p, ulpq:%p\n",
+				 __func__, cmd->obj.ulpevent, &asoc->ulpq);
+
 			sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ulpevent);
 			break;
 
@@ -1598,7 +1585,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 			break;
 
 		case SCTP_CMD_REPORT_BAD_TAG:
-			SCTP_DEBUG_PRINTK("vtag mismatch!\n");
+			pr_debug("%s: vtag mismatch!\n", __func__);
 			break;
 
 		case SCTP_CMD_STRIKE:
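Every SCTP_DEBUG_PRINTK* conversion in this series lands on the same template: pr_debug() plus __func__, with the module prefix supplied by the pr_fmt definition these files already carry. A minimal sketch of the converted call-site shape; with CONFIG_DYNAMIC_DEBUG the sites can be enabled at runtime (echo 'module sctp +p' > /sys/kernel/debug/dynamic_debug/control) and otherwise they compile away:

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/printk.h>

/* Sketch only: arguments are illustrative, not from the patch. */
static void example_debug_site(void *asoc, int state)
{
	pr_debug("%s: asoc:%p[%d]\n", __func__, asoc, state);
}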
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index de1a0138317f..dfe3f36ff2aa 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -28,10 +28,7 @@
  *
  * Please send any bug reports or fixes you make to the
  * email address(es):
- *    lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- *    http://www.sf.net/projects/lksctp
+ *    lksctp developers <linux-sctp@vger.kernel.org>
  *
  * Written or modified by:
  *    La Monte H.P. Yarroll <piggy@acm.org>
@@ -45,9 +42,6 @@
  *    Ardelle Fan <ardelle.fan@intel.com>
  *    Ryan Layer <rmlayer@us.ibm.com>
  *    Kevin Gao <kevin.gao@intel.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -1179,9 +1173,9 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
 	/* Check if the timestamp looks valid. */
 	if (time_after(hbinfo->sent_at, jiffies) ||
 	    time_after(jiffies, hbinfo->sent_at + max_interval)) {
-		SCTP_DEBUG_PRINTK("%s: HEARTBEAT ACK with invalid timestamp "
-				  "received for transport: %p\n",
-				  __func__, link);
+		pr_debug("%s: HEARTBEAT ACK with invalid timestamp received "
+			 "for transport:%p\n", __func__, link);
+
 		return SCTP_DISPOSITION_DISCARD;
 	}
 
@@ -2562,7 +2556,8 @@ static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
 					   const struct sctp_association *asoc,
 					   struct sctp_transport *transport)
 {
-	SCTP_DEBUG_PRINTK("ABORT received (INIT).\n");
+	pr_debug("%s: ABORT received (INIT)\n", __func__);
+
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_CLOSED));
 	SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
@@ -2572,6 +2567,7 @@ static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
 	/* CMD_INIT_FAILED will DELETE_TCB. */
 	sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
 			SCTP_PERR(error));
+
 	return SCTP_DISPOSITION_ABORT;
 }
 
@@ -2637,8 +2633,9 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net,
 	ctsn = ntohl(sdh->cum_tsn_ack);
 
 	if (TSN_lt(ctsn, asoc->ctsn_ack_point)) {
-		SCTP_DEBUG_PRINTK("ctsn %x\n", ctsn);
-		SCTP_DEBUG_PRINTK("ctsn_ack_point %x\n", asoc->ctsn_ack_point);
+		pr_debug("%s: ctsn:%x, ctsn_ack_point:%x\n", __func__, ctsn,
+			 asoc->ctsn_ack_point);
+
 		return SCTP_DISPOSITION_DISCARD;
 	}
 
@@ -2721,8 +2718,9 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
 	ctsn = ntohl(sdh->cum_tsn_ack);
 
 	if (TSN_lt(ctsn, asoc->ctsn_ack_point)) {
-		SCTP_DEBUG_PRINTK("ctsn %x\n", ctsn);
-		SCTP_DEBUG_PRINTK("ctsn_ack_point %x\n", asoc->ctsn_ack_point);
+		pr_debug("%s: ctsn:%x, ctsn_ack_point:%x\n", __func__, ctsn,
+			 asoc->ctsn_ack_point);
+
 		return SCTP_DISPOSITION_DISCARD;
 	}
 
@@ -3174,8 +3172,9 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
 	 * Point indicates an out-of-order SACK.
 	 */
 	if (TSN_lt(ctsn, asoc->ctsn_ack_point)) {
-		SCTP_DEBUG_PRINTK("ctsn %x\n", ctsn);
-		SCTP_DEBUG_PRINTK("ctsn_ack_point %x\n", asoc->ctsn_ack_point);
+		pr_debug("%s: ctsn:%x, ctsn_ack_point:%x\n", __func__, ctsn,
+			 asoc->ctsn_ack_point);
+
 		return SCTP_DISPOSITION_DISCARD;
 	}
 
@@ -3859,7 +3858,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
 	skb_pull(chunk->skb, len);
 
 	tsn = ntohl(fwdtsn_hdr->new_cum_tsn);
-	SCTP_DEBUG_PRINTK("%s: TSN 0x%x.\n", __func__, tsn);
+	pr_debug("%s: TSN 0x%x\n", __func__, tsn);
 
 	/* The TSN is too high--silently discard the chunk and count on it
 	 * getting retransmitted later.
@@ -3927,7 +3926,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
 	skb_pull(chunk->skb, len);
 
 	tsn = ntohl(fwdtsn_hdr->new_cum_tsn);
-	SCTP_DEBUG_PRINTK("%s: TSN 0x%x.\n", __func__, tsn);
+	pr_debug("%s: TSN 0x%x\n", __func__, tsn);
 
 	/* The TSN is too high--silently discard the chunk and count on it
 	 * getting retransmitted later.
@@ -4166,7 +4165,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
 	struct sctp_chunk *err_chunk;
 	sctp_chunkhdr_t *hdr;
 
-	SCTP_DEBUG_PRINTK("Processing the unknown chunk id %d.\n", type.chunk);
+	pr_debug("%s: processing unknown chunk id:%d\n", __func__, type.chunk);
 
 	if (!sctp_vtag_verify(unk_chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -4256,7 +4255,8 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
-	SCTP_DEBUG_PRINTK("Chunk %d is discarded\n", type.chunk);
+	pr_debug("%s: chunk:%d is discarded\n", __func__, type.chunk);
+
 	return SCTP_DISPOSITION_DISCARD;
 }
 
@@ -4632,16 +4632,16 @@ sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net,
4632 if (!repl) 4632 if (!repl)
4633 goto nomem; 4633 goto nomem;
4634 4634
4635 /* Choose transport for INIT. */
4636 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT,
4637 SCTP_CHUNK(repl));
4638
4635 /* Cast away the const modifier, as we want to just 4639 /* Cast away the const modifier, as we want to just
4636 * rerun it through as a sideffect. 4640 * rerun it through as a sideffect.
4637 */ 4641 */
4638 my_asoc = (struct sctp_association *)asoc; 4642 my_asoc = (struct sctp_association *)asoc;
4639 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(my_asoc)); 4643 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(my_asoc));
4640 4644
4641 /* Choose transport for INIT. */
4642 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT,
4643 SCTP_CHUNK(repl));
4644
4645 /* After sending the INIT, "A" starts the T1-init timer and 4645 /* After sending the INIT, "A" starts the T1-init timer and
4646 * enters the COOKIE-WAIT state. 4646 * enters the COOKIE-WAIT state.
4647 */ 4647 */
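
Note: the reordering in sctp_sf_do_prm_asoc() is visible in this hunk. The
side-effect interpreter drains the command sequence in the order queued, so
after this change SCTP_CMD_NEW_ASOC is processed before
SCTP_CMD_INIT_CHOOSE_TRANSPORT, presumably so the association is known to
the interpreter before a transport is picked for the INIT chunk. A toy
model of the FIFO semantics (simplified names, not the kernel's actual
sctp_cmd_seq_t layout):

    enum cmd { CMD_NEW_ASOC, CMD_INIT_CHOOSE_TRANSPORT, CMD_REPLY };

    struct cmd_seq {
            enum cmd cmds[8];
            int head, tail;
    };

    static void cmd_add(struct cmd_seq *seq, enum cmd c)
    {
            seq->cmds[seq->tail++] = c;     /* queued in call order... */
    }

    static int cmd_next(struct cmd_seq *seq, enum cmd *c)
    {
            if (seq->head == seq->tail)
                    return 0;               /* sequence drained */
            *c = seq->cmds[seq->head++];    /* ...and replayed FIFO */
            return 1;
    }
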
@@ -5184,7 +5184,9 @@ sctp_disposition_t sctp_sf_ignore_primitive(
5184 void *arg, 5184 void *arg,
5185 sctp_cmd_seq_t *commands) 5185 sctp_cmd_seq_t *commands)
5186{ 5186{
5187 SCTP_DEBUG_PRINTK("Primitive type %d is ignored.\n", type.primitive); 5187 pr_debug("%s: primitive type:%d is ignored\n", __func__,
5188 type.primitive);
5189
5188 return SCTP_DISPOSITION_DISCARD; 5190 return SCTP_DISPOSITION_DISCARD;
5189} 5191}
5190 5192
@@ -5379,7 +5381,9 @@ sctp_disposition_t sctp_sf_ignore_other(struct net *net,
5379 void *arg, 5381 void *arg,
5380 sctp_cmd_seq_t *commands) 5382 sctp_cmd_seq_t *commands)
5381{ 5383{
5382 SCTP_DEBUG_PRINTK("The event other type %d is ignored\n", type.other); 5384 pr_debug("%s: the event other type:%d is ignored\n",
5385 __func__, type.other);
5386
5383 return SCTP_DISPOSITION_DISCARD; 5387 return SCTP_DISPOSITION_DISCARD;
5384} 5388}
5385 5389
@@ -5527,7 +5531,8 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
5527 struct sctp_bind_addr *bp; 5531 struct sctp_bind_addr *bp;
5528 int attempts = asoc->init_err_counter + 1; 5532 int attempts = asoc->init_err_counter + 1;
5529 5533
5530 SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); 5534 pr_debug("%s: timer T1 expired (INIT)\n", __func__);
5535
5531 SCTP_INC_STATS(net, SCTP_MIB_T1_INIT_EXPIREDS); 5536 SCTP_INC_STATS(net, SCTP_MIB_T1_INIT_EXPIREDS);
5532 5537
5533 if (attempts <= asoc->max_init_attempts) { 5538 if (attempts <= asoc->max_init_attempts) {
@@ -5546,9 +5551,10 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
5546 5551
5547 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); 5552 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
5548 } else { 5553 } else {
5549 SCTP_DEBUG_PRINTK("Giving up on INIT, attempts: %d" 5554 pr_debug("%s: giving up on INIT, attempts:%d "
5550 " max_init_attempts: %d\n", 5555 "max_init_attempts:%d\n", __func__, attempts,
5551 attempts, asoc->max_init_attempts); 5556 asoc->max_init_attempts);
5557
5552 sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, 5558 sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
5553 SCTP_ERROR(ETIMEDOUT)); 5559 SCTP_ERROR(ETIMEDOUT));
5554 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, 5560 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
@@ -5588,7 +5594,8 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
5588 struct sctp_chunk *repl = NULL; 5594 struct sctp_chunk *repl = NULL;
5589 int attempts = asoc->init_err_counter + 1; 5595 int attempts = asoc->init_err_counter + 1;
5590 5596
5591 SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); 5597 pr_debug("%s: timer T1 expired (COOKIE-ECHO)\n", __func__);
5598
5592 SCTP_INC_STATS(net, SCTP_MIB_T1_COOKIE_EXPIREDS); 5599 SCTP_INC_STATS(net, SCTP_MIB_T1_COOKIE_EXPIREDS);
5593 5600
5594 if (attempts <= asoc->max_init_attempts) { 5601 if (attempts <= asoc->max_init_attempts) {
@@ -5636,7 +5643,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net,
5636{ 5643{
5637 struct sctp_chunk *reply = NULL; 5644 struct sctp_chunk *reply = NULL;
5638 5645
5639 SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); 5646 pr_debug("%s: timer T2 expired\n", __func__);
5647
5640 SCTP_INC_STATS(net, SCTP_MIB_T2_SHUTDOWN_EXPIREDS); 5648 SCTP_INC_STATS(net, SCTP_MIB_T2_SHUTDOWN_EXPIREDS);
5641 5649
5642 ((struct sctp_association *)asoc)->shutdown_retries++; 5650 ((struct sctp_association *)asoc)->shutdown_retries++;
@@ -5777,7 +5785,8 @@ sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net,
5777{ 5785{
5778 struct sctp_chunk *reply = NULL; 5786 struct sctp_chunk *reply = NULL;
5779 5787
5780 SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); 5788 pr_debug("%s: timer T5 expired\n", __func__);
5789
5781 SCTP_INC_STATS(net, SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); 5790 SCTP_INC_STATS(net, SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS);
5782 5791
5783 reply = sctp_make_abort(asoc, NULL, 0); 5792 reply = sctp_make_abort(asoc, NULL, 0);
@@ -5892,7 +5901,8 @@ sctp_disposition_t sctp_sf_timer_ignore(struct net *net,
5892 void *arg, 5901 void *arg,
5893 sctp_cmd_seq_t *commands) 5902 sctp_cmd_seq_t *commands)
5894{ 5903{
5895 SCTP_DEBUG_PRINTK("Timer %d ignored.\n", type.chunk); 5904 pr_debug("%s: timer %d ignored\n", __func__, type.chunk);
5905
5896 return SCTP_DISPOSITION_CONSUME; 5906 return SCTP_DISPOSITION_CONSUME;
5897} 5907}
5898 5908
@@ -6102,7 +6112,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
6102 skb_pull(chunk->skb, sizeof(sctp_datahdr_t)); 6112 skb_pull(chunk->skb, sizeof(sctp_datahdr_t));
6103 6113
6104 tsn = ntohl(data_hdr->tsn); 6114 tsn = ntohl(data_hdr->tsn);
6105 SCTP_DEBUG_PRINTK("eat_data: TSN 0x%x.\n", tsn); 6115 pr_debug("%s: TSN 0x%x\n", __func__, tsn);
6106 6116
6107 /* ASSERT: Now skb->data is really the user data. */ 6117 /* ASSERT: Now skb->data is really the user data. */
6108 6118
@@ -6179,12 +6189,12 @@ static int sctp_eat_data(const struct sctp_association *asoc,
6179 */ 6189 */
6180 if (sctp_tsnmap_has_gap(map) && 6190 if (sctp_tsnmap_has_gap(map) &&
6181 (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { 6191 (sctp_tsnmap_get_ctsn(map) + 1) == tsn) {
6182 SCTP_DEBUG_PRINTK("Reneging for tsn:%u\n", tsn); 6192 pr_debug("%s: reneging for tsn:%u\n", __func__, tsn);
6183 deliver = SCTP_CMD_RENEGE; 6193 deliver = SCTP_CMD_RENEGE;
6184 } else { 6194 } else {
6185 SCTP_DEBUG_PRINTK("Discard tsn: %u len: %Zd, " 6195 pr_debug("%s: discard tsn:%u len:%zu, rwnd:%d\n",
6186 "rwnd: %d\n", tsn, datalen, 6196 __func__, tsn, datalen, asoc->rwnd);
6187 asoc->rwnd); 6197
6188 return SCTP_IERROR_IGNORE_TSN; 6198 return SCTP_IERROR_IGNORE_TSN;
6189 } 6199 }
6190 } 6200 }
@@ -6199,7 +6209,8 @@ static int sctp_eat_data(const struct sctp_association *asoc,
6199 if (*sk->sk_prot_creator->memory_pressure) { 6209 if (*sk->sk_prot_creator->memory_pressure) {
6200 if (sctp_tsnmap_has_gap(map) && 6210 if (sctp_tsnmap_has_gap(map) &&
6201 (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { 6211 (sctp_tsnmap_get_ctsn(map) + 1) == tsn) {
6202 SCTP_DEBUG_PRINTK("Under Pressure! Reneging for tsn:%u\n", tsn); 6212 pr_debug("%s: under pressure, reneging for tsn:%u\n",
6213 __func__, tsn);
6203 deliver = SCTP_CMD_RENEGE; 6214 deliver = SCTP_CMD_RENEGE;
6204 } 6215 }
6205 } 6216 }
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 84d98d8a5a74..c5999b2dde7d 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -28,10 +28,7 @@
28 * 28 *
29 * Please send any bug reports or fixes you make to the 29 * Please send any bug reports or fixes you make to the
30 * email address(es): 30 * email address(es):
31 * lksctp developers <lksctp-developers@lists.sourceforge.net> 31 * lksctp developers <linux-sctp@vger.kernel.org>
32 *
33 * Or submit a bug report through the following website:
34 * http://www.sf.net/projects/lksctp
35 * 32 *
36 * Written or modified by: 33 * Written or modified by:
37 * La Monte H.P. Yarroll <piggy@acm.org> 34 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -41,9 +38,6 @@
41 * Daisy Chang <daisyc@us.ibm.com> 38 * Daisy Chang <daisyc@us.ibm.com>
42 * Ardelle Fan <ardelle.fan@intel.com> 39 * Ardelle Fan <ardelle.fan@intel.com>
43 * Sridhar Samudrala <sri@us.ibm.com> 40 * Sridhar Samudrala <sri@us.ibm.com>
44 *
45 * Any bugs reported given to us we will try to fix... any fixes shared will
46 * be incorporated into the next SCTP release.
47 */ 41 */
48 42
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 43#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f631c5ff4dbf..911b71b26b0e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -34,10 +34,7 @@
34 * 34 *
35 * Please send any bug reports or fixes you make to the 35 * Please send any bug reports or fixes you make to the
36 * email address(es): 36 * email address(es):
37 * lksctp developers <lksctp-developers@lists.sourceforge.net> 37 * lksctp developers <linux-sctp@vger.kernel.org>
38 *
39 * Or submit a bug report through the following website:
40 * http://www.sf.net/projects/lksctp
41 * 38 *
42 * Written or modified by: 39 * Written or modified by:
43 * La Monte H.P. Yarroll <piggy@acm.org> 40 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -52,9 +49,6 @@
52 * Ryan Layer <rmlayer@us.ibm.com> 49 * Ryan Layer <rmlayer@us.ibm.com>
53 * Anup Pemmaiah <pemmaiah@cc.usu.edu> 50 * Anup Pemmaiah <pemmaiah@cc.usu.edu>
54 * Kevin Gao <kevin.gao@intel.com> 51 * Kevin Gao <kevin.gao@intel.com>
55 *
56 * Any bugs reported given to us we will try to fix... any fixes shared will
57 * be incorporated into the next SCTP release.
58 */ 52 */
59 53
60#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 54#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -84,11 +78,6 @@
84#include <net/sctp/sctp.h> 78#include <net/sctp/sctp.h>
85#include <net/sctp/sm.h> 79#include <net/sctp/sm.h>
86 80
87/* WARNING: Please do not remove the SCTP_STATIC attribute to
88 * any of the functions below as they are used to export functions
89 * used by a project regression testsuite.
90 */
91
92/* Forward declarations for internal helper functions. */ 81/* Forward declarations for internal helper functions. */
93static int sctp_writeable(struct sock *sk); 82static int sctp_writeable(struct sock *sk);
94static void sctp_wfree(struct sk_buff *skb); 83static void sctp_wfree(struct sk_buff *skb);
@@ -98,6 +87,7 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p);
98static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); 87static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
99static int sctp_wait_for_accept(struct sock *sk, long timeo); 88static int sctp_wait_for_accept(struct sock *sk, long timeo);
100static void sctp_wait_for_close(struct sock *sk, long timeo); 89static void sctp_wait_for_close(struct sock *sk, long timeo);
90static void sctp_destruct_sock(struct sock *sk);
101static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, 91static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
102 union sctp_addr *addr, int len); 92 union sctp_addr *addr, int len);
103static int sctp_bindx_add(struct sock *, struct sockaddr *, int); 93static int sctp_bindx_add(struct sock *, struct sockaddr *, int);
@@ -279,14 +269,14 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk,
279 * sockaddr_in6 [RFC 2553]), 269 * sockaddr_in6 [RFC 2553]),
280 * addr_len - the size of the address structure. 270 * addr_len - the size of the address structure.
281 */ 271 */
282SCTP_STATIC int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) 272static int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len)
283{ 273{
284 int retval = 0; 274 int retval = 0;
285 275
286 sctp_lock_sock(sk); 276 sctp_lock_sock(sk);
287 277
288 SCTP_DEBUG_PRINTK("sctp_bind(sk: %p, addr: %p, addr_len: %d)\n", 278 pr_debug("%s: sk:%p, addr:%p, addr_len:%d\n", __func__, sk,
289 sk, addr, addr_len); 279 addr, addr_len);
290 280
291 /* Disallow binding twice. */ 281 /* Disallow binding twice. */
292 if (!sctp_sk(sk)->ep->base.bind_addr.port) 282 if (!sctp_sk(sk)->ep->base.bind_addr.port)
@@ -333,7 +323,7 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
333} 323}
334 324
335/* Bind a local address either to an endpoint or to an association. */ 325/* Bind a local address either to an endpoint or to an association. */
336SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) 326static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
337{ 327{
338 struct net *net = sock_net(sk); 328 struct net *net = sock_net(sk);
339 struct sctp_sock *sp = sctp_sk(sk); 329 struct sctp_sock *sp = sctp_sk(sk);
@@ -346,19 +336,15 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
346 /* Common sockaddr verification. */ 336 /* Common sockaddr verification. */
347 af = sctp_sockaddr_af(sp, addr, len); 337 af = sctp_sockaddr_af(sp, addr, len);
348 if (!af) { 338 if (!af) {
349 SCTP_DEBUG_PRINTK("sctp_do_bind(sk: %p, newaddr: %p, len: %d) EINVAL\n", 339 pr_debug("%s: sk:%p, newaddr:%p, len:%d EINVAL\n",
350 sk, addr, len); 340 __func__, sk, addr, len);
351 return -EINVAL; 341 return -EINVAL;
352 } 342 }
353 343
354 snum = ntohs(addr->v4.sin_port); 344 snum = ntohs(addr->v4.sin_port);
355 345
356 SCTP_DEBUG_PRINTK_IPADDR("sctp_do_bind(sk: %p, new addr: ", 346 pr_debug("%s: sk:%p, new addr:%pISc, port:%d, new port:%d, len:%d\n",
357 ", port: %d, new port: %d, len: %d)\n", 347 __func__, sk, &addr->sa, bp->port, snum, len);
358 sk,
359 addr,
360 bp->port, snum,
361 len);
362 348
363 /* PF specific bind() address verification. */ 349 /* PF specific bind() address verification. */
364 if (!sp->pf->bind_verify(sp, addr)) 350 if (!sp->pf->bind_verify(sp, addr))
@@ -372,9 +358,8 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
372 if (!snum) 358 if (!snum)
373 snum = bp->port; 359 snum = bp->port;
374 else if (snum != bp->port) { 360 else if (snum != bp->port) {
375 SCTP_DEBUG_PRINTK("sctp_do_bind:" 361 pr_debug("%s: new port %d doesn't match existing port "
376 " New port %d does not match existing port " 362 "%d\n", __func__, snum, bp->port);
377 "%d.\n", snum, bp->port);
378 return -EINVAL; 363 return -EINVAL;
379 } 364 }
380 } 365 }
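
Note: besides the macro conversion, sctp_do_bind() drops
SCTP_DEBUG_PRINTK_IPADDR in favor of the %pIS printk extension, which
formats a struct sockaddr (IPv4 or IPv6) directly; the trailing 'c'
requests compressed RFC 5952 output for IPv6 addresses. A sketch (the
helper name is made up):

    #include <linux/printk.h>
    #include <net/sock.h>

    static void log_bind_addr(struct sock *sk, struct sockaddr *sa, int len)
    {
            /* No manual address formatting: %pISc consumes the
             * sockaddr and picks the right rendering per family.
             */
            pr_debug("%s: sk:%p, new addr:%pISc, len:%d\n",
                     __func__, sk, sa, len);
    }
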
@@ -472,8 +457,8 @@ static int sctp_bindx_add(struct sock *sk, struct sockaddr *addrs, int addrcnt)
472 struct sockaddr *sa_addr; 457 struct sockaddr *sa_addr;
473 struct sctp_af *af; 458 struct sctp_af *af;
474 459
475 SCTP_DEBUG_PRINTK("sctp_bindx_add (sk: %p, addrs: %p, addrcnt: %d)\n", 460 pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", __func__, sk,
476 sk, addrs, addrcnt); 461 addrs, addrcnt);
477 462
478 addr_buf = addrs; 463 addr_buf = addrs;
479 for (cnt = 0; cnt < addrcnt; cnt++) { 464 for (cnt = 0; cnt < addrcnt; cnt++) {
@@ -539,11 +524,10 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
539 sp = sctp_sk(sk); 524 sp = sctp_sk(sk);
540 ep = sp->ep; 525 ep = sp->ep;
541 526
542 SCTP_DEBUG_PRINTK("%s: (sk: %p, addrs: %p, addrcnt: %d)\n", 527 pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n",
543 __func__, sk, addrs, addrcnt); 528 __func__, sk, addrs, addrcnt);
544 529
545 list_for_each_entry(asoc, &ep->asocs, asocs) { 530 list_for_each_entry(asoc, &ep->asocs, asocs) {
546
547 if (!asoc->peer.asconf_capable) 531 if (!asoc->peer.asconf_capable)
548 continue; 532 continue;
549 533
@@ -650,8 +634,8 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt)
650 union sctp_addr *sa_addr; 634 union sctp_addr *sa_addr;
651 struct sctp_af *af; 635 struct sctp_af *af;
652 636
653 SCTP_DEBUG_PRINTK("sctp_bindx_rem (sk: %p, addrs: %p, addrcnt: %d)\n", 637 pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n",
654 sk, addrs, addrcnt); 638 __func__, sk, addrs, addrcnt);
655 639
656 addr_buf = addrs; 640 addr_buf = addrs;
657 for (cnt = 0; cnt < addrcnt; cnt++) { 641 for (cnt = 0; cnt < addrcnt; cnt++) {
@@ -744,8 +728,8 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
744 sp = sctp_sk(sk); 728 sp = sctp_sk(sk);
745 ep = sp->ep; 729 ep = sp->ep;
746 730
747 SCTP_DEBUG_PRINTK("%s: (sk: %p, addrs: %p, addrcnt: %d)\n", 731 pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n",
748 __func__, sk, addrs, addrcnt); 732 __func__, sk, addrs, addrcnt);
749 733
750 list_for_each_entry(asoc, &ep->asocs, asocs) { 734 list_for_each_entry(asoc, &ep->asocs, asocs) {
751 735
@@ -812,14 +796,19 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
812 sin6 = (struct sockaddr_in6 *)addrs; 796 sin6 = (struct sockaddr_in6 *)addrs;
813 asoc->asconf_addr_del_pending->v6.sin6_addr = sin6->sin6_addr; 797 asoc->asconf_addr_del_pending->v6.sin6_addr = sin6->sin6_addr;
814 } 798 }
815 SCTP_DEBUG_PRINTK_IPADDR("send_asconf_del_ip: keep the last address asoc: %p ", 799
816 " at %p\n", asoc, asoc->asconf_addr_del_pending, 800 pr_debug("%s: keep the last address asoc:%p %pISc at %p\n",
817 asoc->asconf_addr_del_pending); 801 __func__, asoc, &asoc->asconf_addr_del_pending->sa,
802 asoc->asconf_addr_del_pending);
803
818 asoc->src_out_of_asoc_ok = 1; 804 asoc->src_out_of_asoc_ok = 1;
819 stored = 1; 805 stored = 1;
820 goto skip_mkasconf; 806 goto skip_mkasconf;
821 } 807 }
822 808
809 if (laddr == NULL)
810 return -EINVAL;
811
823 /* We do not need RCU protection throughout this loop 812 /* We do not need RCU protection throughout this loop
824 * because this is done under a socket lock from the 813 * because this is done under a socket lock from the
825 * setsockopt call. 814 * setsockopt call.
@@ -964,9 +953,9 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw)
964 * 953 *
965 * Returns 0 if ok, <0 errno code on error. 954 * Returns 0 if ok, <0 errno code on error.
966 */ 955 */
967SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk, 956static int sctp_setsockopt_bindx(struct sock* sk,
968 struct sockaddr __user *addrs, 957 struct sockaddr __user *addrs,
969 int addrs_size, int op) 958 int addrs_size, int op)
970{ 959{
971 struct sockaddr *kaddrs; 960 struct sockaddr *kaddrs;
972 int err; 961 int err;
@@ -976,8 +965,8 @@ SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk,
976 void *addr_buf; 965 void *addr_buf;
977 struct sctp_af *af; 966 struct sctp_af *af;
978 967
979 SCTP_DEBUG_PRINTK("sctp_setsockopt_bindx: sk %p addrs %p" 968 pr_debug("%s: sk:%p addrs:%p addrs_size:%d opt:%d\n",
980 " addrs_size %d opt %d\n", sk, addrs, addrs_size, op); 969 __func__, sk, addrs, addrs_size, op);
981 970
982 if (unlikely(addrs_size <= 0)) 971 if (unlikely(addrs_size <= 0))
983 return -EINVAL; 972 return -EINVAL;
@@ -1235,10 +1224,9 @@ static int __sctp_connect(struct sock* sk,
1235 asoc = NULL; 1224 asoc = NULL;
1236 1225
1237out_free: 1226out_free:
1227 pr_debug("%s: took out_free path with asoc:%p kaddrs:%p err:%d\n",
1228 __func__, asoc, kaddrs, err);
1238 1229
1239 SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p"
1240 " kaddrs: %p err: %d\n",
1241 asoc, kaddrs, err);
1242 if (asoc) { 1230 if (asoc) {
1243 /* sctp_primitive_ASSOCIATE may have added this association 1231 /* sctp_primitive_ASSOCIATE may have added this association
1244 * To the hash table, try to unhash it, just in case, its a noop 1232 * To the hash table, try to unhash it, just in case, its a noop
@@ -1312,7 +1300,7 @@ out_free:
1312 * 1300 *
1313 * Returns >=0 if ok, <0 errno code on error. 1301 * Returns >=0 if ok, <0 errno code on error.
1314 */ 1302 */
1315SCTP_STATIC int __sctp_setsockopt_connectx(struct sock* sk, 1303static int __sctp_setsockopt_connectx(struct sock* sk,
1316 struct sockaddr __user *addrs, 1304 struct sockaddr __user *addrs,
1317 int addrs_size, 1305 int addrs_size,
1318 sctp_assoc_t *assoc_id) 1306 sctp_assoc_t *assoc_id)
@@ -1320,8 +1308,8 @@ SCTP_STATIC int __sctp_setsockopt_connectx(struct sock* sk,
1320 int err = 0; 1308 int err = 0;
1321 struct sockaddr *kaddrs; 1309 struct sockaddr *kaddrs;
1322 1310
1323 SCTP_DEBUG_PRINTK("%s - sk %p addrs %p addrs_size %d\n", 1311 pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n",
1324 __func__, sk, addrs, addrs_size); 1312 __func__, sk, addrs, addrs_size);
1325 1313
1326 if (unlikely(addrs_size <= 0)) 1314 if (unlikely(addrs_size <= 0))
1327 return -EINVAL; 1315 return -EINVAL;
@@ -1350,9 +1338,9 @@ SCTP_STATIC int __sctp_setsockopt_connectx(struct sock* sk,
1350 * This is an older interface. It's kept for backward compatibility 1338 * This is an older interface. It's kept for backward compatibility
1351 * to the option that doesn't provide association id. 1339 * to the option that doesn't provide association id.
1352 */ 1340 */
1353SCTP_STATIC int sctp_setsockopt_connectx_old(struct sock* sk, 1341static int sctp_setsockopt_connectx_old(struct sock* sk,
1354 struct sockaddr __user *addrs, 1342 struct sockaddr __user *addrs,
1355 int addrs_size) 1343 int addrs_size)
1356{ 1344{
1357 return __sctp_setsockopt_connectx(sk, addrs, addrs_size, NULL); 1345 return __sctp_setsockopt_connectx(sk, addrs, addrs_size, NULL);
1358} 1346}
@@ -1363,9 +1351,9 @@ SCTP_STATIC int sctp_setsockopt_connectx_old(struct sock* sk,
1363 * indication to the call. Error is always negative and association id is 1351 * indication to the call. Error is always negative and association id is
1364 * always positive. 1352 * always positive.
1365 */ 1353 */
1366SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, 1354static int sctp_setsockopt_connectx(struct sock* sk,
1367 struct sockaddr __user *addrs, 1355 struct sockaddr __user *addrs,
1368 int addrs_size) 1356 int addrs_size)
1369{ 1357{
1370 sctp_assoc_t assoc_id = 0; 1358 sctp_assoc_t assoc_id = 0;
1371 int err = 0; 1359 int err = 0;
@@ -1386,9 +1374,9 @@ SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk,
1386 * addrs_num structure member. That way we can re-use the existing 1374 * addrs_num structure member. That way we can re-use the existing
1387 * code. 1375 * code.
1388 */ 1376 */
1389SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len, 1377static int sctp_getsockopt_connectx3(struct sock* sk, int len,
1390 char __user *optval, 1378 char __user *optval,
1391 int __user *optlen) 1379 int __user *optlen)
1392{ 1380{
1393 struct sctp_getaddrs_old param; 1381 struct sctp_getaddrs_old param;
1394 sctp_assoc_t assoc_id = 0; 1382 sctp_assoc_t assoc_id = 0;
@@ -1464,7 +1452,7 @@ SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len,
1464 * shutdown phase does not finish during this period, close() will 1452 * shutdown phase does not finish during this period, close() will
1465 * return but the graceful shutdown phase continues in the system. 1453 * return but the graceful shutdown phase continues in the system.
1466 */ 1454 */
1467SCTP_STATIC void sctp_close(struct sock *sk, long timeout) 1455static void sctp_close(struct sock *sk, long timeout)
1468{ 1456{
1469 struct net *net = sock_net(sk); 1457 struct net *net = sock_net(sk);
1470 struct sctp_endpoint *ep; 1458 struct sctp_endpoint *ep;
@@ -1472,7 +1460,7 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
1472 struct list_head *pos, *temp; 1460 struct list_head *pos, *temp;
1473 unsigned int data_was_unread; 1461 unsigned int data_was_unread;
1474 1462
1475 SCTP_DEBUG_PRINTK("sctp_close(sk: 0x%p, timeout:%ld)\n", sk, timeout); 1463 pr_debug("%s: sk:%p, timeout:%ld\n", __func__, sk, timeout);
1476 1464
1477 sctp_lock_sock(sk); 1465 sctp_lock_sock(sk);
1478 sk->sk_shutdown = SHUTDOWN_MASK; 1466 sk->sk_shutdown = SHUTDOWN_MASK;
@@ -1573,10 +1561,10 @@ static int sctp_error(struct sock *sk, int flags, int err)
1573 */ 1561 */
1574/* BUG: We do not implement the equivalent of sk_stream_wait_memory(). */ 1562/* BUG: We do not implement the equivalent of sk_stream_wait_memory(). */
1575 1563
1576SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); 1564static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *);
1577 1565
1578SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, 1566static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1579 struct msghdr *msg, size_t msg_len) 1567 struct msghdr *msg, size_t msg_len)
1580{ 1568{
1581 struct net *net = sock_net(sk); 1569 struct net *net = sock_net(sk);
1582 struct sctp_sock *sp; 1570 struct sctp_sock *sp;
@@ -1598,14 +1586,12 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1598 struct sctp_datamsg *datamsg; 1586 struct sctp_datamsg *datamsg;
1599 int msg_flags = msg->msg_flags; 1587 int msg_flags = msg->msg_flags;
1600 1588
1601 SCTP_DEBUG_PRINTK("sctp_sendmsg(sk: %p, msg: %p, msg_len: %zu)\n",
1602 sk, msg, msg_len);
1603
1604 err = 0; 1589 err = 0;
1605 sp = sctp_sk(sk); 1590 sp = sctp_sk(sk);
1606 ep = sp->ep; 1591 ep = sp->ep;
1607 1592
1608 SCTP_DEBUG_PRINTK("Using endpoint: %p.\n", ep); 1593 pr_debug("%s: sk:%p, msg:%p, msg_len:%zu ep:%p\n", __func__, sk,
1594 msg, msg_len, ep);
1609 1595
1610 /* We cannot send a message over a TCP-style listening socket. */ 1596 /* We cannot send a message over a TCP-style listening socket. */
1611 if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) { 1597 if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) {
@@ -1615,9 +1601,8 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1615 1601
1616 /* Parse out the SCTP CMSGs. */ 1602 /* Parse out the SCTP CMSGs. */
1617 err = sctp_msghdr_parse(msg, &cmsgs); 1603 err = sctp_msghdr_parse(msg, &cmsgs);
1618
1619 if (err) { 1604 if (err) {
1620 SCTP_DEBUG_PRINTK("msghdr parse err = %x\n", err); 1605 pr_debug("%s: msghdr parse err:%x\n", __func__, err);
1621 goto out_nounlock; 1606 goto out_nounlock;
1622 } 1607 }
1623 1608
@@ -1649,8 +1634,8 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1649 associd = sinfo->sinfo_assoc_id; 1634 associd = sinfo->sinfo_assoc_id;
1650 } 1635 }
1651 1636
1652 SCTP_DEBUG_PRINTK("msg_len: %zu, sinfo_flags: 0x%x\n", 1637 pr_debug("%s: msg_len:%zu, sinfo_flags:0x%x\n", __func__,
1653 msg_len, sinfo_flags); 1638 msg_len, sinfo_flags);
1654 1639
1655 /* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. */ 1640 /* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. */
1656 if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) { 1641 if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) {
@@ -1679,7 +1664,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1679 1664
1680 transport = NULL; 1665 transport = NULL;
1681 1666
1682 SCTP_DEBUG_PRINTK("About to look up association.\n"); 1667 pr_debug("%s: about to look up association\n", __func__);
1683 1668
1684 sctp_lock_sock(sk); 1669 sctp_lock_sock(sk);
1685 1670
@@ -1709,7 +1694,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1709 } 1694 }
1710 1695
1711 if (asoc) { 1696 if (asoc) {
1712 SCTP_DEBUG_PRINTK("Just looked up association: %p.\n", asoc); 1697 pr_debug("%s: just looked up association:%p\n", __func__, asoc);
1713 1698
1714 /* We cannot send a message on a TCP-style SCTP_SS_ESTABLISHED 1699 /* We cannot send a message on a TCP-style SCTP_SS_ESTABLISHED
1715 * socket that has an association in CLOSED state. This can 1700 * socket that has an association in CLOSED state. This can
@@ -1722,8 +1707,9 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1722 } 1707 }
1723 1708
1724 if (sinfo_flags & SCTP_EOF) { 1709 if (sinfo_flags & SCTP_EOF) {
1725 SCTP_DEBUG_PRINTK("Shutting down association: %p\n", 1710 pr_debug("%s: shutting down association:%p\n",
1726 asoc); 1711 __func__, asoc);
1712
1727 sctp_primitive_SHUTDOWN(net, asoc, NULL); 1713 sctp_primitive_SHUTDOWN(net, asoc, NULL);
1728 err = 0; 1714 err = 0;
1729 goto out_unlock; 1715 goto out_unlock;
@@ -1736,7 +1722,9 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1736 goto out_unlock; 1722 goto out_unlock;
1737 } 1723 }
1738 1724
1739 SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc); 1725 pr_debug("%s: aborting association:%p\n",
1726 __func__, asoc);
1727
1740 sctp_primitive_ABORT(net, asoc, chunk); 1728 sctp_primitive_ABORT(net, asoc, chunk);
1741 err = 0; 1729 err = 0;
1742 goto out_unlock; 1730 goto out_unlock;
@@ -1745,7 +1733,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1745 1733
1746 /* Do we need to create the association? */ 1734 /* Do we need to create the association? */
1747 if (!asoc) { 1735 if (!asoc) {
1748 SCTP_DEBUG_PRINTK("There is no association yet.\n"); 1736 pr_debug("%s: there is no association yet\n", __func__);
1749 1737
1750 if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) { 1738 if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) {
1751 err = -EINVAL; 1739 err = -EINVAL;
@@ -1844,7 +1832,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1844 } 1832 }
1845 1833
1846 /* ASSERT: we have a valid association at this point. */ 1834 /* ASSERT: we have a valid association at this point. */
1847 SCTP_DEBUG_PRINTK("We have a valid association.\n"); 1835 pr_debug("%s: we have a valid association\n", __func__);
1848 1836
1849 if (!sinfo) { 1837 if (!sinfo) {
1850 /* If the user didn't specify SNDRCVINFO, make up one with 1838 /* If the user didn't specify SNDRCVINFO, make up one with
@@ -1913,7 +1901,8 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1913 err = sctp_primitive_ASSOCIATE(net, asoc, NULL); 1901 err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
1914 if (err < 0) 1902 if (err < 0)
1915 goto out_free; 1903 goto out_free;
1916 SCTP_DEBUG_PRINTK("We associated primitively.\n"); 1904
1905 pr_debug("%s: we associated primitively\n", __func__);
1917 } 1906 }
1918 1907
1919 /* Break the message into multiple chunks of maximum size. */ 1908 /* Break the message into multiple chunks of maximum size. */
@@ -1940,17 +1929,15 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1940 */ 1929 */
1941 err = sctp_primitive_SEND(net, asoc, datamsg); 1930 err = sctp_primitive_SEND(net, asoc, datamsg);
1942 /* Did the lower layer accept the chunk? */ 1931 /* Did the lower layer accept the chunk? */
1943 if (err) 1932 if (err) {
1944 sctp_datamsg_free(datamsg); 1933 sctp_datamsg_free(datamsg);
1945 else 1934 goto out_free;
1946 sctp_datamsg_put(datamsg); 1935 }
1947 1936
1948 SCTP_DEBUG_PRINTK("We sent primitively.\n"); 1937 pr_debug("%s: we sent primitively\n", __func__);
1949 1938
1950 if (err) 1939 sctp_datamsg_put(datamsg);
1951 goto out_free; 1940 err = msg_len;
1952 else
1953 err = msg_len;
1954 1941
1955 /* If we are already past ASSOCIATE, the lower 1942 /* If we are already past ASSOCIATE, the lower
1956 * layers are responsible for association cleanup. 1943 * layers are responsible for association cleanup.
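
Note: the reworked tail of sctp_sendmsg() folds the two separate tests of
err into a single failure branch: free the datamsg and propagate the
error, otherwise drop the local reference and report the full message
length as sent. A condensed analogue (the types and helpers stand in for
sctp_datamsg and the SCTP primitives):

    struct dmsg;
    int lower_layer_send(struct dmsg *m);   /* sctp_primitive_SEND stand-in */
    void dmsg_free(struct dmsg *m);
    void dmsg_put(struct dmsg *m);

    static int send_tail(struct dmsg *m, int msg_len)
    {
            int err = lower_layer_send(m);

            if (err) {
                    dmsg_free(m);   /* lower layer rejected the chunks */
                    return err;
            }

            dmsg_put(m);            /* success: drop our reference... */
            return msg_len;         /* ...and report everything sent */
    }
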
@@ -2034,9 +2021,9 @@ static int sctp_skb_pull(struct sk_buff *skb, int len)
2034 */ 2021 */
2035static struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); 2022static struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
2036 2023
2037SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, 2024static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
2038 struct msghdr *msg, size_t len, int noblock, 2025 struct msghdr *msg, size_t len, int noblock,
2039 int flags, int *addr_len) 2026 int flags, int *addr_len)
2040{ 2027{
2041 struct sctp_ulpevent *event = NULL; 2028 struct sctp_ulpevent *event = NULL;
2042 struct sctp_sock *sp = sctp_sk(sk); 2029 struct sctp_sock *sp = sctp_sk(sk);
@@ -2045,10 +2032,9 @@ SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
2045 int err = 0; 2032 int err = 0;
2046 int skb_len; 2033 int skb_len;
2047 2034
2048 SCTP_DEBUG_PRINTK("sctp_recvmsg(%s: %p, %s: %p, %s: %zd, %s: %d, %s: " 2035 pr_debug("%s: sk:%p, msghdr:%p, len:%zd, noblock:%d, flags:0x%x, "
2049 "0x%x, %s: %p)\n", "sk", sk, "msghdr", msg, 2036 "addr_len:%p)\n", __func__, sk, msg, len, noblock, flags,
2050 "len", len, "knoblauch", noblock, 2037 addr_len);
2051 "flags", flags, "addr_len", addr_len);
2052 2038
2053 sctp_lock_sock(sk); 2039 sctp_lock_sock(sk);
2054 2040
@@ -2915,13 +2901,8 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, unsig
2915 asoc->max_retrans = assocparams.sasoc_asocmaxrxt; 2901 asoc->max_retrans = assocparams.sasoc_asocmaxrxt;
2916 } 2902 }
2917 2903
2918 if (assocparams.sasoc_cookie_life != 0) { 2904 if (assocparams.sasoc_cookie_life != 0)
2919 asoc->cookie_life.tv_sec = 2905 asoc->cookie_life = ms_to_ktime(assocparams.sasoc_cookie_life);
2920 assocparams.sasoc_cookie_life / 1000;
2921 asoc->cookie_life.tv_usec =
2922 (assocparams.sasoc_cookie_life % 1000)
2923 * 1000;
2924 }
2925 } else { 2906 } else {
2926 /* Set the values to the endpoint */ 2907 /* Set the values to the endpoint */
2927 struct sctp_sock *sp = sctp_sk(sk); 2908 struct sctp_sock *sp = sctp_sk(sk);
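
Note: asoc->cookie_life is now a ktime_t, so the millisecond round trip
through setsockopt/getsockopt shrinks to ms_to_ktime()/ktime_to_ms()
instead of hand-splitting the value into seconds and microseconds
(compare the matching getsockopt hunk further down). The pair in
isolation, assuming a bare ktime_t field:

    #include <linux/ktime.h>

    static ktime_t cookie_life;

    static void set_cookie_life_ms(u32 ms)
    {
            if (ms)
                    cookie_life = ms_to_ktime(ms);  /* ms -> ktime_t */
    }

    static u32 get_cookie_life_ms(void)
    {
            return ktime_to_ms(cookie_life);        /* ktime_t -> ms */
    }
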
@@ -3095,7 +3076,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
3095 3076
3096 err = sctp_send_asconf(asoc, chunk); 3077 err = sctp_send_asconf(asoc, chunk);
3097 3078
3098 SCTP_DEBUG_PRINTK("We set peer primary addr primitively.\n"); 3079 pr_debug("%s: we set peer primary addr primitively\n", __func__);
3099 3080
3100 return err; 3081 return err;
3101} 3082}
@@ -3565,13 +3546,12 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk,
3565 * optval - the buffer to store the value of the option. 3546 * optval - the buffer to store the value of the option.
3566 * optlen - the size of the buffer. 3547 * optlen - the size of the buffer.
3567 */ 3548 */
3568SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, 3549static int sctp_setsockopt(struct sock *sk, int level, int optname,
3569 char __user *optval, unsigned int optlen) 3550 char __user *optval, unsigned int optlen)
3570{ 3551{
3571 int retval = 0; 3552 int retval = 0;
3572 3553
3573 SCTP_DEBUG_PRINTK("sctp_setsockopt(sk: %p... optname: %d)\n", 3554 pr_debug("%s: sk:%p, optname:%d\n", __func__, sk, optname);
3574 sk, optname);
3575 3555
3576 /* I can hardly begin to describe how wrong this is. This is 3556 /* I can hardly begin to describe how wrong this is. This is
3577 * so broken as to be worse than useless. The API draft 3557 * so broken as to be worse than useless. The API draft
@@ -3725,16 +3705,16 @@ out_nounlock:
3725 * 3705 *
3726 * len: the size of the address. 3706 * len: the size of the address.
3727 */ 3707 */
3728SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *addr, 3708static int sctp_connect(struct sock *sk, struct sockaddr *addr,
3729 int addr_len) 3709 int addr_len)
3730{ 3710{
3731 int err = 0; 3711 int err = 0;
3732 struct sctp_af *af; 3712 struct sctp_af *af;
3733 3713
3734 sctp_lock_sock(sk); 3714 sctp_lock_sock(sk);
3735 3715
3736 SCTP_DEBUG_PRINTK("%s - sk: %p, sockaddr: %p, addr_len: %d\n", 3716 pr_debug("%s: sk:%p, sockaddr:%p, addr_len:%d\n", __func__, sk,
3737 __func__, sk, addr, addr_len); 3717 addr, addr_len);
3738 3718
3739 /* Validate addr_len before calling common connect/connectx routine. */ 3719 /* Validate addr_len before calling common connect/connectx routine. */
3740 af = sctp_get_af_specific(addr->sa_family); 3720 af = sctp_get_af_specific(addr->sa_family);
@@ -3752,7 +3732,7 @@ SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *addr,
3752} 3732}
3753 3733
3754/* FIXME: Write comments. */ 3734/* FIXME: Write comments. */
3755SCTP_STATIC int sctp_disconnect(struct sock *sk, int flags) 3735static int sctp_disconnect(struct sock *sk, int flags)
3756{ 3736{
3757 return -EOPNOTSUPP; /* STUB */ 3737 return -EOPNOTSUPP; /* STUB */
3758} 3738}
@@ -3764,7 +3744,7 @@ SCTP_STATIC int sctp_disconnect(struct sock *sk, int flags)
3764 * descriptor will be returned from accept() to represent the newly 3744 * descriptor will be returned from accept() to represent the newly
3765 * formed association. 3745 * formed association.
3766 */ 3746 */
3767SCTP_STATIC struct sock *sctp_accept(struct sock *sk, int flags, int *err) 3747static struct sock *sctp_accept(struct sock *sk, int flags, int *err)
3768{ 3748{
3769 struct sctp_sock *sp; 3749 struct sctp_sock *sp;
3770 struct sctp_endpoint *ep; 3750 struct sctp_endpoint *ep;
@@ -3817,7 +3797,7 @@ out:
3817} 3797}
3818 3798
3819/* The SCTP ioctl handler. */ 3799/* The SCTP ioctl handler. */
3820SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) 3800static int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg)
3821{ 3801{
3822 int rc = -ENOTCONN; 3802 int rc = -ENOTCONN;
3823 3803
@@ -3859,13 +3839,12 @@ out:
3859 * initialized the SCTP-specific portion of the sock. 3839 * initialized the SCTP-specific portion of the sock.
3860 * The sock structure should already be zero-filled memory. 3840 * The sock structure should already be zero-filled memory.
3861 */ 3841 */
3862SCTP_STATIC int sctp_init_sock(struct sock *sk) 3842static int sctp_init_sock(struct sock *sk)
3863{ 3843{
3864 struct net *net = sock_net(sk); 3844 struct net *net = sock_net(sk);
3865 struct sctp_endpoint *ep;
3866 struct sctp_sock *sp; 3845 struct sctp_sock *sp;
3867 3846
3868 SCTP_DEBUG_PRINTK("sctp_init_sock(sk: %p)\n", sk); 3847 pr_debug("%s: sk:%p\n", __func__, sk);
3869 3848
3870 sp = sctp_sk(sk); 3849 sp = sctp_sk(sk);
3871 3850
@@ -3971,13 +3950,14 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
3971 * change the data structure relationships, this may still 3950 * change the data structure relationships, this may still
3972 * be useful for storing pre-connect address information. 3951 * be useful for storing pre-connect address information.
3973 */ 3952 */
3974 ep = sctp_endpoint_new(sk, GFP_KERNEL); 3953 sp->ep = sctp_endpoint_new(sk, GFP_KERNEL);
3975 if (!ep) 3954 if (!sp->ep)
3976 return -ENOMEM; 3955 return -ENOMEM;
3977 3956
3978 sp->ep = ep;
3979 sp->hmac = NULL; 3957 sp->hmac = NULL;
3980 3958
3959 sk->sk_destruct = sctp_destruct_sock;
3960
3981 SCTP_DBG_OBJCNT_INC(sock); 3961 SCTP_DBG_OBJCNT_INC(sock);
3982 3962
3983 local_bh_disable(); 3963 local_bh_disable();
@@ -3995,14 +3975,20 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
3995} 3975}
3996 3976
3997/* Cleanup any SCTP per socket resources. */ 3977/* Cleanup any SCTP per socket resources. */
3998SCTP_STATIC void sctp_destroy_sock(struct sock *sk) 3978static void sctp_destroy_sock(struct sock *sk)
3999{ 3979{
4000 struct sctp_sock *sp; 3980 struct sctp_sock *sp;
4001 3981
4002 SCTP_DEBUG_PRINTK("sctp_destroy_sock(sk: %p)\n", sk); 3982 pr_debug("%s: sk:%p\n", __func__, sk);
4003 3983
4004 /* Release our hold on the endpoint. */ 3984 /* Release our hold on the endpoint. */
4005 sp = sctp_sk(sk); 3985 sp = sctp_sk(sk);
3986 /* This could happen during socket init, thus we bail out
3987 * early, since the rest of the below is not setup either.
3988 */
3989 if (sp->ep == NULL)
3990 return;
3991
4006 if (sp->do_auto_asconf) { 3992 if (sp->do_auto_asconf) {
4007 sp->do_auto_asconf = 0; 3993 sp->do_auto_asconf = 0;
4008 list_del(&sp->auto_asconf_list); 3994 list_del(&sp->auto_asconf_list);
@@ -4014,6 +4000,17 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk)
4014 local_bh_enable(); 4000 local_bh_enable();
4015} 4001}
4016 4002
4003/* Triggered when there are no references on the socket anymore */
4004static void sctp_destruct_sock(struct sock *sk)
4005{
4006 struct sctp_sock *sp = sctp_sk(sk);
4007
4008 /* Free up the HMAC transform. */
4009 crypto_free_hash(sp->hmac);
4010
4011 inet_sock_destruct(sk);
4012}
4013
4017/* API 4.1.7 shutdown() - TCP Style Syntax 4014/* API 4.1.7 shutdown() - TCP Style Syntax
4018 * int shutdown(int socket, int how); 4015 * int shutdown(int socket, int how);
4019 * 4016 *
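
Note: sctp_destroy_sock() runs from the protocol's close path, while the
new sctp_destruct_sock() runs only once the last reference to the struct
sock is gone; freeing the HMAC transform there, then chaining to
inet_sock_destruct(), keeps the crypto state alive for as long as
anything can still reach the socket. A generic sketch of the hook
(my_sock and my_destruct_sock are illustrative names):

    #include <net/sock.h>
    #include <net/inet_sock.h>
    #include <net/inet_common.h>
    #include <linux/crypto.h>

    struct my_sock {
            struct inet_sock inet;          /* must come first */
            struct crypto_hash *hmac;       /* lazily allocated */
    };

    /* Runs when the last reference on the socket is dropped. */
    static void my_destruct_sock(struct sock *sk)
    {
            struct my_sock *msk = (struct my_sock *)sk;

            crypto_free_hash(msk->hmac);    /* tolerates NULL */
            inet_sock_destruct(sk);         /* chain to inet teardown */
    }

    /* Installed once during init: sk->sk_destruct = my_destruct_sock; */
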
@@ -4030,7 +4027,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk)
4030 * Disables further send and receive operations 4027 * Disables further send and receive operations
4031 * and initiates the SCTP shutdown sequence. 4028 * and initiates the SCTP shutdown sequence.
4032 */ 4029 */
4033SCTP_STATIC void sctp_shutdown(struct sock *sk, int how) 4030static void sctp_shutdown(struct sock *sk, int how)
4034{ 4031{
4035 struct net *net = sock_net(sk); 4032 struct net *net = sock_net(sk);
4036 struct sctp_endpoint *ep; 4033 struct sctp_endpoint *ep;
@@ -4115,9 +4112,9 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
4115 goto out; 4112 goto out;
4116 } 4113 }
4117 4114
4118 SCTP_DEBUG_PRINTK("sctp_getsockopt_sctp_status(%d): %d %d %d\n", 4115 pr_debug("%s: len:%d, state:%d, rwnd:%d, assoc_id:%d\n",
4119 len, status.sstat_state, status.sstat_rwnd, 4116 __func__, len, status.sstat_state, status.sstat_rwnd,
4120 status.sstat_assoc_id); 4117 status.sstat_assoc_id);
4121 4118
4122 if (copy_to_user(optval, &status, len)) { 4119 if (copy_to_user(optval, &status, len)) {
4123 retval = -EFAULT; 4120 retval = -EFAULT;
@@ -4312,7 +4309,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval
4312 goto out; 4309 goto out;
4313 4310
4314 /* Map the socket to an unused fd that can be returned to the user. */ 4311 /* Map the socket to an unused fd that can be returned to the user. */
4315 retval = get_unused_fd(); 4312 retval = get_unused_fd_flags(0);
4316 if (retval < 0) { 4313 if (retval < 0) {
4317 sock_release(newsock); 4314 sock_release(newsock);
4318 goto out; 4315 goto out;
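
Note: get_unused_fd() was just a shorthand for get_unused_fd_flags(0) and
was being removed from the tree, so callers now spell out the flags;
passing O_CLOEXEC here instead would make the peeled-off descriptor
close-on-exec. The allocate-then-install pattern in miniature:

    #include <linux/file.h>
    #include <linux/fcntl.h>

    static int install_new_file(struct file *newfile)
    {
            int fd = get_unused_fd_flags(0);        /* or O_CLOEXEC */

            if (fd < 0)
                    return fd;              /* out of descriptors */

            fd_install(fd, newfile);        /* publish the file at fd */
            return fd;
    }
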
@@ -4325,8 +4322,8 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval
4325 return PTR_ERR(newfile); 4322 return PTR_ERR(newfile);
4326 } 4323 }
4327 4324
4328 SCTP_DEBUG_PRINTK("%s: sk: %p newsk: %p sd: %d\n", 4325 pr_debug("%s: sk:%p, newsk:%p, sd:%d\n", __func__, sk, newsock->sk,
4329 __func__, sk, newsock->sk, retval); 4326 retval);
4330 4327
4331 /* Return the fd mapped to the new socket. */ 4328 /* Return the fd mapped to the new socket. */
4332 if (put_user(len, optlen)) { 4329 if (put_user(len, optlen)) {
@@ -4459,7 +4456,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
4459 trans = sctp_addr_id2transport(sk, &params.spp_address, 4456 trans = sctp_addr_id2transport(sk, &params.spp_address,
4460 params.spp_assoc_id); 4457 params.spp_assoc_id);
4461 if (!trans) { 4458 if (!trans) {
4462 SCTP_DEBUG_PRINTK("Failed no transport\n"); 4459 pr_debug("%s: failed no transport\n", __func__);
4463 return -EINVAL; 4460 return -EINVAL;
4464 } 4461 }
4465 } 4462 }
@@ -4470,7 +4467,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
4470 */ 4467 */
4471 asoc = sctp_id2assoc(sk, params.spp_assoc_id); 4468 asoc = sctp_id2assoc(sk, params.spp_assoc_id);
4472 if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) { 4469 if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) {
4473 SCTP_DEBUG_PRINTK("Failed no association\n"); 4470 pr_debug("%s: failed no association\n", __func__);
4474 return -EINVAL; 4471 return -EINVAL;
4475 } 4472 }
4476 4473
@@ -5075,10 +5072,7 @@ static int sctp_getsockopt_associnfo(struct sock *sk, int len,
5075 assocparams.sasoc_asocmaxrxt = asoc->max_retrans; 5072 assocparams.sasoc_asocmaxrxt = asoc->max_retrans;
5076 assocparams.sasoc_peer_rwnd = asoc->peer.rwnd; 5073 assocparams.sasoc_peer_rwnd = asoc->peer.rwnd;
5077 assocparams.sasoc_local_rwnd = asoc->a_rwnd; 5074 assocparams.sasoc_local_rwnd = asoc->a_rwnd;
5078 assocparams.sasoc_cookie_life = (asoc->cookie_life.tv_sec 5075 assocparams.sasoc_cookie_life = ktime_to_ms(asoc->cookie_life);
5079 * 1000) +
5080 (asoc->cookie_life.tv_usec
5081 / 1000);
5082 5076
5083 list_for_each(pos, &asoc->peer.transport_addr_list) { 5077 list_for_each(pos, &asoc->peer.transport_addr_list) {
5084 cnt ++; 5078 cnt ++;
@@ -5693,8 +5687,7 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len,
5693 if (put_user(len, optlen)) 5687 if (put_user(len, optlen))
5694 return -EFAULT; 5688 return -EFAULT;
5695 5689
5696 SCTP_DEBUG_PRINTK("sctp_getsockopt_assoc_stat(%d): %d\n", 5690 pr_debug("%s: len:%d, assoc_id:%d\n", __func__, len, sas.sas_assoc_id);
5697 len, sas.sas_assoc_id);
5698 5691
5699 if (copy_to_user(optval, &sas, len)) 5692 if (copy_to_user(optval, &sas, len))
5700 return -EFAULT; 5693 return -EFAULT;
@@ -5702,14 +5695,13 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len,
5702 return 0; 5695 return 0;
5703} 5696}
5704 5697
5705SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, 5698static int sctp_getsockopt(struct sock *sk, int level, int optname,
5706 char __user *optval, int __user *optlen) 5699 char __user *optval, int __user *optlen)
5707{ 5700{
5708 int retval = 0; 5701 int retval = 0;
5709 int len; 5702 int len;
5710 5703
5711 SCTP_DEBUG_PRINTK("sctp_getsockopt(sk: %p... optname: %d)\n", 5704 pr_debug("%s: sk:%p, optname:%d\n", __func__, sk, optname);
5712 sk, optname);
5713 5705
5714 /* I can hardly begin to describe how wrong this is. This is 5706 /* I can hardly begin to describe how wrong this is. This is
5715 * so broken as to be worse than useless. The API draft 5707 * so broken as to be worse than useless. The API draft
@@ -5889,7 +5881,8 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
5889 5881
5890 snum = ntohs(addr->v4.sin_port); 5882 snum = ntohs(addr->v4.sin_port);
5891 5883
5892 SCTP_DEBUG_PRINTK("sctp_get_port() begins, snum=%d\n", snum); 5884 pr_debug("%s: begins, snum:%d\n", __func__, snum);
5885
5893 sctp_local_bh_disable(); 5886 sctp_local_bh_disable();
5894 5887
5895 if (snum == 0) { 5888 if (snum == 0) {
@@ -5955,7 +5948,8 @@ pp_found:
5955 int reuse = sk->sk_reuse; 5948 int reuse = sk->sk_reuse;
5956 struct sock *sk2; 5949 struct sock *sk2;
5957 5950
5958 SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n"); 5951 pr_debug("%s: found a possible match\n", __func__);
5952
5959 if (pp->fastreuse && sk->sk_reuse && 5953 if (pp->fastreuse && sk->sk_reuse &&
5960 sk->sk_state != SCTP_SS_LISTENING) 5954 sk->sk_state != SCTP_SS_LISTENING)
5961 goto success; 5955 goto success;
@@ -5985,7 +5979,8 @@ pp_found:
5985 goto fail_unlock; 5979 goto fail_unlock;
5986 } 5980 }
5987 } 5981 }
5988 SCTP_DEBUG_PRINTK("sctp_get_port(): Found a match\n"); 5982
5983 pr_debug("%s: found a match\n", __func__);
5989 } 5984 }
5990pp_not_found: 5985pp_not_found:
5991 /* If there was a hash table miss, create a new port. */ 5986 /* If there was a hash table miss, create a new port. */
@@ -6031,7 +6026,6 @@ fail:
6031 */ 6026 */
6032static int sctp_get_port(struct sock *sk, unsigned short snum) 6027static int sctp_get_port(struct sock *sk, unsigned short snum)
6033{ 6028{
6034 long ret;
6035 union sctp_addr addr; 6029 union sctp_addr addr;
6036 struct sctp_af *af = sctp_sk(sk)->pf->af; 6030 struct sctp_af *af = sctp_sk(sk)->pf->af;
6037 6031
@@ -6040,15 +6034,13 @@ static int sctp_get_port(struct sock *sk, unsigned short snum)
6040 addr.v4.sin_port = htons(snum); 6034 addr.v4.sin_port = htons(snum);
6041 6035
6042 /* Note: sk->sk_num gets filled in if ephemeral port request. */ 6036 /* Note: sk->sk_num gets filled in if ephemeral port request. */
6043 ret = sctp_get_port_local(sk, &addr); 6037 return !!sctp_get_port_local(sk, &addr);
6044
6045 return ret ? 1 : 0;
6046} 6038}
6047 6039
6048/* 6040/*
6049 * Move a socket to LISTENING state. 6041 * Move a socket to LISTENING state.
6050 */ 6042 */
6051SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog) 6043static int sctp_listen_start(struct sock *sk, int backlog)
6052{ 6044{
6053 struct sctp_sock *sp = sctp_sk(sk); 6045 struct sctp_sock *sp = sctp_sk(sk);
6054 struct sctp_endpoint *ep = sp->ep; 6046 struct sctp_endpoint *ep = sp->ep;
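
Note: sctp_get_port() returns an int but sctp_get_port_local() returns a
long, so simply returning the long would risk truncating a wide nonzero
failure value to 0. The double negation keeps the 0/1 contract without
the temporary:

    /* !!x normalizes any nonzero value to exactly 1, so narrowing the
     * long result to int can never turn a failure into "success".
     */
    static int normalize(long ret)
    {
            return !!ret;   /* 0 stays 0, everything else becomes 1 */
    }
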
@@ -6187,7 +6179,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
6187 /* Is there any exceptional events? */ 6179 /* Is there any exceptional events? */
6188 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 6180 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
6189 mask |= POLLERR | 6181 mask |= POLLERR |
6190 sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; 6182 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
6191 if (sk->sk_shutdown & RCV_SHUTDOWN) 6183 if (sk->sk_shutdown & RCV_SHUTDOWN)
6192 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 6184 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
6193 if (sk->sk_shutdown == SHUTDOWN_MASK) 6185 if (sk->sk_shutdown == SHUTDOWN_MASK)
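
Note: the added parentheses in sctp_poll() fix a genuine precedence bug.
In C, | binds tighter than ?:, so the old expression parsed as
(POLLERR | sock_flag(...)) ? POLLPRI : 0, which could report POLLPRI
while losing POLLERR from the mask entirely. A userspace demonstration
(macro values local to the sketch):

    #include <stdio.h>

    #define POLLERR 0x08
    #define POLLPRI 0x02

    int main(void)
    {
            int flag = 1, mask1 = 0, mask2 = 0;

            mask1 |= POLLERR | flag ? POLLPRI : 0;          /* bug */
            mask2 |= POLLERR | (flag ? POLLPRI : 0);        /* fix */

            /* prints "buggy: 0x2, fixed: 0xa" */
            printf("buggy: %#x, fixed: %#x\n", mask1, mask2);
            return 0;
    }
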
@@ -6335,8 +6327,7 @@ static int sctp_autobind(struct sock *sk)
6335 * msg_control 6327 * msg_control
6336 * points here 6328 * points here
6337 */ 6329 */
6338SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg, 6330static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
6339 sctp_cmsgs_t *cmsgs)
6340{ 6331{
6341 struct cmsghdr *cmsg; 6332 struct cmsghdr *cmsg;
6342 struct msghdr *my_msg = (struct msghdr *)msg; 6333 struct msghdr *my_msg = (struct msghdr *)msg;
@@ -6478,8 +6469,8 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
6478 6469
6479 timeo = sock_rcvtimeo(sk, noblock); 6470 timeo = sock_rcvtimeo(sk, noblock);
6480 6471
6481 SCTP_DEBUG_PRINTK("Timeout: timeo: %ld, MAX: %ld.\n", 6472 pr_debug("%s: timeo:%ld, max:%ld\n", __func__, timeo,
6482 timeo, MAX_SCHEDULE_TIMEOUT); 6473 MAX_SCHEDULE_TIMEOUT);
6483 6474
6484 do { 6475 do {
6485 /* Again only user level code calls this function, 6476 /* Again only user level code calls this function,
@@ -6610,8 +6601,8 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
6610 long current_timeo = *timeo_p; 6601 long current_timeo = *timeo_p;
6611 DEFINE_WAIT(wait); 6602 DEFINE_WAIT(wait);
6612 6603
6613 SCTP_DEBUG_PRINTK("wait_for_sndbuf: asoc=%p, timeo=%ld, msg_len=%zu\n", 6604 pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc,
6614 asoc, (long)(*timeo_p), msg_len); 6605 *timeo_p, msg_len);
6615 6606
6616 /* Increment the association's refcnt. */ 6607 /* Increment the association's refcnt. */
6617 sctp_association_hold(asoc); 6608 sctp_association_hold(asoc);
@@ -6717,8 +6708,7 @@ static int sctp_wait_for_connect(struct sctp_association *asoc, long *timeo_p)
6717 long current_timeo = *timeo_p; 6708 long current_timeo = *timeo_p;
6718 DEFINE_WAIT(wait); 6709 DEFINE_WAIT(wait);
6719 6710
6720 SCTP_DEBUG_PRINTK("%s: asoc=%p, timeo=%ld\n", __func__, asoc, 6711 pr_debug("%s: asoc:%p, timeo:%ld\n", __func__, asoc, *timeo_p);
6721 (long)(*timeo_p));
6722 6712
6723 /* Increment the association's refcnt. */ 6713 /* Increment the association's refcnt. */
6724 sctp_association_hold(asoc); 6714 sctp_association_hold(asoc);
@@ -6858,7 +6848,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
6858 newsk->sk_reuse = sk->sk_reuse; 6848 newsk->sk_reuse = sk->sk_reuse;
6859 6849
6860 newsk->sk_shutdown = sk->sk_shutdown; 6850 newsk->sk_shutdown = sk->sk_shutdown;
6861 newsk->sk_destruct = inet_sock_destruct; 6851 newsk->sk_destruct = sctp_destruct_sock;
6862 newsk->sk_family = sk->sk_family; 6852 newsk->sk_family = sk->sk_family;
6863 newsk->sk_protocol = IPPROTO_SCTP; 6853 newsk->sk_protocol = IPPROTO_SCTP;
6864 newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; 6854 newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c
index da8603523808..6007124aefa0 100644
--- a/net/sctp/ssnmap.c
+++ b/net/sctp/ssnmap.c
@@ -24,16 +24,10 @@
24 * 24 *
25 * Please send any bug reports or fixes you make to the 25 * Please send any bug reports or fixes you make to the
26 * email address(es): 26 * email address(es):
27 * lksctp developers <lksctp-developers@lists.sourceforge.net> 27 * lksctp developers <linux-sctp@vger.kernel.org>
28 *
29 * Or submit a bug report through the following website:
30 * http://www.sf.net/projects/lksctp
31 * 28 *
32 * Written or modified by: 29 * Written or modified by:
33 * Jon Grimm <jgrimm@us.ibm.com> 30 * Jon Grimm <jgrimm@us.ibm.com>
34 *
35 * Any bugs reported given to us we will try to fix... any fixes shared will
36 * be incorporated into the next SCTP release.
37 */ 31 */
38 32
39#include <linux/types.h> 33#include <linux/types.h>
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index bf3c6e8fc401..6b36561a1b3b 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -25,10 +25,7 @@
25 * 25 *
26 * Please send any bug reports or fixes you make to the 26 * Please send any bug reports or fixes you make to the
27 * email address(es): 27 * email address(es):
28 * lksctp developers <lksctp-developers@lists.sourceforge.net> 28 * lksctp developers <linux-sctp@vger.kernel.org>
29 *
30 * Or submit a bug report through the following website:
31 * http://www.sf.net/projects/lksctp
32 * 29 *
33 * Written or modified by: 30 * Written or modified by:
34 * Mingqin Liu <liuming@us.ibm.com> 31 * Mingqin Liu <liuming@us.ibm.com>
@@ -36,9 +33,6 @@
36 * Ardelle Fan <ardelle.fan@intel.com> 33 * Ardelle Fan <ardelle.fan@intel.com>
37 * Ryan Layer <rmlayer@us.ibm.com> 34 * Ryan Layer <rmlayer@us.ibm.com>
38 * Sridhar Samudrala <sri@us.ibm.com> 35 * Sridhar Samudrala <sri@us.ibm.com>
39 *
40 * Any bugs reported given to us we will try to fix... any fixes shared will
41 * be incorporated into the next SCTP release.
42 */ 36 */
43 37
44#include <net/sctp/structs.h> 38#include <net/sctp/structs.h>
@@ -62,12 +56,12 @@ extern long sysctl_sctp_mem[3];
62extern int sysctl_sctp_rmem[3]; 56extern int sysctl_sctp_rmem[3];
63extern int sysctl_sctp_wmem[3]; 57extern int sysctl_sctp_wmem[3];
64 58
65static int proc_sctp_do_hmac_alg(ctl_table *ctl, 59static int proc_sctp_do_hmac_alg(struct ctl_table *ctl,
66 int write, 60 int write,
67 void __user *buffer, size_t *lenp, 61 void __user *buffer, size_t *lenp,
68 62
69 loff_t *ppos); 63 loff_t *ppos);
70static ctl_table sctp_table[] = { 64static struct ctl_table sctp_table[] = {
71 { 65 {
72 .procname = "sctp_mem", 66 .procname = "sctp_mem",
73 .data = &sysctl_sctp_mem, 67 .data = &sysctl_sctp_mem,
@@ -93,7 +87,7 @@ static ctl_table sctp_table[] = {
93 { /* sentinel */ } 87 { /* sentinel */ }
94}; 88};
95 89
96static ctl_table sctp_net_table[] = { 90static struct ctl_table sctp_net_table[] = {
97 { 91 {
98 .procname = "rto_initial", 92 .procname = "rto_initial",
99 .data = &init_net.sctp.rto_initial, 93 .data = &init_net.sctp.rto_initial,
@@ -300,14 +294,14 @@ static ctl_table sctp_net_table[] = {
300 { /* sentinel */ } 294 { /* sentinel */ }
301}; 295};
302 296
303static int proc_sctp_do_hmac_alg(ctl_table *ctl, 297static int proc_sctp_do_hmac_alg(struct ctl_table *ctl,
304 int write, 298 int write,
305 void __user *buffer, size_t *lenp, 299 void __user *buffer, size_t *lenp,
306 loff_t *ppos) 300 loff_t *ppos)
307{ 301{
308 struct net *net = current->nsproxy->net_ns; 302 struct net *net = current->nsproxy->net_ns;
309 char tmp[8]; 303 char tmp[8];
310 ctl_table tbl; 304 struct ctl_table tbl;
311 int ret; 305 int ret;
312 int changed = 0; 306 int changed = 0;
313 char *none = "none"; 307 char *none = "none";
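The other change in this file is mechanical: the old ctl_table typedef is spelled out as struct ctl_table, matching the typedef's removal from the tree. For reference, a hedged sketch of a minimal table in that style (names illustrative); the per-net table here is registered elsewhere in the file with register_net_sysctl():

	#include <linux/sysctl.h>

	static int demo_value;

	static struct ctl_table demo_table[] = {
		{
			.procname	= "demo_value",
			.data		= &demo_value,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{ /* sentinel */ }
	};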
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 098f1d5f769e..e332efb124cc 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -30,10 +30,7 @@
30 * 30 *
31 * Please send any bug reports or fixes you make to the 31 * Please send any bug reports or fixes you make to the
32 * email address(es): 32 * email address(es):
33 * lksctp developers <lksctp-developers@lists.sourceforge.net> 33 * lksctp developers <linux-sctp@vger.kernel.org>
34 *
35 * Or submit a bug report through the following website:
36 * http://www.sf.net/projects/lksctp
37 * 34 *
38 * Written or modified by: 35 * Written or modified by:
39 * La Monte H.P. Yarroll <piggy@acm.org> 36 * La Monte H.P. Yarroll <piggy@acm.org>
@@ -43,9 +40,6 @@
43 * Hui Huang <hui.huang@nokia.com> 40 * Hui Huang <hui.huang@nokia.com>
44 * Sridhar Samudrala <sri@us.ibm.com> 41 * Sridhar Samudrala <sri@us.ibm.com>
45 * Ardelle Fan <ardelle.fan@intel.com> 42 * Ardelle Fan <ardelle.fan@intel.com>
46 *
47 * Any bugs reported given to us we will try to fix... any fixes shared will
48 * be incorporated into the next SCTP release.
49 */ 43 */
50 44
51#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 45#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -116,7 +110,7 @@ struct sctp_transport *sctp_transport_new(struct net *net,
116{ 110{
117 struct sctp_transport *transport; 111 struct sctp_transport *transport;
118 112
119 transport = t_new(struct sctp_transport, gfp); 113 transport = kzalloc(sizeof(*transport), gfp);
120 if (!transport) 114 if (!transport)
121 goto fail; 115 goto fail;
122 116
@@ -176,14 +170,17 @@ static void sctp_transport_destroy_rcu(struct rcu_head *head)
176 */ 170 */
177static void sctp_transport_destroy(struct sctp_transport *transport) 171static void sctp_transport_destroy(struct sctp_transport *transport)
178{ 172{
179 SCTP_ASSERT(transport->dead, "Transport is not dead", return); 173 if (unlikely(!transport->dead)) {
180 174 WARN(1, "Attempt to destroy undead transport %p!\n", transport);
181 call_rcu(&transport->rcu, sctp_transport_destroy_rcu); 175 return;
176 }
182 177
183 sctp_packet_free(&transport->packet); 178 sctp_packet_free(&transport->packet);
184 179
185 if (transport->asoc) 180 if (transport->asoc)
186 sctp_association_put(transport->asoc); 181 sctp_association_put(transport->asoc);
182
183 call_rcu(&transport->rcu, sctp_transport_destroy_rcu);
187} 184}
188 185
189/* Start T3_rtx timer if it is not already running and update the heartbeat 186/* Start T3_rtx timer if it is not already running and update the heartbeat
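Besides dropping SCTP_ASSERT for an explicit WARN() guard, this hunk reorders sctp_transport_destroy() so call_rcu() runs last: the RCU callback frees the transport, so scheduling it before sctp_packet_free() and sctp_association_put() risked a use after free. The general shape, as a sketch with hypothetical names:

	#include <linux/kernel.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct demo_obj {
		bool dead;
		struct rcu_head rcu;
	};

	static void demo_obj_free_rcu(struct rcu_head *head)
	{
		kfree(container_of(head, struct demo_obj, rcu));
	}

	static void demo_obj_destroy(struct demo_obj *obj)
	{
		if (unlikely(!obj->dead)) {
			WARN(1, "attempt to destroy live object %p\n", obj);
			return;
		}
		/* Drop everything the object still references first... */
		/* ...then schedule the RCU-deferred free as the last step. */
		call_rcu(&obj->rcu, demo_obj_free_rcu);
	}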
@@ -317,11 +314,9 @@ void sctp_transport_put(struct sctp_transport *transport)
317/* Update transport's RTO based on the newly calculated RTT. */ 314/* Update transport's RTO based on the newly calculated RTT. */
318void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) 315void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
319{ 316{
320 /* Check for valid transport. */ 317 if (unlikely(!tp->rto_pending))
321 SCTP_ASSERT(tp, "NULL transport", return); 318 /* We should not be doing any RTO updates unless rto_pending is set. */
322 319 pr_debug("%s: rto_pending not set on transport %p!\n", __func__, tp);
323 /* We should not be doing any RTO updates unless rto_pending is set. */
324 SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return);
325 320
326 if (tp->rttvar || tp->srtt) { 321 if (tp->rttvar || tp->srtt) {
327 struct net *net = sock_net(tp->asoc->base.sk); 322 struct net *net = sock_net(tp->asoc->base.sk);
@@ -377,9 +372,8 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
377 */ 372 */
378 tp->rto_pending = 0; 373 tp->rto_pending = 0;
379 374
380 SCTP_DEBUG_PRINTK("%s: transport: %p, rtt: %d, srtt: %d " 375 pr_debug("%s: transport:%p, rtt:%d, srtt:%d rttvar:%d, rto:%ld\n",
381 "rttvar: %d, rto: %ld\n", __func__, 376 __func__, tp, rtt, tp->srtt, tp->rttvar, tp->rto);
382 tp, rtt, tp->srtt, tp->rttvar, tp->rto);
383} 377}
384 378
385/* This routine updates the transport's cwnd and partial_bytes_acked 379/* This routine updates the transport's cwnd and partial_bytes_acked
@@ -433,12 +427,11 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
433 cwnd += pmtu; 427 cwnd += pmtu;
434 else 428 else
435 cwnd += bytes_acked; 429 cwnd += bytes_acked;
436 SCTP_DEBUG_PRINTK("%s: SLOW START: transport: %p, " 430
437 "bytes_acked: %d, cwnd: %d, ssthresh: %d, " 431 pr_debug("%s: slow start: transport:%p, bytes_acked:%d, "
438 "flight_size: %d, pba: %d\n", 432 "cwnd:%d, ssthresh:%d, flight_size:%d, pba:%d\n",
439 __func__, 433 __func__, transport, bytes_acked, cwnd, ssthresh,
440 transport, bytes_acked, cwnd, 434 flight_size, pba);
441 ssthresh, flight_size, pba);
442 } else { 435 } else {
443 /* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh, 436 /* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh,
444 * upon each SACK arrival that advances the Cumulative TSN Ack 437 * upon each SACK arrival that advances the Cumulative TSN Ack
@@ -459,12 +452,12 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
459 cwnd += pmtu; 452 cwnd += pmtu;
460 pba = ((cwnd < pba) ? (pba - cwnd) : 0); 453 pba = ((cwnd < pba) ? (pba - cwnd) : 0);
461 } 454 }
462 SCTP_DEBUG_PRINTK("%s: CONGESTION AVOIDANCE: " 455
463 "transport: %p, bytes_acked: %d, cwnd: %d, " 456 pr_debug("%s: congestion avoidance: transport:%p, "
464 "ssthresh: %d, flight_size: %d, pba: %d\n", 457 "bytes_acked:%d, cwnd:%d, ssthresh:%d, "
465 __func__, 458 "flight_size:%d, pba:%d\n", __func__,
466 transport, bytes_acked, cwnd, 459 transport, bytes_acked, cwnd, ssthresh,
467 ssthresh, flight_size, pba); 460 flight_size, pba);
468 } 461 }
469 462
470 transport->cwnd = cwnd; 463 transport->cwnd = cwnd;
@@ -558,10 +551,10 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
558 } 551 }
559 552
560 transport->partial_bytes_acked = 0; 553 transport->partial_bytes_acked = 0;
561 SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: " 554
562 "%d ssthresh: %d\n", __func__, 555 pr_debug("%s: transport:%p, reason:%d, cwnd:%d, ssthresh:%d\n",
563 transport, reason, 556 __func__, transport, reason, transport->cwnd,
564 transport->cwnd, transport->ssthresh); 557 transport->ssthresh);
565} 558}
566 559
567/* Apply Max.Burst limit to the congestion window: 560/* Apply Max.Burst limit to the congestion window:
diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
index 396c45174e5b..fbda20028285 100644
--- a/net/sctp/tsnmap.c
+++ b/net/sctp/tsnmap.c
@@ -27,19 +27,13 @@
27 * 27 *
28 * Please send any bug reports or fixes you make to the 28 * Please send any bug reports or fixes you make to the
29 * email address(es): 29 * email address(es):
30 * lksctp developers <lksctp-developers@lists.sourceforge.net> 30 * lksctp developers <linux-sctp@vger.kernel.org>
31 *
32 * Or submit a bug report through the following website:
33 * http://www.sf.net/projects/lksctp
34 * 31 *
35 * Written or modified by: 32 * Written or modified by:
36 * La Monte H.P. Yarroll <piggy@acm.org> 33 * La Monte H.P. Yarroll <piggy@acm.org>
37 * Jon Grimm <jgrimm@us.ibm.com> 34 * Jon Grimm <jgrimm@us.ibm.com>
38 * Karl Knutson <karl@athena.chicago.il.us> 35 * Karl Knutson <karl@athena.chicago.il.us>
39 * Sridhar Samudrala <sri@us.ibm.com> 36 * Sridhar Samudrala <sri@us.ibm.com>
40 *
41 * Any bugs reported given to us we will try to fix... any fixes shared will
42 * be incorporated into the next SCTP release.
43 */ 37 */
44 38
45#include <linux/slab.h> 39#include <linux/slab.h>
@@ -161,8 +155,8 @@ int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn,
161 155
162 156
163/* Initialize a Gap Ack Block iterator from memory being provided. */ 157/* Initialize a Gap Ack Block iterator from memory being provided. */
164SCTP_STATIC void sctp_tsnmap_iter_init(const struct sctp_tsnmap *map, 158static void sctp_tsnmap_iter_init(const struct sctp_tsnmap *map,
165 struct sctp_tsnmap_iter *iter) 159 struct sctp_tsnmap_iter *iter)
166{ 160{
167 /* Only start looking one past the Cumulative TSN Ack Point. */ 161 /* Only start looking one past the Cumulative TSN Ack Point. */
168 iter->start = map->cumulative_tsn_ack_point + 1; 162 iter->start = map->cumulative_tsn_ack_point + 1;
@@ -171,9 +165,9 @@ SCTP_STATIC void sctp_tsnmap_iter_init(const struct sctp_tsnmap *map,
171/* Get the next Gap Ack Blocks. Returns 0 if there was not another block 165/* Get the next Gap Ack Blocks. Returns 0 if there was not another block
172 * to get. 166 * to get.
173 */ 167 */
174SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, 168static int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map,
175 struct sctp_tsnmap_iter *iter, 169 struct sctp_tsnmap_iter *iter,
176 __u16 *start, __u16 *end) 170 __u16 *start, __u16 *end)
177{ 171{
178 int ended = 0; 172 int ended = 0;
179 __u16 start_ = 0, end_ = 0, offset; 173 __u16 start_ = 0, end_ = 0, offset;
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 10c018a5b9fe..81089ed65456 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -28,19 +28,13 @@
28 * 28 *
29 * Please send any bug reports or fixes you make to the 29 * Please send any bug reports or fixes you make to the
30 * email address(es): 30 * email address(es):
31 * lksctp developers <lksctp-developers@lists.sourceforge.net> 31 * lksctp developers <linux-sctp@vger.kernel.org>
32 *
33 * Or submit a bug report through the following website:
34 * http://www.sf.net/projects/lksctp
35 * 32 *
36 * Written or modified by: 33 * Written or modified by:
37 * Jon Grimm <jgrimm@us.ibm.com> 34 * Jon Grimm <jgrimm@us.ibm.com>
38 * La Monte H.P. Yarroll <piggy@acm.org> 35 * La Monte H.P. Yarroll <piggy@acm.org>
39 * Ardelle Fan <ardelle.fan@intel.com> 36 * Ardelle Fan <ardelle.fan@intel.com>
40 * Sridhar Samudrala <sri@us.ibm.com> 37 * Sridhar Samudrala <sri@us.ibm.com>
41 *
42 * Any bugs reported given to us we will try to fix... any fixes shared will
43 * be incorporated into the next SCTP release.
44 */ 38 */
45 39
46#include <linux/slab.h> 40#include <linux/slab.h>
@@ -57,9 +51,9 @@ static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event);
57 51
58 52
59/* Initialize an ULP event from an given skb. */ 53/* Initialize an ULP event from an given skb. */
60SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, 54static void sctp_ulpevent_init(struct sctp_ulpevent *event,
61 int msg_flags, 55 int msg_flags,
62 unsigned int len) 56 unsigned int len)
63{ 57{
64 memset(event, 0, sizeof(struct sctp_ulpevent)); 58 memset(event, 0, sizeof(struct sctp_ulpevent));
65 event->msg_flags = msg_flags; 59 event->msg_flags = msg_flags;
@@ -67,8 +61,8 @@ SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event,
67} 61}
68 62
69/* Create a new sctp_ulpevent. */ 63/* Create a new sctp_ulpevent. */
70SCTP_STATIC struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, 64static struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags,
71 gfp_t gfp) 65 gfp_t gfp)
72{ 66{
73 struct sctp_ulpevent *event; 67 struct sctp_ulpevent *event;
74 struct sk_buff *skb; 68 struct sk_buff *skb;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 04e3d470f877..1c1484ed605d 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -27,18 +27,12 @@
27 * 27 *
28 * Please send any bug reports or fixes you make to the 28 * Please send any bug reports or fixes you make to the
29 * email address(es): 29 * email address(es):
30 * lksctp developers <lksctp-developers@lists.sourceforge.net> 30 * lksctp developers <linux-sctp@vger.kernel.org>
31 *
32 * Or submit a bug report through the following website:
33 * http://www.sf.net/projects/lksctp
34 * 31 *
35 * Written or modified by: 32 * Written or modified by:
36 * Jon Grimm <jgrimm@us.ibm.com> 33 * Jon Grimm <jgrimm@us.ibm.com>
37 * La Monte H.P. Yarroll <piggy@acm.org> 34 * La Monte H.P. Yarroll <piggy@acm.org>
38 * Sridhar Samudrala <sri@us.ibm.com> 35 * Sridhar Samudrala <sri@us.ibm.com>
39 *
40 * Any bugs reported given to us we will try to fix... any fixes shared will
41 * be incorporated into the next SCTP release.
42 */ 36 */
43 37
44#include <linux/slab.h> 38#include <linux/slab.h>
diff --git a/net/socket.c b/net/socket.c
index 4ca1526db756..ebed4b68f768 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -104,6 +104,12 @@
104#include <linux/route.h> 104#include <linux/route.h>
105#include <linux/sockios.h> 105#include <linux/sockios.h>
106#include <linux/atalk.h> 106#include <linux/atalk.h>
107#include <net/busy_poll.h>
108
109#ifdef CONFIG_NET_RX_BUSY_POLL
110unsigned int sysctl_net_busy_read __read_mostly;
111unsigned int sysctl_net_busy_poll __read_mostly;
112#endif
107 113
108static int sock_no_open(struct inode *irrelevant, struct file *dontcare); 114static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
109static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, 115static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
@@ -848,11 +854,6 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
848} 854}
849EXPORT_SYMBOL(kernel_recvmsg); 855EXPORT_SYMBOL(kernel_recvmsg);
850 856
851static void sock_aio_dtor(struct kiocb *iocb)
852{
853 kfree(iocb->private);
854}
855
856static ssize_t sock_sendpage(struct file *file, struct page *page, 857static ssize_t sock_sendpage(struct file *file, struct page *page,
857 int offset, size_t size, loff_t *ppos, int more) 858 int offset, size_t size, loff_t *ppos, int more)
858{ 859{
@@ -883,12 +884,8 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
883static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, 884static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
884 struct sock_iocb *siocb) 885 struct sock_iocb *siocb)
885{ 886{
886 if (!is_sync_kiocb(iocb)) { 887 if (!is_sync_kiocb(iocb))
887 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); 888 BUG();
888 if (!siocb)
889 return NULL;
890 iocb->ki_dtor = sock_aio_dtor;
891 }
892 889
893 siocb->kiocb = iocb; 890 siocb->kiocb = iocb;
894 iocb->private = siocb; 891 iocb->private = siocb;
@@ -925,7 +922,7 @@ static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
925 if (pos != 0) 922 if (pos != 0)
926 return -ESPIPE; 923 return -ESPIPE;
927 924
928 if (iocb->ki_left == 0) /* Match SYS5 behaviour */ 925 if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
929 return 0; 926 return 0;
930 927
931 928
@@ -1142,13 +1139,24 @@ EXPORT_SYMBOL(sock_create_lite);
1142/* No kernel lock held - perfect */ 1139/* No kernel lock held - perfect */
1143static unsigned int sock_poll(struct file *file, poll_table *wait) 1140static unsigned int sock_poll(struct file *file, poll_table *wait)
1144{ 1141{
1142 unsigned int busy_flag = 0;
1145 struct socket *sock; 1143 struct socket *sock;
1146 1144
1147 /* 1145 /*
1148 * We can't return errors to poll, so it's either yes or no. 1146 * We can't return errors to poll, so it's either yes or no.
1149 */ 1147 */
1150 sock = file->private_data; 1148 sock = file->private_data;
1151 return sock->ops->poll(file, sock, wait); 1149
1150 if (sk_can_busy_loop(sock->sk)) {
1151 /* this socket can busy-poll, so tell the system call */
1152 busy_flag = POLL_BUSY_LOOP;
1153
1154 /* once, only if requested by syscall */
1155 if (wait && (wait->_key & POLL_BUSY_LOOP))
1156 sk_busy_loop(sock->sk, 1);
1157 }
1158
1159 return busy_flag | sock->ops->poll(file, sock, wait);
1152} 1160}
1153 1161
1154static int sock_mmap(struct file *file, struct vm_area_struct *vma) 1162static int sock_mmap(struct file *file, struct vm_area_struct *vma)
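sock_poll() now reports POLL_BUSY_LOOP for sockets that support busy polling and spins once when the poll core requests it via wait->_key. On the user side this is driven by the net.core.busy_poll sysctl or, per socket, by SO_BUSY_POLL, which landed in the same series; a hedged userspace sketch:

	#include <sys/socket.h>

	#ifndef SO_BUSY_POLL
	#define SO_BUSY_POLL 46
	#endif

	/* Ask the kernel to busy-poll this socket for up to 50us per poll. */
	static int enable_busy_poll(int fd)
	{
		int usec = 50;

		return setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL,
				  &usec, sizeof(usec));
	}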
@@ -2635,7 +2643,9 @@ static int __init sock_init(void)
2635 */ 2643 */
2636 2644
2637#ifdef CONFIG_NETFILTER 2645#ifdef CONFIG_NETFILTER
2638 netfilter_init(); 2646 err = netfilter_init();
2647 if (err)
2648 goto out;
2639#endif 2649#endif
2640 2650
2641#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING 2651#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
@@ -3053,12 +3063,12 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3053 3063
3054 uifmap32 = &uifr32->ifr_ifru.ifru_map; 3064 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3055 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); 3065 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3056 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); 3066 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3057 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); 3067 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3058 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); 3068 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3059 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq); 3069 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3060 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma); 3070 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3061 err |= __get_user(ifr.ifr_map.port, &uifmap32->port); 3071 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
3062 if (err) 3072 if (err)
3063 return -EFAULT; 3073 return -EFAULT;
3064 3074
@@ -3069,12 +3079,12 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3069 3079
3070 if (cmd == SIOCGIFMAP && !err) { 3080 if (cmd == SIOCGIFMAP && !err) {
3071 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); 3081 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3072 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); 3082 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3073 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); 3083 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3074 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); 3084 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3075 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq); 3085 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3076 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma); 3086 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3077 err |= __put_user(ifr.ifr_map.port, &uifmap32->port); 3087 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
3078 if (err) 3088 if (err)
3079 err = -EFAULT; 3089 err = -EFAULT;
3080 } 3090 }
@@ -3148,25 +3158,25 @@ static int routing_ioctl(struct net *net, struct socket *sock,
3148 struct in6_rtmsg32 __user *ur6 = argp; 3158 struct in6_rtmsg32 __user *ur6 = argp;
3149 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), 3159 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
3150 3 * sizeof(struct in6_addr)); 3160 3 * sizeof(struct in6_addr));
3151 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); 3161 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3152 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); 3162 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3153 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); 3163 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3154 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); 3164 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3155 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); 3165 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3156 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); 3166 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3157 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); 3167 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
3158 3168
3159 r = (void *) &r6; 3169 r = (void *) &r6;
3160 } else { /* ipv4 */ 3170 } else { /* ipv4 */
3161 struct rtentry32 __user *ur4 = argp; 3171 struct rtentry32 __user *ur4 = argp;
3162 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), 3172 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
3163 3 * sizeof(struct sockaddr)); 3173 3 * sizeof(struct sockaddr));
3164 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags)); 3174 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3165 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric)); 3175 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3166 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu)); 3176 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3167 ret |= __get_user(r4.rt_window, &(ur4->rt_window)); 3177 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3168 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt)); 3178 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3169 ret |= __get_user(rtdev, &(ur4->rt_dev)); 3179 ret |= get_user(rtdev, &(ur4->rt_dev));
3170 if (rtdev) { 3180 if (rtdev) {
3171 ret |= copy_from_user(devname, compat_ptr(rtdev), 15); 3181 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
3172 r4.rt_dev = (char __user __force *)devname; 3182 r4.rt_dev = (char __user __force *)devname;
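Replacing __get_user()/__put_user() with get_user()/put_user() throughout these compat ioctl helpers adds the access_ok() range check to every access; the double-underscore variants assume the caller has already validated the user pointer. A small sketch of the checked form, with a hypothetical structure:

	#include <linux/errno.h>
	#include <linux/uaccess.h>

	struct demo_req { int flags; };

	static int demo_read_flags(struct demo_req __user *ureq, int *flags)
	{
		/* get_user() validates the pointer and copies in one step. */
		if (get_user(*flags, &ureq->flags))
			return -EFAULT;
		return 0;
	}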
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index ed2fdd210c0b..5285ead196c0 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -250,11 +250,11 @@ rpcauth_list_flavors(rpc_authflavor_t *array, int size)
250EXPORT_SYMBOL_GPL(rpcauth_list_flavors); 250EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
251 251
252struct rpc_auth * 252struct rpc_auth *
253rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) 253rpcauth_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
254{ 254{
255 struct rpc_auth *auth; 255 struct rpc_auth *auth;
256 const struct rpc_authops *ops; 256 const struct rpc_authops *ops;
257 u32 flavor = pseudoflavor_to_flavor(pseudoflavor); 257 u32 flavor = pseudoflavor_to_flavor(args->pseudoflavor);
258 258
259 auth = ERR_PTR(-EINVAL); 259 auth = ERR_PTR(-EINVAL);
260 if (flavor >= RPC_AUTH_MAXFLAVOR) 260 if (flavor >= RPC_AUTH_MAXFLAVOR)
@@ -269,7 +269,7 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
269 goto out; 269 goto out;
270 } 270 }
271 spin_unlock(&rpc_authflavor_lock); 271 spin_unlock(&rpc_authflavor_lock);
272 auth = ops->create(clnt, pseudoflavor); 272 auth = ops->create(args, clnt);
273 module_put(ops->owner); 273 module_put(ops->owner);
274 if (IS_ERR(auth)) 274 if (IS_ERR(auth))
275 return auth; 275 return auth;
@@ -343,6 +343,27 @@ out_nocache:
343EXPORT_SYMBOL_GPL(rpcauth_init_credcache); 343EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
344 344
345/* 345/*
346 * Setup a credential key lifetime timeout notification
347 */
348int
349rpcauth_key_timeout_notify(struct rpc_auth *auth, struct rpc_cred *cred)
350{
351 if (!cred->cr_auth->au_ops->key_timeout)
352 return 0;
353 return cred->cr_auth->au_ops->key_timeout(auth, cred);
354}
355EXPORT_SYMBOL_GPL(rpcauth_key_timeout_notify);
356
357bool
358rpcauth_cred_key_to_expire(struct rpc_cred *cred)
359{
360 if (!cred->cr_ops->crkey_to_expire)
361 return false;
362 return cred->cr_ops->crkey_to_expire(cred);
363}
364EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire);
365
366/*
346 * Destroy a list of credentials 367 * Destroy a list of credentials
347 */ 368 */
348static inline 369static inline
@@ -413,12 +434,13 @@ EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
413/* 434/*
414 * Remove stale credentials. Avoid sleeping inside the loop. 435 * Remove stale credentials. Avoid sleeping inside the loop.
415 */ 436 */
416static int 437static long
417rpcauth_prune_expired(struct list_head *free, int nr_to_scan) 438rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
418{ 439{
419 spinlock_t *cache_lock; 440 spinlock_t *cache_lock;
420 struct rpc_cred *cred, *next; 441 struct rpc_cred *cred, *next;
421 unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM; 442 unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM;
443 long freed = 0;
422 444
423 list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { 445 list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
424 446
@@ -430,10 +452,11 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
430 */ 452 */
431 if (time_in_range(cred->cr_expire, expired, jiffies) && 453 if (time_in_range(cred->cr_expire, expired, jiffies) &&
432 test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) 454 test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
433 return 0; 455 break;
434 456
435 list_del_init(&cred->cr_lru); 457 list_del_init(&cred->cr_lru);
436 number_cred_unused--; 458 number_cred_unused--;
459 freed++;
437 if (atomic_read(&cred->cr_count) != 0) 460 if (atomic_read(&cred->cr_count) != 0)
438 continue; 461 continue;
439 462
@@ -446,29 +469,39 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
446 } 469 }
447 spin_unlock(cache_lock); 470 spin_unlock(cache_lock);
448 } 471 }
449 return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; 472 return freed;
450} 473}
451 474
452/* 475/*
453 * Run memory cache shrinker. 476 * Run memory cache shrinker.
454 */ 477 */
455static int 478static unsigned long
456rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc) 479rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
480
457{ 481{
458 LIST_HEAD(free); 482 LIST_HEAD(free);
459 int res; 483 unsigned long freed;
460 int nr_to_scan = sc->nr_to_scan;
461 gfp_t gfp_mask = sc->gfp_mask;
462 484
463 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) 485 if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL)
464 return (nr_to_scan == 0) ? 0 : -1; 486 return SHRINK_STOP;
487
488 /* nothing left, don't come back */
465 if (list_empty(&cred_unused)) 489 if (list_empty(&cred_unused))
466 return 0; 490 return SHRINK_STOP;
491
467 spin_lock(&rpc_credcache_lock); 492 spin_lock(&rpc_credcache_lock);
468 res = rpcauth_prune_expired(&free, nr_to_scan); 493 freed = rpcauth_prune_expired(&free, sc->nr_to_scan);
469 spin_unlock(&rpc_credcache_lock); 494 spin_unlock(&rpc_credcache_lock);
470 rpcauth_destroy_credlist(&free); 495 rpcauth_destroy_credlist(&free);
471 return res; 496
497 return freed;
498}
499
500static unsigned long
501rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
502
503{
504 return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
472} 505}
473 506
474/* 507/*
@@ -784,7 +817,8 @@ rpcauth_uptodatecred(struct rpc_task *task)
784} 817}
785 818
786static struct shrinker rpc_cred_shrinker = { 819static struct shrinker rpc_cred_shrinker = {
787 .shrink = rpcauth_cache_shrinker, 820 .count_objects = rpcauth_cache_shrink_count,
821 .scan_objects = rpcauth_cache_shrink_scan,
788 .seeks = DEFAULT_SEEKS, 822 .seeks = DEFAULT_SEEKS,
789}; 823};
790 824
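This is the conversion to the split shrinker API: count_objects() reports how much is reclaimable, while scan_objects() frees up to sc->nr_to_scan entries and returns how many it freed, or SHRINK_STOP when it cannot make progress (here: wrong GFP context or an empty LRU). A minimal hedged sketch of the shape:

	#include <linux/kernel.h>
	#include <linux/shrinker.h>

	static unsigned long demo_nr;	/* objects we could free */

	static unsigned long
	demo_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
	{
		return demo_nr;
	}

	static unsigned long
	demo_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
	{
		unsigned long freed;

		if (!demo_nr)
			return SHRINK_STOP;	/* nothing left, don't retry */
		freed = min_t(unsigned long, demo_nr, sc->nr_to_scan);
		demo_nr -= freed;		/* free that many objects here */
		return freed;
	}

	static struct shrinker demo_shrinker = {
		.count_objects	= demo_shrink_count,
		.scan_objects	= demo_shrink_scan,
		.seeks		= DEFAULT_SEEKS,
	};

It would be registered with register_shrinker(&demo_shrinker), just as rpc_cred_shrinker is.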
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index b6badafc6494..ed04869b2d4f 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -89,6 +89,7 @@ generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
89 gcred->acred.uid = acred->uid; 89 gcred->acred.uid = acred->uid;
90 gcred->acred.gid = acred->gid; 90 gcred->acred.gid = acred->gid;
91 gcred->acred.group_info = acred->group_info; 91 gcred->acred.group_info = acred->group_info;
92 gcred->acred.ac_flags = 0;
92 if (gcred->acred.group_info != NULL) 93 if (gcred->acred.group_info != NULL)
93 get_group_info(gcred->acred.group_info); 94 get_group_info(gcred->acred.group_info);
94 gcred->acred.machine_cred = acred->machine_cred; 95 gcred->acred.machine_cred = acred->machine_cred;
@@ -182,11 +183,78 @@ void rpc_destroy_generic_auth(void)
182 rpcauth_destroy_credcache(&generic_auth); 183 rpcauth_destroy_credcache(&generic_auth);
183} 184}
184 185
186/*
187 * Test the current time (now) against the underlying credential key expiry
188 * minus a timeout and set up notification.
189 *
190 * The normal case:
191 * If 'now' is before the key expiry minus RPC_KEY_EXPIRE_TIMEO, set
192 * the RPC_CRED_NOTIFY_TIMEOUT flag to set up the underlying credential's
193 * rpc_credops crmatch routine to notify this generic cred when its key
194 * expiration is within RPC_KEY_EXPIRE_TIMEO, and return 0.
195 *
196 * The error case:
197 * If the underlying cred lookup fails, return -EACCES.
198 *
199 * The 'almost' error case:
200 * If 'now' is within key expiry minus RPC_KEY_EXPIRE_TIMEO, but not within
201 * key expiry minus RPC_KEY_EXPIRE_FAIL, set the RPC_CRED_KEY_EXPIRE_SOON bit
202 * on the acred ac_flags and return 0.
203 */
204static int
205generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred)
206{
207 struct auth_cred *acred = &container_of(cred, struct generic_cred,
208 gc_base)->acred;
209 struct rpc_cred *tcred;
210 int ret = 0;
211
212
213 /* Fast track for underlying credentials without a crkey_timeout (no key) */
214 if (test_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags))
215 return 0;
216
217 /* Fast track for the normal case */
218 if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags))
219 return 0;
220
221 /* lookup_cred either returns a valid referenced rpc_cred, or PTR_ERR */
222 tcred = auth->au_ops->lookup_cred(auth, acred, 0);
223 if (IS_ERR(tcred))
224 return -EACCES;
225
226 if (!tcred->cr_ops->crkey_timeout) {
227 set_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags);
228 ret = 0;
229 goto out_put;
230 }
231
232 /* Test for the almost error case */
233 ret = tcred->cr_ops->crkey_timeout(tcred);
234 if (ret != 0) {
235 set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
236 ret = 0;
237 } else {
238 /* In case underlying cred key has been reset */
239 if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON,
240 &acred->ac_flags))
241 dprintk("RPC: UID %d Credential key reset\n",
242 from_kuid(&init_user_ns, tcred->cr_uid));
243 /* set up fasttrack for the normal case */
244 set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
245 }
246
247out_put:
248 put_rpccred(tcred);
249 return ret;
250}
251
185static const struct rpc_authops generic_auth_ops = { 252static const struct rpc_authops generic_auth_ops = {
186 .owner = THIS_MODULE, 253 .owner = THIS_MODULE,
187 .au_name = "Generic", 254 .au_name = "Generic",
188 .lookup_cred = generic_lookup_cred, 255 .lookup_cred = generic_lookup_cred,
189 .crcreate = generic_create_cred, 256 .crcreate = generic_create_cred,
257 .key_timeout = generic_key_timeout,
190}; 258};
191 259
192static struct rpc_auth generic_auth = { 260static struct rpc_auth generic_auth = {
@@ -194,9 +262,23 @@ static struct rpc_auth generic_auth = {
194 .au_count = ATOMIC_INIT(0), 262 .au_count = ATOMIC_INIT(0),
195}; 263};
196 264
265static bool generic_key_to_expire(struct rpc_cred *cred)
266{
267 struct auth_cred *acred = &container_of(cred, struct generic_cred,
268 gc_base)->acred;
269 bool ret;
270
271 get_rpccred(cred);
272 ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
273 put_rpccred(cred);
274
275 return ret;
276}
277
197static const struct rpc_credops generic_credops = { 278static const struct rpc_credops generic_credops = {
198 .cr_name = "Generic cred", 279 .cr_name = "Generic cred",
199 .crdestroy = generic_destroy_cred, 280 .crdestroy = generic_destroy_cred,
200 .crbind = generic_bind_cred, 281 .crbind = generic_bind_cred,
201 .crmatch = generic_match, 282 .crmatch = generic_match,
283 .crkey_to_expire = generic_key_to_expire,
202}; 284};
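Together, key_timeout and crkey_to_expire let an upper layer (NFS is the intended consumer) arm a notification and later poll whether the key behind a credential is about to lapse, driven by the RPC_CRED_NOTIFY_TIMEOUT and RPC_CRED_KEY_EXPIRE_SOON bits. A hedged sketch of a hypothetical caller, using only the two helpers this patch exports:

	#include <linux/sunrpc/auth.h>

	/* Arm the expiry notification for this credential, if supported. */
	static int demo_arm_key_notify(struct rpc_auth *auth, struct rpc_cred *cred)
	{
		return rpcauth_key_timeout_notify(auth, cred);
	}

	/* Later: flush dirty state early if the key will expire soon. */
	static bool demo_should_flush_early(struct rpc_cred *cred)
	{
		return rpcauth_cred_key_to_expire(cred);
	}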
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index fc2f78d6a9b4..084656671d6e 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -51,6 +51,7 @@
51#include <linux/sunrpc/rpc_pipe_fs.h> 51#include <linux/sunrpc/rpc_pipe_fs.h>
52#include <linux/sunrpc/gss_api.h> 52#include <linux/sunrpc/gss_api.h>
53#include <asm/uaccess.h> 53#include <asm/uaccess.h>
54#include <linux/hashtable.h>
54 55
55#include "../netns.h" 56#include "../netns.h"
56 57
@@ -62,6 +63,9 @@ static const struct rpc_credops gss_nullops;
62#define GSS_RETRY_EXPIRED 5 63#define GSS_RETRY_EXPIRED 5
63static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED; 64static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;
64 65
66#define GSS_KEY_EXPIRE_TIMEO 240
67static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO;
68
65#ifdef RPC_DEBUG 69#ifdef RPC_DEBUG
66# define RPCDBG_FACILITY RPCDBG_AUTH 70# define RPCDBG_FACILITY RPCDBG_AUTH
67#endif 71#endif
@@ -71,19 +75,33 @@ static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;
71 * using integrity (two 4-byte integers): */ 75 * using integrity (two 4-byte integers): */
72#define GSS_VERF_SLACK 100 76#define GSS_VERF_SLACK 100
73 77
78static DEFINE_HASHTABLE(gss_auth_hash_table, 4);
79static DEFINE_SPINLOCK(gss_auth_hash_lock);
80
81struct gss_pipe {
82 struct rpc_pipe_dir_object pdo;
83 struct rpc_pipe *pipe;
84 struct rpc_clnt *clnt;
85 const char *name;
86 struct kref kref;
87};
88
74struct gss_auth { 89struct gss_auth {
75 struct kref kref; 90 struct kref kref;
91 struct hlist_node hash;
76 struct rpc_auth rpc_auth; 92 struct rpc_auth rpc_auth;
77 struct gss_api_mech *mech; 93 struct gss_api_mech *mech;
78 enum rpc_gss_svc service; 94 enum rpc_gss_svc service;
79 struct rpc_clnt *client; 95 struct rpc_clnt *client;
96 struct net *net;
80 /* 97 /*
81 * There are two upcall pipes; dentry[1], named "gssd", is used 98 * There are two upcall pipes; dentry[1], named "gssd", is used
82 * for the new text-based upcall; dentry[0] is named after the 99 * for the new text-based upcall; dentry[0] is named after the
83 * mechanism (for example, "krb5") and exists for 100 * mechanism (for example, "krb5") and exists for
84 * backwards-compatibility with older gssd's. 101 * backwards-compatibility with older gssd's.
85 */ 102 */
86 struct rpc_pipe *pipe[2]; 103 struct gss_pipe *gss_pipe[2];
104 const char *target_name;
87}; 105};
88 106
89/* pipe_version >= 0 if and only if someone has a pipe open. */ 107/* pipe_version >= 0 if and only if someone has a pipe open. */
@@ -294,7 +312,7 @@ static void put_pipe_version(struct net *net)
294static void 312static void
295gss_release_msg(struct gss_upcall_msg *gss_msg) 313gss_release_msg(struct gss_upcall_msg *gss_msg)
296{ 314{
297 struct net *net = rpc_net_ns(gss_msg->auth->client); 315 struct net *net = gss_msg->auth->net;
298 if (!atomic_dec_and_test(&gss_msg->count)) 316 if (!atomic_dec_and_test(&gss_msg->count))
299 return; 317 return;
300 put_pipe_version(net); 318 put_pipe_version(net);
@@ -406,8 +424,8 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
406} 424}
407 425
408static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, 426static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
409 struct rpc_clnt *clnt, 427 const char *service_name,
410 const char *service_name) 428 const char *target_name)
411{ 429{
412 struct gss_api_mech *mech = gss_msg->auth->mech; 430 struct gss_api_mech *mech = gss_msg->auth->mech;
413 char *p = gss_msg->databuf; 431 char *p = gss_msg->databuf;
@@ -417,8 +435,8 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
417 mech->gm_name, 435 mech->gm_name,
418 from_kuid(&init_user_ns, gss_msg->uid)); 436 from_kuid(&init_user_ns, gss_msg->uid));
419 p += gss_msg->msg.len; 437 p += gss_msg->msg.len;
420 if (clnt->cl_principal) { 438 if (target_name) {
421 len = sprintf(p, "target=%s ", clnt->cl_principal); 439 len = sprintf(p, "target=%s ", target_name);
422 p += len; 440 p += len;
423 gss_msg->msg.len += len; 441 gss_msg->msg.len += len;
424 } 442 }
@@ -439,21 +457,8 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
439 BUG_ON(gss_msg->msg.len > UPCALL_BUF_LEN); 457 BUG_ON(gss_msg->msg.len > UPCALL_BUF_LEN);
440} 458}
441 459
442static void gss_encode_msg(struct gss_upcall_msg *gss_msg,
443 struct rpc_clnt *clnt,
444 const char *service_name)
445{
446 struct net *net = rpc_net_ns(clnt);
447 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
448
449 if (sn->pipe_version == 0)
450 gss_encode_v0_msg(gss_msg);
451 else /* pipe_version == 1 */
452 gss_encode_v1_msg(gss_msg, clnt, service_name);
453}
454
455static struct gss_upcall_msg * 460static struct gss_upcall_msg *
456gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, 461gss_alloc_msg(struct gss_auth *gss_auth,
457 kuid_t uid, const char *service_name) 462 kuid_t uid, const char *service_name)
458{ 463{
459 struct gss_upcall_msg *gss_msg; 464 struct gss_upcall_msg *gss_msg;
@@ -462,31 +467,36 @@ gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt,
462 gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS); 467 gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS);
463 if (gss_msg == NULL) 468 if (gss_msg == NULL)
464 return ERR_PTR(-ENOMEM); 469 return ERR_PTR(-ENOMEM);
465 vers = get_pipe_version(rpc_net_ns(clnt)); 470 vers = get_pipe_version(gss_auth->net);
466 if (vers < 0) { 471 if (vers < 0) {
467 kfree(gss_msg); 472 kfree(gss_msg);
468 return ERR_PTR(vers); 473 return ERR_PTR(vers);
469 } 474 }
470 gss_msg->pipe = gss_auth->pipe[vers]; 475 gss_msg->pipe = gss_auth->gss_pipe[vers]->pipe;
471 INIT_LIST_HEAD(&gss_msg->list); 476 INIT_LIST_HEAD(&gss_msg->list);
472 rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); 477 rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq");
473 init_waitqueue_head(&gss_msg->waitqueue); 478 init_waitqueue_head(&gss_msg->waitqueue);
474 atomic_set(&gss_msg->count, 1); 479 atomic_set(&gss_msg->count, 1);
475 gss_msg->uid = uid; 480 gss_msg->uid = uid;
476 gss_msg->auth = gss_auth; 481 gss_msg->auth = gss_auth;
477 gss_encode_msg(gss_msg, clnt, service_name); 482 switch (vers) {
483 case 0:
484 gss_encode_v0_msg(gss_msg);
485 default:
486 gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name);
487 };
478 return gss_msg; 488 return gss_msg;
479} 489}
480 490
481static struct gss_upcall_msg * 491static struct gss_upcall_msg *
482gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cred *cred) 492gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred)
483{ 493{
484 struct gss_cred *gss_cred = container_of(cred, 494 struct gss_cred *gss_cred = container_of(cred,
485 struct gss_cred, gc_base); 495 struct gss_cred, gc_base);
486 struct gss_upcall_msg *gss_new, *gss_msg; 496 struct gss_upcall_msg *gss_new, *gss_msg;
487 kuid_t uid = cred->cr_uid; 497 kuid_t uid = cred->cr_uid;
488 498
489 gss_new = gss_alloc_msg(gss_auth, clnt, uid, gss_cred->gc_principal); 499 gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal);
490 if (IS_ERR(gss_new)) 500 if (IS_ERR(gss_new))
491 return gss_new; 501 return gss_new;
492 gss_msg = gss_add_msg(gss_new); 502 gss_msg = gss_add_msg(gss_new);
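One caveat in the hunk above: the new switch in gss_alloc_msg() has no break after case 0, so a version 0 message is encoded and then immediately re-encoded as version 1 by the fall through into default. A corrected sketch of the presumably intended dispatch:

	switch (vers) {
	case 0:
		gss_encode_v0_msg(gss_msg);
		break;
	default:
		gss_encode_v1_msg(gss_msg, service_name,
				  gss_auth->target_name);
		break;
	}

The stray semicolon after the committed switch's closing brace is harmless but un-idiomatic.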
@@ -527,7 +537,7 @@ gss_refresh_upcall(struct rpc_task *task)
527 537
528 dprintk("RPC: %5u %s for uid %u\n", 538 dprintk("RPC: %5u %s for uid %u\n",
529 task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid)); 539 task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid));
530 gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred); 540 gss_msg = gss_setup_upcall(gss_auth, cred);
531 if (PTR_ERR(gss_msg) == -EAGAIN) { 541 if (PTR_ERR(gss_msg) == -EAGAIN) {
532 /* XXX: warning on the first, under the assumption we 542 /* XXX: warning on the first, under the assumption we
533 * shouldn't normally hit this case on a refresh. */ 543 * shouldn't normally hit this case on a refresh. */
@@ -566,7 +576,7 @@ out:
566static inline int 576static inline int
567gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) 577gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
568{ 578{
569 struct net *net = rpc_net_ns(gss_auth->client); 579 struct net *net = gss_auth->net;
570 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); 580 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
571 struct rpc_pipe *pipe; 581 struct rpc_pipe *pipe;
572 struct rpc_cred *cred = &gss_cred->gc_base; 582 struct rpc_cred *cred = &gss_cred->gc_base;
@@ -583,7 +593,7 @@ retry:
583 timeout = 15 * HZ; 593 timeout = 15 * HZ;
584 if (!sn->gssd_running) 594 if (!sn->gssd_running)
585 timeout = HZ >> 2; 595 timeout = HZ >> 2;
586 gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred); 596 gss_msg = gss_setup_upcall(gss_auth, cred);
587 if (PTR_ERR(gss_msg) == -EAGAIN) { 597 if (PTR_ERR(gss_msg) == -EAGAIN) {
588 err = wait_event_interruptible_timeout(pipe_version_waitqueue, 598 err = wait_event_interruptible_timeout(pipe_version_waitqueue,
589 sn->pipe_version >= 0, timeout); 599 sn->pipe_version >= 0, timeout);
@@ -797,83 +807,153 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
797 } 807 }
798} 808}
799 809
800static void gss_pipes_dentries_destroy(struct rpc_auth *auth) 810static void gss_pipe_dentry_destroy(struct dentry *dir,
811 struct rpc_pipe_dir_object *pdo)
801{ 812{
802 struct gss_auth *gss_auth; 813 struct gss_pipe *gss_pipe = pdo->pdo_data;
814 struct rpc_pipe *pipe = gss_pipe->pipe;
803 815
804 gss_auth = container_of(auth, struct gss_auth, rpc_auth); 816 if (pipe->dentry != NULL) {
805 if (gss_auth->pipe[0]->dentry) 817 rpc_unlink(pipe->dentry);
806 rpc_unlink(gss_auth->pipe[0]->dentry); 818 pipe->dentry = NULL;
807 if (gss_auth->pipe[1]->dentry) 819 }
808 rpc_unlink(gss_auth->pipe[1]->dentry);
809} 820}
810 821
811static int gss_pipes_dentries_create(struct rpc_auth *auth) 822static int gss_pipe_dentry_create(struct dentry *dir,
823 struct rpc_pipe_dir_object *pdo)
812{ 824{
813 int err; 825 struct gss_pipe *p = pdo->pdo_data;
814 struct gss_auth *gss_auth; 826 struct dentry *dentry;
815 struct rpc_clnt *clnt;
816 827
817 gss_auth = container_of(auth, struct gss_auth, rpc_auth); 828 dentry = rpc_mkpipe_dentry(dir, p->name, p->clnt, p->pipe);
818 clnt = gss_auth->client; 829 if (IS_ERR(dentry))
819 830 return PTR_ERR(dentry);
820 gss_auth->pipe[1]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry, 831 p->pipe->dentry = dentry;
821 "gssd",
822 clnt, gss_auth->pipe[1]);
823 if (IS_ERR(gss_auth->pipe[1]->dentry))
824 return PTR_ERR(gss_auth->pipe[1]->dentry);
825 gss_auth->pipe[0]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry,
826 gss_auth->mech->gm_name,
827 clnt, gss_auth->pipe[0]);
828 if (IS_ERR(gss_auth->pipe[0]->dentry)) {
829 err = PTR_ERR(gss_auth->pipe[0]->dentry);
830 goto err_unlink_pipe_1;
831 }
832 return 0; 832 return 0;
833}
833 834
834err_unlink_pipe_1: 835static const struct rpc_pipe_dir_object_ops gss_pipe_dir_object_ops = {
835 rpc_unlink(gss_auth->pipe[1]->dentry); 836 .create = gss_pipe_dentry_create,
836 return err; 837 .destroy = gss_pipe_dentry_destroy,
838};
839
840static struct gss_pipe *gss_pipe_alloc(struct rpc_clnt *clnt,
841 const char *name,
842 const struct rpc_pipe_ops *upcall_ops)
843{
844 struct gss_pipe *p;
845 int err = -ENOMEM;
846
847 p = kmalloc(sizeof(*p), GFP_KERNEL);
848 if (p == NULL)
849 goto err;
850 p->pipe = rpc_mkpipe_data(upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
851 if (IS_ERR(p->pipe)) {
852 err = PTR_ERR(p->pipe);
853 goto err_free_gss_pipe;
854 }
855 p->name = name;
856 p->clnt = clnt;
857 kref_init(&p->kref);
858 rpc_init_pipe_dir_object(&p->pdo,
859 &gss_pipe_dir_object_ops,
860 p);
861 return p;
862err_free_gss_pipe:
863 kfree(p);
864err:
865 return ERR_PTR(err);
866}
867
868struct gss_alloc_pdo {
869 struct rpc_clnt *clnt;
870 const char *name;
871 const struct rpc_pipe_ops *upcall_ops;
872};
873
874static int gss_pipe_match_pdo(struct rpc_pipe_dir_object *pdo, void *data)
875{
876 struct gss_pipe *gss_pipe;
877 struct gss_alloc_pdo *args = data;
878
879 if (pdo->pdo_ops != &gss_pipe_dir_object_ops)
880 return 0;
881 gss_pipe = container_of(pdo, struct gss_pipe, pdo);
882 if (strcmp(gss_pipe->name, args->name) != 0)
883 return 0;
884 if (!kref_get_unless_zero(&gss_pipe->kref))
885 return 0;
886 return 1;
887}
888
889static struct rpc_pipe_dir_object *gss_pipe_alloc_pdo(void *data)
890{
891 struct gss_pipe *gss_pipe;
892 struct gss_alloc_pdo *args = data;
893
894 gss_pipe = gss_pipe_alloc(args->clnt, args->name, args->upcall_ops);
895 if (!IS_ERR(gss_pipe))
896 return &gss_pipe->pdo;
897 return NULL;
837} 898}
838 899
839static void gss_pipes_dentries_destroy_net(struct rpc_clnt *clnt, 900static struct gss_pipe *gss_pipe_get(struct rpc_clnt *clnt,
840 struct rpc_auth *auth) 901 const char *name,
902 const struct rpc_pipe_ops *upcall_ops)
841{ 903{
842 struct net *net = rpc_net_ns(clnt); 904 struct net *net = rpc_net_ns(clnt);
843 struct super_block *sb; 905 struct rpc_pipe_dir_object *pdo;
906 struct gss_alloc_pdo args = {
907 .clnt = clnt,
908 .name = name,
909 .upcall_ops = upcall_ops,
910 };
844 911
845 sb = rpc_get_sb_net(net); 912 pdo = rpc_find_or_alloc_pipe_dir_object(net,
846 if (sb) { 913 &clnt->cl_pipedir_objects,
847 if (clnt->cl_dentry) 914 gss_pipe_match_pdo,
848 gss_pipes_dentries_destroy(auth); 915 gss_pipe_alloc_pdo,
849 rpc_put_sb_net(net); 916 &args);
850 } 917 if (pdo != NULL)
918 return container_of(pdo, struct gss_pipe, pdo);
919 return ERR_PTR(-ENOMEM);
851} 920}
852 921
853static int gss_pipes_dentries_create_net(struct rpc_clnt *clnt, 922static void __gss_pipe_free(struct gss_pipe *p)
854 struct rpc_auth *auth)
855{ 923{
924 struct rpc_clnt *clnt = p->clnt;
856 struct net *net = rpc_net_ns(clnt); 925 struct net *net = rpc_net_ns(clnt);
857 struct super_block *sb;
858 int err = 0;
859 926
860 sb = rpc_get_sb_net(net); 927 rpc_remove_pipe_dir_object(net,
861 if (sb) { 928 &clnt->cl_pipedir_objects,
862 if (clnt->cl_dentry) 929 &p->pdo);
863 err = gss_pipes_dentries_create(auth); 930 rpc_destroy_pipe_data(p->pipe);
864 rpc_put_sb_net(net); 931 kfree(p);
865 } 932}
866 return err; 933
934static void __gss_pipe_release(struct kref *kref)
935{
936 struct gss_pipe *p = container_of(kref, struct gss_pipe, kref);
937
938 __gss_pipe_free(p);
939}
940
941static void gss_pipe_free(struct gss_pipe *p)
942{
943 if (p != NULL)
944 kref_put(&p->kref, __gss_pipe_release);
867} 945}
868 946
869/* 947/*
870 * NOTE: we have the opportunity to use different 948 * NOTE: we have the opportunity to use different
871 * parameters based on the input flavor (which must be a pseudoflavor) 949 * parameters based on the input flavor (which must be a pseudoflavor)
872 */ 950 */
873static struct rpc_auth * 951static struct gss_auth *
874gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) 952gss_create_new(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
875{ 953{
954 rpc_authflavor_t flavor = args->pseudoflavor;
876 struct gss_auth *gss_auth; 955 struct gss_auth *gss_auth;
956 struct gss_pipe *gss_pipe;
877 struct rpc_auth * auth; 957 struct rpc_auth * auth;
878 int err = -ENOMEM; /* XXX? */ 958 int err = -ENOMEM; /* XXX? */
879 959
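The gss_pipe refactoring above replaces per-auth dentry juggling with reference-counted pipe directory objects that can be shared between clients; gss_pipe_get() relies on the find-or-allocate idiom in which kref_get_unless_zero() refuses entries racing with their final kref_put(). A generic hedged sketch of that idiom:

	#include <linux/kref.h>
	#include <linux/list.h>
	#include <linux/spinlock.h>

	struct demo_item {
		struct list_head node;
		int key;
		struct kref kref;
	};

	static LIST_HEAD(demo_items);
	static DEFINE_SPINLOCK(demo_lock);

	static struct demo_item *demo_find_get(int key)
	{
		struct demo_item *it;

		spin_lock(&demo_lock);
		list_for_each_entry(it, &demo_items, node) {
			if (it->key != key)
				continue;
			/* Fails if a concurrent release dropped the last ref. */
			if (!kref_get_unless_zero(&it->kref))
				continue;
			spin_unlock(&demo_lock);
			return it;
		}
		spin_unlock(&demo_lock);
		return NULL;	/* caller allocates and inserts a new item */
	}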
@@ -883,12 +963,20 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
883 return ERR_PTR(err); 963 return ERR_PTR(err);
884 if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) 964 if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
885 goto out_dec; 965 goto out_dec;
966 INIT_HLIST_NODE(&gss_auth->hash);
967 gss_auth->target_name = NULL;
968 if (args->target_name) {
969 gss_auth->target_name = kstrdup(args->target_name, GFP_KERNEL);
970 if (gss_auth->target_name == NULL)
971 goto err_free;
972 }
886 gss_auth->client = clnt; 973 gss_auth->client = clnt;
974 gss_auth->net = get_net(rpc_net_ns(clnt));
887 err = -EINVAL; 975 err = -EINVAL;
888 gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); 976 gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
889 if (!gss_auth->mech) { 977 if (!gss_auth->mech) {
890 dprintk("RPC: Pseudoflavor %d not found!\n", flavor); 978 dprintk("RPC: Pseudoflavor %d not found!\n", flavor);
891 goto err_free; 979 goto err_put_net;
892 } 980 }
893 gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); 981 gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
894 if (gss_auth->service == 0) 982 if (gss_auth->service == 0)
@@ -901,42 +989,41 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
901 atomic_set(&auth->au_count, 1); 989 atomic_set(&auth->au_count, 1);
902 kref_init(&gss_auth->kref); 990 kref_init(&gss_auth->kref);
903 991
992 err = rpcauth_init_credcache(auth);
993 if (err)
994 goto err_put_mech;
904 /* 995 /*
905 * Note: if we created the old pipe first, then someone who 996 * Note: if we created the old pipe first, then someone who
906 * examined the directory at the right moment might conclude 997 * examined the directory at the right moment might conclude
907 * that we supported only the old pipe. So we instead create 998 * that we supported only the old pipe. So we instead create
908 * the new pipe first. 999 * the new pipe first.
909 */ 1000 */
910 gss_auth->pipe[1] = rpc_mkpipe_data(&gss_upcall_ops_v1, 1001 gss_pipe = gss_pipe_get(clnt, "gssd", &gss_upcall_ops_v1);
911 RPC_PIPE_WAIT_FOR_OPEN); 1002 if (IS_ERR(gss_pipe)) {
912 if (IS_ERR(gss_auth->pipe[1])) { 1003 err = PTR_ERR(gss_pipe);
913 err = PTR_ERR(gss_auth->pipe[1]); 1004 goto err_destroy_credcache;
914 goto err_put_mech;
915 } 1005 }
1006 gss_auth->gss_pipe[1] = gss_pipe;
916 1007
917 gss_auth->pipe[0] = rpc_mkpipe_data(&gss_upcall_ops_v0, 1008 gss_pipe = gss_pipe_get(clnt, gss_auth->mech->gm_name,
918 RPC_PIPE_WAIT_FOR_OPEN); 1009 &gss_upcall_ops_v0);
919 if (IS_ERR(gss_auth->pipe[0])) { 1010 if (IS_ERR(gss_pipe)) {
920 err = PTR_ERR(gss_auth->pipe[0]); 1011 err = PTR_ERR(gss_pipe);
921 goto err_destroy_pipe_1; 1012 goto err_destroy_pipe_1;
922 } 1013 }
923 err = gss_pipes_dentries_create_net(clnt, auth); 1014 gss_auth->gss_pipe[0] = gss_pipe;
924 if (err)
925 goto err_destroy_pipe_0;
926 err = rpcauth_init_credcache(auth);
927 if (err)
928 goto err_unlink_pipes;
929 1015
930 return auth; 1016 return gss_auth;
931err_unlink_pipes:
932 gss_pipes_dentries_destroy_net(clnt, auth);
933err_destroy_pipe_0:
934 rpc_destroy_pipe_data(gss_auth->pipe[0]);
935err_destroy_pipe_1: 1017err_destroy_pipe_1:
936 rpc_destroy_pipe_data(gss_auth->pipe[1]); 1018 gss_pipe_free(gss_auth->gss_pipe[1]);
1019err_destroy_credcache:
1020 rpcauth_destroy_credcache(auth);
937err_put_mech: 1021err_put_mech:
938 gss_mech_put(gss_auth->mech); 1022 gss_mech_put(gss_auth->mech);
1023err_put_net:
1024 put_net(gss_auth->net);
939err_free: 1025err_free:
1026 kfree(gss_auth->target_name);
940 kfree(gss_auth); 1027 kfree(gss_auth);
941out_dec: 1028out_dec:
942 module_put(THIS_MODULE); 1029 module_put(THIS_MODULE);
@@ -946,10 +1033,11 @@ out_dec:
946static void 1033static void
947gss_free(struct gss_auth *gss_auth) 1034gss_free(struct gss_auth *gss_auth)
948{ 1035{
949 gss_pipes_dentries_destroy_net(gss_auth->client, &gss_auth->rpc_auth); 1036 gss_pipe_free(gss_auth->gss_pipe[0]);
950 rpc_destroy_pipe_data(gss_auth->pipe[0]); 1037 gss_pipe_free(gss_auth->gss_pipe[1]);
951 rpc_destroy_pipe_data(gss_auth->pipe[1]);
952 gss_mech_put(gss_auth->mech); 1038 gss_mech_put(gss_auth->mech);
1039 put_net(gss_auth->net);
1040 kfree(gss_auth->target_name);
953 1041
954 kfree(gss_auth); 1042 kfree(gss_auth);
955 module_put(THIS_MODULE); 1043 module_put(THIS_MODULE);
@@ -966,18 +1054,113 @@ gss_free_callback(struct kref *kref)
966static void 1054static void
967gss_destroy(struct rpc_auth *auth) 1055gss_destroy(struct rpc_auth *auth)
968{ 1056{
969 struct gss_auth *gss_auth; 1057 struct gss_auth *gss_auth = container_of(auth,
1058 struct gss_auth, rpc_auth);
970 1059
971 dprintk("RPC: destroying GSS authenticator %p flavor %d\n", 1060 dprintk("RPC: destroying GSS authenticator %p flavor %d\n",
972 auth, auth->au_flavor); 1061 auth, auth->au_flavor);
973 1062
1063 if (hash_hashed(&gss_auth->hash)) {
1064 spin_lock(&gss_auth_hash_lock);
1065 hash_del(&gss_auth->hash);
1066 spin_unlock(&gss_auth_hash_lock);
1067 }
1068
1069 gss_pipe_free(gss_auth->gss_pipe[0]);
1070 gss_auth->gss_pipe[0] = NULL;
1071 gss_pipe_free(gss_auth->gss_pipe[1]);
1072 gss_auth->gss_pipe[1] = NULL;
974 rpcauth_destroy_credcache(auth); 1073 rpcauth_destroy_credcache(auth);
975 1074
976 gss_auth = container_of(auth, struct gss_auth, rpc_auth);
977 kref_put(&gss_auth->kref, gss_free_callback); 1075 kref_put(&gss_auth->kref, gss_free_callback);
978} 1076}
979 1077
980/* 1078/*
1079 * Auths may be shared between rpc clients that were cloned from a
1080 * common client with the same xprt, if they also share the flavor and
1081 * target_name.
1082 *
1083 * The auth is looked up from the oldest parent sharing the same
1084 * cl_xprt, and the auth itself references only that common parent
1085 * (which is guaranteed to last as long as any of its descendants).
1086 */
1087static struct gss_auth *
1088gss_auth_find_or_add_hashed(struct rpc_auth_create_args *args,
1089 struct rpc_clnt *clnt,
1090 struct gss_auth *new)
1091{
1092 struct gss_auth *gss_auth;
1093 unsigned long hashval = (unsigned long)clnt;
1094
1095 spin_lock(&gss_auth_hash_lock);
1096 hash_for_each_possible(gss_auth_hash_table,
1097 gss_auth,
1098 hash,
1099 hashval) {
1100 if (gss_auth->client != clnt)
1101 continue;
1102 if (gss_auth->rpc_auth.au_flavor != args->pseudoflavor)
1103 continue;
1104 if (gss_auth->target_name != args->target_name) {
1105 if (gss_auth->target_name == NULL)
1106 continue;
1107 if (args->target_name == NULL)
1108 continue;
1109 if (strcmp(gss_auth->target_name, args->target_name))
1110 continue;
1111 }
1112 if (!atomic_inc_not_zero(&gss_auth->rpc_auth.au_count))
1113 continue;
1114 goto out;
1115 }
1116 if (new)
1117 hash_add(gss_auth_hash_table, &new->hash, hashval);
1118 gss_auth = new;
1119out:
1120 spin_unlock(&gss_auth_hash_lock);
1121 return gss_auth;
1122}
1123
1124static struct gss_auth *
1125gss_create_hashed(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
1126{
1127 struct gss_auth *gss_auth;
1128 struct gss_auth *new;
1129
1130 gss_auth = gss_auth_find_or_add_hashed(args, clnt, NULL);
1131 if (gss_auth != NULL)
1132 goto out;
1133 new = gss_create_new(args, clnt);
1134 if (IS_ERR(new))
1135 return new;
1136 gss_auth = gss_auth_find_or_add_hashed(args, clnt, new);
1137 if (gss_auth != new)
1138 gss_destroy(&new->rpc_auth);
1139out:
1140 return gss_auth;
1141}
1142
1143static struct rpc_auth *
1144gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
1145{
1146 struct gss_auth *gss_auth;
1147 struct rpc_xprt *xprt = rcu_access_pointer(clnt->cl_xprt);
1148
1149 while (clnt != clnt->cl_parent) {
1150 struct rpc_clnt *parent = clnt->cl_parent;
1151 /* Find the original parent for this transport */
1152 if (rcu_access_pointer(parent->cl_xprt) != xprt)
1153 break;
1154 clnt = parent;
1155 }
1156
1157 gss_auth = gss_create_hashed(args, clnt);
1158 if (IS_ERR(gss_auth))
1159 return ERR_CAST(gss_auth);
1160 return &gss_auth->rpc_auth;
1161}
1162
1163/*
981 * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call 1164 * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call
982 * to the server with the GSS control procedure field set to 1165 * to the server with the GSS control procedure field set to
983 * RPC_GSS_PROC_DESTROY. This should normally cause the server to release 1166 * RPC_GSS_PROC_DESTROY. This should normally cause the server to release
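gss_create() now walks up to the oldest parent client on the same transport and deduplicates auths through a small hash table keyed by that client pointer. The hashtable.h pattern in use, as a standalone hedged sketch (the refcounting the real lookup does with atomic_inc_not_zero() is omitted):

	#include <linux/hashtable.h>
	#include <linux/spinlock.h>

	struct demo_auth {
		struct hlist_node hash;
		void *client;			/* the hash key */
	};

	static DEFINE_HASHTABLE(demo_table, 4);	/* 2^4 buckets */
	static DEFINE_SPINLOCK(demo_table_lock);

	static struct demo_auth *demo_find_or_add(void *client,
						  struct demo_auth *new)
	{
		struct demo_auth *a;
		unsigned long key = (unsigned long)client;

		spin_lock(&demo_table_lock);
		hash_for_each_possible(demo_table, a, hash, key) {
			if (a->client == client)
				goto out;	/* reuse the existing auth */
		}
		if (new)
			hash_add(demo_table, &new->hash, key);
		a = new;
	out:
		spin_unlock(&demo_table_lock);
		return a;
	}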
@@ -1126,10 +1309,32 @@ gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred)
1126 return err; 1309 return err;
1127} 1310}
1128 1311
1312/*
1313 * Returns -EACCES if GSS context is NULL or will expire within the
1314 * timeout (in seconds)
1315 */
1316static int
1317gss_key_timeout(struct rpc_cred *rc)
1318{
1319 struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
1320 unsigned long now = jiffies;
1321 unsigned long expire;
1322
1323 if (gss_cred->gc_ctx == NULL)
1324 return -EACCES;
1325
1326 expire = gss_cred->gc_ctx->gc_expiry - (gss_key_expire_timeo * HZ);
1327
1328 if (time_after(now, expire))
1329 return -EACCES;
1330 return 0;
1331}
1332
1129static int 1333static int
1130gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags) 1334gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
1131{ 1335{
1132 struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); 1336 struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
1337 int ret;
1133 1338
1134 if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) 1339 if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags))
1135 goto out; 1340 goto out;
@@ -1142,11 +1347,26 @@ out:
1142 if (acred->principal != NULL) { 1347 if (acred->principal != NULL) {
1143 if (gss_cred->gc_principal == NULL) 1348 if (gss_cred->gc_principal == NULL)
1144 return 0; 1349 return 0;
1145 return strcmp(acred->principal, gss_cred->gc_principal) == 0; 1350 ret = strcmp(acred->principal, gss_cred->gc_principal) == 0;
1351 goto check_expire;
1146 } 1352 }
1147 if (gss_cred->gc_principal != NULL) 1353 if (gss_cred->gc_principal != NULL)
1148 return 0; 1354 return 0;
1149 return uid_eq(rc->cr_uid, acred->uid); 1355 ret = uid_eq(rc->cr_uid, acred->uid);
1356
1357check_expire:
1358 if (ret == 0)
1359 return ret;
1360
1361 /* Notify acred users of GSS context expiration timeout */
1362 if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags) &&
1363 (gss_key_timeout(rc) != 0)) {
1364 /* test will now be done from generic cred */
1365 test_and_clear_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
1366 /* tell NFS layer that key will expire soon */
1367 set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
1368 }
1369 return ret;
1150} 1370}
1151 1371
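The notification added to gss_match() is edge-triggered: RPC_CRED_NOTIFY_TIMEOUT is consumed at the same moment RPC_CRED_KEY_EXPIRE_SOON is raised, so the extra gss_key_timeout() check stops running after the first hit. The same one-shot handshake sketched with C11 atomics (the flag names are made up for the example):

#include <stdatomic.h>
#include <stdbool.h>

#define NOTIFY_TIMEOUT	(1u << 0)	/* caller asked to be told once */
#define EXPIRE_SOON	(1u << 1)	/* answer: key is about to expire */

static bool notify_expiry_once(atomic_uint *flags, bool expiring)
{
	if (!expiring || !(atomic_load(flags) & NOTIFY_TIMEOUT))
		return false;
	atomic_fetch_and(flags, ~NOTIFY_TIMEOUT);	/* consume the request */
	atomic_fetch_or(flags, EXPIRE_SOON);		/* publish the answer */
	return true;
}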
1152/* 1372/*
@@ -1292,6 +1512,7 @@ gss_validate(struct rpc_task *task, __be32 *p)
1292 struct xdr_netobj mic; 1512 struct xdr_netobj mic;
1293 u32 flav,len; 1513 u32 flav,len;
1294 u32 maj_stat; 1514 u32 maj_stat;
1515 __be32 *ret = ERR_PTR(-EIO);
1295 1516
1296 dprintk("RPC: %5u %s\n", task->tk_pid, __func__); 1517 dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
1297 1518
@@ -1307,6 +1528,7 @@ gss_validate(struct rpc_task *task, __be32 *p)
1307 mic.data = (u8 *)p; 1528 mic.data = (u8 *)p;
1308 mic.len = len; 1529 mic.len = len;
1309 1530
1531 ret = ERR_PTR(-EACCES);
1310 maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); 1532 maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
1311 if (maj_stat == GSS_S_CONTEXT_EXPIRED) 1533 if (maj_stat == GSS_S_CONTEXT_EXPIRED)
1312 clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); 1534 clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
@@ -1324,8 +1546,9 @@ gss_validate(struct rpc_task *task, __be32 *p)
1324 return p + XDR_QUADLEN(len); 1546 return p + XDR_QUADLEN(len);
1325out_bad: 1547out_bad:
1326 gss_put_ctx(ctx); 1548 gss_put_ctx(ctx);
1327 dprintk("RPC: %5u %s failed.\n", task->tk_pid, __func__); 1549 dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__,
1328 return NULL; 1550 PTR_ERR(ret));
1551 return ret;
1329} 1552}
1330 1553
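Returning ERR_PTR(-EACCES) instead of bare NULL lets the caller distinguish a retryable verifier problem from other failures. The encoding relies on the last page of the address space never holding a valid object, so an errno fits in the pointer itself. A user-space re-creation of the helpers (the real ones live in <linux/err.h>):

#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

static inline long PTR_ERR(const void *ptr)
{
	return (long)ptr;
}

static inline int IS_ERR(const void *ptr)
{
	/* error values occupy the top MAX_ERRNO addresses */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	void *p = ERR_PTR(-13);	/* -EACCES */

	if (IS_ERR(p))
		printf("error %ld\n", PTR_ERR(p));
	return 0;
}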
1331static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp, 1554static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
@@ -1657,8 +1880,6 @@ static const struct rpc_authops authgss_ops = {
1657 .destroy = gss_destroy, 1880 .destroy = gss_destroy,
1658 .lookup_cred = gss_lookup_cred, 1881 .lookup_cred = gss_lookup_cred,
1659 .crcreate = gss_create_cred, 1882 .crcreate = gss_create_cred,
1660 .pipes_create = gss_pipes_dentries_create,
1661 .pipes_destroy = gss_pipes_dentries_destroy,
1662 .list_pseudoflavors = gss_mech_list_pseudoflavors, 1883 .list_pseudoflavors = gss_mech_list_pseudoflavors,
1663 .info2flavor = gss_mech_info2flavor, 1884 .info2flavor = gss_mech_info2flavor,
1664 .flavor2info = gss_mech_flavor2info, 1885 .flavor2info = gss_mech_flavor2info,
@@ -1675,6 +1896,7 @@ static const struct rpc_credops gss_credops = {
1675 .crvalidate = gss_validate, 1896 .crvalidate = gss_validate,
1676 .crwrap_req = gss_wrap_req, 1897 .crwrap_req = gss_wrap_req,
1677 .crunwrap_resp = gss_unwrap_resp, 1898 .crunwrap_resp = gss_unwrap_resp,
1899 .crkey_timeout = gss_key_timeout,
1678}; 1900};
1679 1901
1680static const struct rpc_credops gss_nullops = { 1902static const struct rpc_credops gss_nullops = {
@@ -1762,5 +1984,12 @@ module_param_named(expired_cred_retry_delay,
1762MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until " 1984MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until "
1763 "the RPC engine retries an expired credential"); 1985 "the RPC engine retries an expired credential");
1764 1986
1987module_param_named(key_expire_timeo,
1988 gss_key_expire_timeo,
1989 uint, 0644);
1990MODULE_PARM_DESC(key_expire_timeo, "Time (in seconds) at the end of a "
1991 "credential keys lifetime where the NFS layer cleans up "
1992 "prior to key expiration");
1993
1765module_init(init_rpcsec_gss) 1994module_init(init_rpcsec_gss)
1766module_exit(exit_rpcsec_gss) 1995module_exit(exit_rpcsec_gss)
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index defa9d33925c..27ce26240932 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -139,11 +139,12 @@ void gss_mech_unregister(struct gss_api_mech *gm)
139} 139}
140EXPORT_SYMBOL_GPL(gss_mech_unregister); 140EXPORT_SYMBOL_GPL(gss_mech_unregister);
141 141
142static struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm) 142struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm)
143{ 143{
144 __module_get(gm->gm_owner); 144 __module_get(gm->gm_owner);
145 return gm; 145 return gm;
146} 146}
147EXPORT_SYMBOL(gss_mech_get);
147 148
148static struct gss_api_mech * 149static struct gss_api_mech *
149_gss_mech_get_by_name(const char *name) 150_gss_mech_get_by_name(const char *name)
@@ -360,6 +361,7 @@ gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor)
360 } 361 }
361 return 0; 362 return 0;
362} 363}
364EXPORT_SYMBOL(gss_pseudoflavor_to_service);
363 365
364char * 366char *
365gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service) 367gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service)
@@ -379,6 +381,7 @@ gss_mech_put(struct gss_api_mech * gm)
379 if (gm) 381 if (gm)
380 module_put(gm->gm_owner); 382 module_put(gm->gm_owner);
381} 383}
384EXPORT_SYMBOL(gss_mech_put);
382 385
383/* The mech could probably be determined from the token instead, but it's just 386/* The mech could probably be determined from the token instead, but it's just
384 * as easy for now to pass it in. */ 387 * as easy for now to pass it in. */
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index d304f41260f2..f1eb0d16666c 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -120,7 +120,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt)
120 if (IS_ERR(clnt)) { 120 if (IS_ERR(clnt)) {
121 dprintk("RPC: failed to create AF_LOCAL gssproxy " 121 dprintk("RPC: failed to create AF_LOCAL gssproxy "
122 "client (errno %ld).\n", PTR_ERR(clnt)); 122 "client (errno %ld).\n", PTR_ERR(clnt));
123 result = -PTR_ERR(clnt); 123 result = PTR_ERR(clnt);
124 *_clnt = NULL; 124 *_clnt = NULL;
125 goto out; 125 goto out;
126 } 126 }
@@ -213,6 +213,26 @@ static int gssp_call(struct net *net, struct rpc_message *msg)
213 return status; 213 return status;
214} 214}
215 215
216static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg)
217{
218 int i;
219
220 for (i = 0; i < arg->npages && arg->pages[i]; i++)
221 __free_page(arg->pages[i]);
222}
223
224static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg)
225{
226 arg->npages = DIV_ROUND_UP(NGROUPS_MAX * 4, PAGE_SIZE);
227 arg->pages = kzalloc(arg->npages * sizeof(struct page *), GFP_KERNEL);
228 /*
229 * XXX: actual pages are allocated by xdr layer in
230 * xdr_partial_copy_from_skb.
231 */
232 if (!arg->pages)
233 return -ENOMEM;
234 return 0;
235}
216 236
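gssp_free_receive_pages() stops at the first NULL slot because, as the XXX comment notes, the pages themselves are only filled in lazily by the XDR receive path; the free routine must therefore cope with a partially populated array. The same defensive shape in plain C, where calloc() guarantees the unused slots read as NULL:

#include <errno.h>
#include <stdlib.h>

struct recv_buf {
	void **pages;
	unsigned int npages;
};

static int alloc_receive_pages(struct recv_buf *b, unsigned int npages)
{
	b->npages = npages;
	b->pages = calloc(npages, sizeof(*b->pages));	/* slots start NULL */
	return b->pages ? 0 : -ENOMEM;
}

static void free_receive_pages(struct recv_buf *b)
{
	/* free only what was actually filled in; stop at the first hole */
	for (unsigned int i = 0; i < b->npages && b->pages[i]; i++)
		free(b->pages[i]);
	free(b->pages);
	b->pages = NULL;
}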
217/* 237/*
218 * Public functions 238 * Public functions
@@ -261,10 +281,16 @@ int gssp_accept_sec_context_upcall(struct net *net,
261 arg.context_handle = &ctxh; 281 arg.context_handle = &ctxh;
262 res.output_token->len = GSSX_max_output_token_sz; 282 res.output_token->len = GSSX_max_output_token_sz;
263 283
284 ret = gssp_alloc_receive_pages(&arg);
285 if (ret)
286 return ret;
287
264 /* use nfs/ for targ_name ? */ 288 /* use nfs/ for targ_name ? */
265 289
266 ret = gssp_call(net, &msg); 290 ret = gssp_call(net, &msg);
267 291
292 gssp_free_receive_pages(&arg);
293
268 /* we need to fetch all data even in case of error so 294 /* we need to fetch all data even in case of error so
269 * that we can free special structures if they have been allocated */ 295
270 data->major_status = res.status.major_status; 296 data->major_status = res.status.major_status;
@@ -328,7 +354,6 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data)
328 kfree(data->in_handle.data); 354 kfree(data->in_handle.data);
329 kfree(data->out_handle.data); 355 kfree(data->out_handle.data);
330 kfree(data->out_token.data); 356 kfree(data->out_token.data);
331 kfree(data->mech_oid.data);
332 free_svc_cred(&data->creds); 357 free_svc_cred(&data->creds);
333} 358}
334 359
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 357f613df7ff..f0f78c5f1c7d 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -166,14 +166,15 @@ static int dummy_dec_opt_array(struct xdr_stream *xdr,
166 return 0; 166 return 0;
167} 167}
168 168
169static int get_s32(void **p, void *max, s32 *res) 169static int get_host_u32(struct xdr_stream *xdr, u32 *res)
170{ 170{
171 void *base = *p; 171 __be32 *p;
172 void *next = (void *)((char *)base + sizeof(s32)); 172
173 if (unlikely(next > max || next < base)) 173 p = xdr_inline_decode(xdr, 4);
174 if (!p)
174 return -EINVAL; 175 return -EINVAL;
175 memcpy(res, base, sizeof(s32)); 176 /* Contents of linux creds are all host-endian: */
176 *p = next; 177 memcpy(res, p, sizeof(u32));
177 return 0; 178 return 0;
178} 179}
179 180
@@ -182,9 +183,9 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
182{ 183{
183 u32 length; 184 u32 length;
184 __be32 *p; 185 __be32 *p;
185 void *q, *end; 186 u32 tmp;
186 s32 tmp; 187 u32 N;
187 int N, i, err; 188 int i, err;
188 189
189 p = xdr_inline_decode(xdr, 4); 190 p = xdr_inline_decode(xdr, 4);
190 if (unlikely(p == NULL)) 191 if (unlikely(p == NULL))
@@ -192,33 +193,28 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
192 193
193 length = be32_to_cpup(p); 194 length = be32_to_cpup(p);
194 195
195 /* FIXME: we do not want to use the scratch buffer for this one 196 if (length > (3 + NGROUPS_MAX) * sizeof(u32))
196 * may need to use functions that allows us to access an io vector
197 * directly */
198 p = xdr_inline_decode(xdr, length);
199 if (unlikely(p == NULL))
200 return -ENOSPC; 197 return -ENOSPC;
201 198
202 q = p;
203 end = q + length;
204
205 /* uid */ 199 /* uid */
206 err = get_s32(&q, end, &tmp); 200 err = get_host_u32(xdr, &tmp);
207 if (err) 201 if (err)
208 return err; 202 return err;
209 creds->cr_uid = make_kuid(&init_user_ns, tmp); 203 creds->cr_uid = make_kuid(&init_user_ns, tmp);
210 204
211 /* gid */ 205 /* gid */
212 err = get_s32(&q, end, &tmp); 206 err = get_host_u32(xdr, &tmp);
213 if (err) 207 if (err)
214 return err; 208 return err;
215 creds->cr_gid = make_kgid(&init_user_ns, tmp); 209 creds->cr_gid = make_kgid(&init_user_ns, tmp);
216 210
217 /* number of additional gid's */ 211 /* number of additional gid's */
218 err = get_s32(&q, end, &tmp); 212 err = get_host_u32(xdr, &tmp);
219 if (err) 213 if (err)
220 return err; 214 return err;
221 N = tmp; 215 N = tmp;
216 if ((3 + N) * sizeof(u32) != length)
217 return -EINVAL;
222 creds->cr_group_info = groups_alloc(N); 218 creds->cr_group_info = groups_alloc(N);
223 if (creds->cr_group_info == NULL) 219 if (creds->cr_group_info == NULL)
224 return -ENOMEM; 220 return -ENOMEM;
@@ -226,7 +222,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
226 /* gid's */ 222 /* gid's */
227 for (i = 0; i < N; i++) { 223 for (i = 0; i < N; i++) {
228 kgid_t kgid; 224 kgid_t kgid;
229 err = get_s32(&q, end, &tmp); 225 err = get_host_u32(xdr, &tmp);
230 if (err) 226 if (err)
231 goto out_free_groups; 227 goto out_free_groups;
232 err = -EINVAL; 228 err = -EINVAL;
@@ -430,7 +426,7 @@ static int dummy_enc_nameattr_array(struct xdr_stream *xdr,
430static int dummy_dec_nameattr_array(struct xdr_stream *xdr, 426static int dummy_dec_nameattr_array(struct xdr_stream *xdr,
431 struct gssx_name_attr_array *naa) 427 struct gssx_name_attr_array *naa)
432{ 428{
433 struct gssx_name_attr dummy; 429 struct gssx_name_attr dummy = { .attr = {.len = 0} };
434 u32 count, i; 430 u32 count, i;
435 __be32 *p; 431 __be32 *p;
436 432
@@ -493,12 +489,13 @@ static int gssx_enc_name(struct xdr_stream *xdr,
493 return err; 489 return err;
494} 490}
495 491
492
496static int gssx_dec_name(struct xdr_stream *xdr, 493static int gssx_dec_name(struct xdr_stream *xdr,
497 struct gssx_name *name) 494 struct gssx_name *name)
498{ 495{
499 struct xdr_netobj dummy_netobj; 496 struct xdr_netobj dummy_netobj = { .len = 0 };
500 struct gssx_name_attr_array dummy_name_attr_array; 497 struct gssx_name_attr_array dummy_name_attr_array = { .count = 0 };
501 struct gssx_option_array dummy_option_array; 498 struct gssx_option_array dummy_option_array = { .count = 0 };
502 int err; 499 int err;
503 500
504 /* name->display_name */ 501 /* name->display_name */
@@ -783,6 +780,9 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req,
783 /* arg->options */ 780 /* arg->options */
784 err = dummy_enc_opt_array(xdr, &arg->options); 781 err = dummy_enc_opt_array(xdr, &arg->options);
785 782
783 xdr_inline_pages(&req->rq_rcv_buf,
784 PAGE_SIZE/2 /* pretty arbitrary */,
785 arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE);
786done: 786done:
787 if (err) 787 if (err)
788 dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err); 788 dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err);
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
index 1c98b27d870c..685a688f3d8a 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.h
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h
@@ -147,6 +147,8 @@ struct gssx_arg_accept_sec_context {
147 struct gssx_cb *input_cb; 147 struct gssx_cb *input_cb;
148 u32 ret_deleg_cred; 148 u32 ret_deleg_cred;
149 struct gssx_option_array options; 149 struct gssx_option_array options;
150 struct page **pages;
151 unsigned int npages;
150}; 152};
151 153
152struct gssx_res_accept_sec_context { 154struct gssx_res_accept_sec_context {
@@ -240,7 +242,8 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
240 2 * GSSX_max_princ_sz + \ 242 2 * GSSX_max_princ_sz + \
241 8 + 8 + 4 + 4 + 4) 243 8 + 8 + 4 + 4 + 4)
242#define GSSX_max_output_token_sz 1024 244#define GSSX_max_output_token_sz 1024
243#define GSSX_max_creds_sz (4 + 4 + 4 + NGROUPS_MAX * 4) 245/* grouplist not included; we allocate separate pages for that: */
246#define GSSX_max_creds_sz (4 + 4 + 4 /* + NGROUPS_MAX*4 */)
244#define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \ 247#define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \
245 GSSX_default_ctx_sz + \ 248 GSSX_default_ctx_sz + \
246 GSSX_max_output_token_sz + \ 249 GSSX_max_output_token_sz + \
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 29b4ba93ab3c..09fb638bcaa4 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -377,8 +377,7 @@ rsc_init(struct cache_head *cnew, struct cache_head *ctmp)
377 new->handle.data = tmp->handle.data; 377 new->handle.data = tmp->handle.data;
378 tmp->handle.data = NULL; 378 tmp->handle.data = NULL;
379 new->mechctx = NULL; 379 new->mechctx = NULL;
380 new->cred.cr_group_info = NULL; 380 init_svc_cred(&new->cred);
381 new->cred.cr_principal = NULL;
382} 381}
383 382
384static void 383static void
@@ -392,9 +391,7 @@ update_rsc(struct cache_head *cnew, struct cache_head *ctmp)
392 memset(&new->seqdata, 0, sizeof(new->seqdata)); 391 memset(&new->seqdata, 0, sizeof(new->seqdata));
393 spin_lock_init(&new->seqdata.sd_lock); 392 spin_lock_init(&new->seqdata.sd_lock);
394 new->cred = tmp->cred; 393 new->cred = tmp->cred;
395 tmp->cred.cr_group_info = NULL; 394 init_svc_cred(&tmp->cred);
396 new->cred.cr_principal = tmp->cred.cr_principal;
397 tmp->cred.cr_principal = NULL;
398} 395}
399 396
400static struct cache_head * 397static struct cache_head *
@@ -487,7 +484,7 @@ static int rsc_parse(struct cache_detail *cd,
487 len = qword_get(&mesg, buf, mlen); 484 len = qword_get(&mesg, buf, mlen);
488 if (len < 0) 485 if (len < 0)
489 goto out; 486 goto out;
490 gm = gss_mech_get_by_name(buf); 487 gm = rsci.cred.cr_gss_mech = gss_mech_get_by_name(buf);
491 status = -EOPNOTSUPP; 488 status = -EOPNOTSUPP;
492 if (!gm) 489 if (!gm)
493 goto out; 490 goto out;
@@ -517,7 +514,6 @@ static int rsc_parse(struct cache_detail *cd,
517 rscp = rsc_update(cd, &rsci, rscp); 514 rscp = rsc_update(cd, &rsci, rscp);
518 status = 0; 515 status = 0;
519out: 516out:
520 gss_mech_put(gm);
521 rsc_free(&rsci); 517 rsc_free(&rsci);
522 if (rscp) 518 if (rscp)
523 cache_put(&rscp->h, cd); 519 cache_put(&rscp->h, cd);
@@ -1184,6 +1180,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd,
1184 gm = gss_mech_get_by_OID(&ud->mech_oid); 1180 gm = gss_mech_get_by_OID(&ud->mech_oid);
1185 if (!gm) 1181 if (!gm)
1186 goto out; 1182 goto out;
1183 rsci.cred.cr_gss_mech = gm;
1187 1184
1188 status = -EINVAL; 1185 status = -EINVAL;
1189 /* mech-specific data: */ 1186 /* mech-specific data: */
@@ -1199,7 +1196,6 @@ static int gss_proxy_save_rsc(struct cache_detail *cd,
1199 rscp = rsc_update(cd, &rsci, rscp); 1196 rscp = rsc_update(cd, &rsci, rscp);
1200 status = 0; 1197 status = 0;
1201out: 1198out:
1202 gss_mech_put(gm);
1203 rsc_free(&rsci); 1199 rsc_free(&rsci);
1204 if (rscp) 1200 if (rscp)
1205 cache_put(&rscp->h, cd); 1201 cache_put(&rscp->h, cd);
@@ -1330,7 +1326,7 @@ static int wait_for_gss_proxy(struct net *net, struct file *file)
1330static ssize_t write_gssp(struct file *file, const char __user *buf, 1326static ssize_t write_gssp(struct file *file, const char __user *buf,
1331 size_t count, loff_t *ppos) 1327 size_t count, loff_t *ppos)
1332{ 1328{
1333 struct net *net = PDE_DATA(file->f_path.dentry->d_inode); 1329 struct net *net = PDE_DATA(file_inode(file));
1334 char tbuf[20]; 1330 char tbuf[20];
1335 unsigned long i; 1331 unsigned long i;
1336 int res; 1332 int res;
@@ -1358,7 +1354,7 @@ static ssize_t write_gssp(struct file *file, const char __user *buf,
1358static ssize_t read_gssp(struct file *file, char __user *buf, 1354static ssize_t read_gssp(struct file *file, char __user *buf,
1359 size_t count, loff_t *ppos) 1355 size_t count, loff_t *ppos)
1360{ 1356{
1361 struct net *net = PDE_DATA(file->f_path.dentry->d_inode); 1357 struct net *net = PDE_DATA(file_inode(file));
1362 unsigned long p = *ppos; 1358 unsigned long p = *ppos;
1363 char tbuf[10]; 1359 char tbuf[10];
1364 size_t len; 1360 size_t len;
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index a5c36c01707b..f0ebe07978a2 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -18,7 +18,7 @@ static struct rpc_auth null_auth;
18static struct rpc_cred null_cred; 18static struct rpc_cred null_cred;
19 19
20static struct rpc_auth * 20static struct rpc_auth *
21nul_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) 21nul_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
22{ 22{
23 atomic_inc(&null_auth.au_count); 23 atomic_inc(&null_auth.au_count);
24 return &null_auth; 24 return &null_auth;
@@ -88,13 +88,13 @@ nul_validate(struct rpc_task *task, __be32 *p)
88 flavor = ntohl(*p++); 88 flavor = ntohl(*p++);
89 if (flavor != RPC_AUTH_NULL) { 89 if (flavor != RPC_AUTH_NULL) {
90 printk("RPC: bad verf flavor: %u\n", flavor); 90 printk("RPC: bad verf flavor: %u\n", flavor);
91 return NULL; 91 return ERR_PTR(-EIO);
92 } 92 }
93 93
94 size = ntohl(*p++); 94 size = ntohl(*p++);
95 if (size != 0) { 95 if (size != 0) {
96 printk("RPC: bad verf size: %u\n", size); 96 printk("RPC: bad verf size: %u\n", size);
97 return NULL; 97 return ERR_PTR(-EIO);
98 } 98 }
99 99
100 return p; 100 return p;
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index dc37021fc3e5..d5d692366294 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -33,7 +33,7 @@ static struct rpc_auth unix_auth;
33static const struct rpc_credops unix_credops; 33static const struct rpc_credops unix_credops;
34 34
35static struct rpc_auth * 35static struct rpc_auth *
36unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) 36unx_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
37{ 37{
38 dprintk("RPC: creating UNIX authenticator for client %p\n", 38 dprintk("RPC: creating UNIX authenticator for client %p\n",
39 clnt); 39 clnt);
@@ -192,13 +192,13 @@ unx_validate(struct rpc_task *task, __be32 *p)
192 flavor != RPC_AUTH_UNIX && 192 flavor != RPC_AUTH_UNIX &&
193 flavor != RPC_AUTH_SHORT) { 193 flavor != RPC_AUTH_SHORT) {
194 printk("RPC: bad verf flavor: %u\n", flavor); 194 printk("RPC: bad verf flavor: %u\n", flavor);
195 return NULL; 195 return ERR_PTR(-EIO);
196 } 196 }
197 197
198 size = ntohl(*p++); 198 size = ntohl(*p++);
199 if (size > RPC_MAX_AUTH_SIZE) { 199 if (size > RPC_MAX_AUTH_SIZE) {
200 printk("RPC: giant verf size: %u\n", size); 200 printk("RPC: giant verf size: %u\n", size);
201 return NULL; 201 return ERR_PTR(-EIO);
202 } 202 }
203 task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2; 203 task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
204 p += (size >> 2); 204 p += (size >> 2);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 80fe5c86efd1..a72de074172d 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -50,12 +50,6 @@ static void cache_init(struct cache_head *h)
50 h->last_refresh = now; 50 h->last_refresh = now;
51} 51}
52 52
53static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
54{
55 return (h->expiry_time < seconds_since_boot()) ||
56 (detail->flush_time > h->last_refresh);
57}
58
59struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, 53struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
60 struct cache_head *key, int hash) 54 struct cache_head *key, int hash)
61{ 55{
@@ -201,7 +195,7 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h)
201 return sunrpc_cache_pipe_upcall(cd, h); 195 return sunrpc_cache_pipe_upcall(cd, h);
202} 196}
203 197
204static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) 198static inline int cache_is_valid(struct cache_head *h)
205{ 199{
206 if (!test_bit(CACHE_VALID, &h->flags)) 200 if (!test_bit(CACHE_VALID, &h->flags))
207 return -EAGAIN; 201 return -EAGAIN;
@@ -227,16 +221,15 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
227 int rv; 221 int rv;
228 222
229 write_lock(&detail->hash_lock); 223 write_lock(&detail->hash_lock);
230 rv = cache_is_valid(detail, h); 224 rv = cache_is_valid(h);
231 if (rv != -EAGAIN) { 225 if (rv == -EAGAIN) {
232 write_unlock(&detail->hash_lock); 226 set_bit(CACHE_NEGATIVE, &h->flags);
233 return rv; 227 cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
228 rv = -ENOENT;
234 } 229 }
235 set_bit(CACHE_NEGATIVE, &h->flags);
236 cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
237 write_unlock(&detail->hash_lock); 230 write_unlock(&detail->hash_lock);
238 cache_fresh_unlocked(h, detail); 231 cache_fresh_unlocked(h, detail);
239 return -ENOENT; 232 return rv;
240} 233}
241 234
242/* 235/*
@@ -260,7 +253,7 @@ int cache_check(struct cache_detail *detail,
260 long refresh_age, age; 253 long refresh_age, age;
261 254
262 /* First decide return status as best we can */ 255 /* First decide return status as best we can */
263 rv = cache_is_valid(detail, h); 256 rv = cache_is_valid(h);
264 257
265 /* now see if we want to start an upcall */ 258 /* now see if we want to start an upcall */
266 refresh_age = (h->expiry_time - h->last_refresh); 259 refresh_age = (h->expiry_time - h->last_refresh);
@@ -269,19 +262,17 @@ int cache_check(struct cache_detail *detail,
269 if (rqstp == NULL) { 262 if (rqstp == NULL) {
270 if (rv == -EAGAIN) 263 if (rv == -EAGAIN)
271 rv = -ENOENT; 264 rv = -ENOENT;
272 } else if (rv == -EAGAIN || age > refresh_age/2) { 265 } else if (rv == -EAGAIN ||
266 (h->expiry_time != 0 && age > refresh_age/2)) {
273 dprintk("RPC: Want update, refage=%ld, age=%ld\n", 267 dprintk("RPC: Want update, refage=%ld, age=%ld\n",
274 refresh_age, age); 268 refresh_age, age);
275 if (!test_and_set_bit(CACHE_PENDING, &h->flags)) { 269 if (!test_and_set_bit(CACHE_PENDING, &h->flags)) {
276 switch (cache_make_upcall(detail, h)) { 270 switch (cache_make_upcall(detail, h)) {
277 case -EINVAL: 271 case -EINVAL:
278 clear_bit(CACHE_PENDING, &h->flags);
279 cache_revisit_request(h);
280 rv = try_to_negate_entry(detail, h); 272 rv = try_to_negate_entry(detail, h);
281 break; 273 break;
282 case -EAGAIN: 274 case -EAGAIN:
283 clear_bit(CACHE_PENDING, &h->flags); 275 cache_fresh_unlocked(h, detail);
284 cache_revisit_request(h);
285 break; 276 break;
286 } 277 }
287 } 278 }
@@ -293,7 +284,7 @@ int cache_check(struct cache_detail *detail,
293 * Request was not deferred; handle it as best 284 * Request was not deferred; handle it as best
294 * we can ourselves: 285 * we can ourselves:
295 */ 286 */
296 rv = cache_is_valid(detail, h); 287 rv = cache_is_valid(h);
297 if (rv == -EAGAIN) 288 if (rv == -EAGAIN)
298 rv = -ETIMEDOUT; 289 rv = -ETIMEDOUT;
299 } 290 }
@@ -310,7 +301,7 @@ EXPORT_SYMBOL_GPL(cache_check);
310 * a current pointer into that list and into the table 301 * a current pointer into that list and into the table
311 * for that entry. 302 * for that entry.
312 * 303 *
313 * Each time clean_cache is called it finds the next non-empty entry 304 * Each time cache_clean is called it finds the next non-empty entry
314 * in the current table and walks the list in that entry 305 * in the current table and walks the list in that entry
315 * looking for entries that can be removed. 306 * looking for entries that can be removed.
316 * 307 *
@@ -457,9 +448,8 @@ static int cache_clean(void)
457 current_index ++; 448 current_index ++;
458 spin_unlock(&cache_list_lock); 449 spin_unlock(&cache_list_lock);
459 if (ch) { 450 if (ch) {
460 if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) 451 set_bit(CACHE_CLEANED, &ch->flags);
461 cache_dequeue(current_detail, ch); 452 cache_fresh_unlocked(ch, d);
462 cache_revisit_request(ch);
463 cache_put(ch, d); 453 cache_put(ch, d);
464 } 454 }
465 } else 455 } else
@@ -1036,23 +1026,32 @@ static int cache_release(struct inode *inode, struct file *filp,
1036 1026
1037static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch) 1027static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch)
1038{ 1028{
1039 struct cache_queue *cq; 1029 struct cache_queue *cq, *tmp;
1030 struct cache_request *cr;
1031 struct list_head dequeued;
1032
1033 INIT_LIST_HEAD(&dequeued);
1040 spin_lock(&queue_lock); 1034 spin_lock(&queue_lock);
1041 list_for_each_entry(cq, &detail->queue, list) 1035 list_for_each_entry_safe(cq, tmp, &detail->queue, list)
1042 if (!cq->reader) { 1036 if (!cq->reader) {
1043 struct cache_request *cr = container_of(cq, struct cache_request, q); 1037 cr = container_of(cq, struct cache_request, q);
1044 if (cr->item != ch) 1038 if (cr->item != ch)
1045 continue; 1039 continue;
1040 if (test_bit(CACHE_PENDING, &ch->flags))
1041 /* Lost a race and it is pending again */
1042 break;
1046 if (cr->readers != 0) 1043 if (cr->readers != 0)
1047 continue; 1044 continue;
1048 list_del(&cr->q.list); 1045 list_move(&cr->q.list, &dequeued);
1049 spin_unlock(&queue_lock);
1050 cache_put(cr->item, detail);
1051 kfree(cr->buf);
1052 kfree(cr);
1053 return;
1054 } 1046 }
1055 spin_unlock(&queue_lock); 1047 spin_unlock(&queue_lock);
1048 while (!list_empty(&dequeued)) {
1049 cr = list_entry(dequeued.next, struct cache_request, q.list);
1050 list_del(&cr->q.list);
1051 cache_put(cr->item, detail);
1052 kfree(cr->buf);
1053 kfree(cr);
1054 }
1056} 1055}
1057 1056
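The rewritten cache_dequeue() never drops queue_lock mid-scan: matching requests are spliced onto a private 'dequeued' list under the lock and freed only afterwards, whereas the old code unlocked after the first match and returned, leaving any further matches queued. A compact user-space sketch of the move-then-free pattern on a kernel-style circular doubly linked list:

#include <pthread.h>
#include <stdlib.h>

struct node {
	struct node *prev, *next;
	int key;
};

static void list_move_tail(struct node *n, struct node *head)
{
	n->prev->next = n->next;	/* unlink from the old list */
	n->next->prev = n->prev;
	n->prev = head->prev;		/* splice in just before head */
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

static void dequeue_all(struct node *queue, int key)
{
	struct node dead = { &dead, &dead };	/* private victim list */
	struct node *n, *tmp;

	pthread_mutex_lock(&queue_lock);
	for (n = queue->next; n != queue; n = tmp) {
		tmp = n->next;			/* safe against unlinking n */
		if (n->key == key)
			list_move_tail(n, &dead);
	}
	pthread_mutex_unlock(&queue_lock);

	while (dead.next != &dead) {		/* free with no lock held */
		n = dead.next;
		dead.next = n->next;
		n->next->prev = &dead;
		free(n);
	}
}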
1058/* 1057/*
@@ -1166,6 +1165,7 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
1166 1165
1167 char *buf; 1166 char *buf;
1168 struct cache_request *crq; 1167 struct cache_request *crq;
1168 int ret = 0;
1169 1169
1170 if (!detail->cache_request) 1170 if (!detail->cache_request)
1171 return -EINVAL; 1171 return -EINVAL;
@@ -1174,6 +1174,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
1174 warn_no_listener(detail); 1174 warn_no_listener(detail);
1175 return -EINVAL; 1175 return -EINVAL;
1176 } 1176 }
1177 if (test_bit(CACHE_CLEANED, &h->flags))
1178 /* Too late to make an upcall */
1179 return -EAGAIN;
1177 1180
1178 buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 1181 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1179 if (!buf) 1182 if (!buf)
@@ -1191,10 +1194,18 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
1191 crq->len = 0; 1194 crq->len = 0;
1192 crq->readers = 0; 1195 crq->readers = 0;
1193 spin_lock(&queue_lock); 1196 spin_lock(&queue_lock);
1194 list_add_tail(&crq->q.list, &detail->queue); 1197 if (test_bit(CACHE_PENDING, &h->flags))
1198 list_add_tail(&crq->q.list, &detail->queue);
1199 else
1200 /* Lost a race, no longer PENDING, so don't enqueue */
1201 ret = -EAGAIN;
1195 spin_unlock(&queue_lock); 1202 spin_unlock(&queue_lock);
1196 wake_up(&queue_wait); 1203 wake_up(&queue_wait);
1197 return 0; 1204 if (ret == -EAGAIN) {
1205 kfree(buf);
1206 kfree(crq);
1207 }
1208 return ret;
1198} 1209}
1199EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); 1210EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall);
1200 1211
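The enqueue in sunrpc_cache_pipe_upcall() now re-checks CACHE_PENDING while holding queue_lock, and when the race is lost it unwinds its allocations instead of leaving an orphaned request on the queue. Roughly this shape, sketched with a mutex and an atomic flag standing in for the cache_head bit:

#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

struct request { struct request *next; char *buf; };
static struct request *queue_head;

static int enqueue_upcall(atomic_bool *pending)
{
	struct request *req = calloc(1, sizeof(*req));
	int ret = 0;

	if (!req || !(req->buf = malloc(4096))) {
		free(req);
		return -ENOMEM;
	}
	pthread_mutex_lock(&queue_lock);
	if (atomic_load(pending)) {		/* still wanted: enqueue */
		req->next = queue_head;
		queue_head = req;
	} else {				/* lost the race */
		ret = -EAGAIN;
	}
	pthread_mutex_unlock(&queue_lock);
	if (ret == -EAGAIN) {			/* undo outside the lock */
		free(req->buf);
		free(req);
	}
	return ret;
}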
@@ -1812,19 +1823,11 @@ int sunrpc_cache_register_pipefs(struct dentry *parent,
1812 const char *name, umode_t umode, 1823 const char *name, umode_t umode,
1813 struct cache_detail *cd) 1824 struct cache_detail *cd)
1814{ 1825{
1815 struct qstr q; 1826 struct dentry *dir = rpc_create_cache_dir(parent, name, umode, cd);
1816 struct dentry *dir; 1827 if (IS_ERR(dir))
1817 int ret = 0; 1828 return PTR_ERR(dir);
1818 1829 cd->u.pipefs.dir = dir;
1819 q.name = name; 1830 return 0;
1820 q.len = strlen(name);
1821 q.hash = full_name_hash(q.name, q.len);
1822 dir = rpc_create_cache_dir(parent, &q, umode, cd);
1823 if (!IS_ERR(dir))
1824 cd->u.pipefs.dir = dir;
1825 else
1826 ret = PTR_ERR(dir);
1827 return ret;
1828} 1831}
1829EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); 1832EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs);
1830 1833
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 5a750b9c3640..77479606a971 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -102,12 +102,7 @@ static void rpc_unregister_client(struct rpc_clnt *clnt)
102 102
103static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) 103static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
104{ 104{
105 if (clnt->cl_dentry) { 105 rpc_remove_client_dir(clnt);
106 if (clnt->cl_auth && clnt->cl_auth->au_ops->pipes_destroy)
107 clnt->cl_auth->au_ops->pipes_destroy(clnt->cl_auth);
108 rpc_remove_client_dir(clnt->cl_dentry);
109 }
110 clnt->cl_dentry = NULL;
111} 106}
112 107
113static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) 108static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
@@ -123,14 +118,12 @@ static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
123} 118}
124 119
125static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, 120static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
126 struct rpc_clnt *clnt, 121 struct rpc_clnt *clnt)
127 const char *dir_name)
128{ 122{
129 static uint32_t clntid; 123 static uint32_t clntid;
124 const char *dir_name = clnt->cl_program->pipe_dir_name;
130 char name[15]; 125 char name[15];
131 struct qstr q = { .name = name };
132 struct dentry *dir, *dentry; 126 struct dentry *dir, *dentry;
133 int error;
134 127
135 dir = rpc_d_lookup_sb(sb, dir_name); 128 dir = rpc_d_lookup_sb(sb, dir_name);
136 if (dir == NULL) { 129 if (dir == NULL) {
@@ -138,50 +131,52 @@ static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
138 return dir; 131 return dir;
139 } 132 }
140 for (;;) { 133 for (;;) {
141 q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); 134 snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++);
142 name[sizeof(name) - 1] = '\0'; 135 name[sizeof(name) - 1] = '\0';
143 q.hash = full_name_hash(q.name, q.len); 136 dentry = rpc_create_client_dir(dir, name, clnt);
144 dentry = rpc_create_client_dir(dir, &q, clnt);
145 if (!IS_ERR(dentry)) 137 if (!IS_ERR(dentry))
146 break; 138 break;
147 error = PTR_ERR(dentry); 139 if (dentry == ERR_PTR(-EEXIST))
148 if (error != -EEXIST) { 140 continue;
149 printk(KERN_INFO "RPC: Couldn't create pipefs entry" 141 printk(KERN_INFO "RPC: Couldn't create pipefs entry"
150 " %s/%s, error %d\n", 142 " %s/%s, error %ld\n",
151 dir_name, name, error); 143 dir_name, name, PTR_ERR(dentry));
152 break; 144 break;
153 }
154 } 145 }
155 dput(dir); 146 dput(dir);
156 return dentry; 147 return dentry;
157} 148}
158 149
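rpc_setup_pipedir_sb() hunts for a free clntXX name by retrying only on -EEXIST; any other error aborts the loop. The same collision-retry idiom with mkdir(2) in user space:

#include <errno.h>
#include <stdio.h>
#include <sys/stat.h>

/* Create a uniquely numbered subdirectory, skipping names already taken. */
static int make_unique_dir(char *out, size_t outlen)
{
	static unsigned int id;

	for (;;) {
		snprintf(out, outlen, "clnt%x", id++);
		if (mkdir(out, 0755) == 0)
			return 0;
		if (errno == EEXIST)
			continue;	/* collision: try the next id */
		perror("mkdir");
		return -1;
	}
}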
159static int 150static int
160rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name) 151rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt)
161{ 152{
162 struct net *net = rpc_net_ns(clnt);
163 struct super_block *pipefs_sb;
164 struct dentry *dentry; 153 struct dentry *dentry;
165 154
166 clnt->cl_dentry = NULL; 155 if (clnt->cl_program->pipe_dir_name != NULL) {
167 if (dir_name == NULL) 156 dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt);
168 return 0; 157 if (IS_ERR(dentry))
169 pipefs_sb = rpc_get_sb_net(net); 158 return PTR_ERR(dentry);
170 if (!pipefs_sb) 159 }
171 return 0;
172 dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name);
173 rpc_put_sb_net(net);
174 if (IS_ERR(dentry))
175 return PTR_ERR(dentry);
176 clnt->cl_dentry = dentry;
177 return 0; 160 return 0;
178} 161}
179 162
180static inline int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event) 163static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
181{ 164{
182 if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) || 165 if (clnt->cl_program->pipe_dir_name == NULL)
183 ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry))
184 return 1; 166 return 1;
167
168 switch (event) {
169 case RPC_PIPEFS_MOUNT:
170 if (clnt->cl_pipedir_objects.pdh_dentry != NULL)
171 return 1;
172 if (atomic_read(&clnt->cl_count) == 0)
173 return 1;
174 break;
175 case RPC_PIPEFS_UMOUNT:
176 if (clnt->cl_pipedir_objects.pdh_dentry == NULL)
177 return 1;
178 break;
179 }
185 return 0; 180 return 0;
186} 181}
187 182
@@ -193,18 +188,11 @@ static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event,
193 188
194 switch (event) { 189 switch (event) {
195 case RPC_PIPEFS_MOUNT: 190 case RPC_PIPEFS_MOUNT:
196 dentry = rpc_setup_pipedir_sb(sb, clnt, 191 dentry = rpc_setup_pipedir_sb(sb, clnt);
197 clnt->cl_program->pipe_dir_name);
198 if (!dentry) 192 if (!dentry)
199 return -ENOENT; 193 return -ENOENT;
200 if (IS_ERR(dentry)) 194 if (IS_ERR(dentry))
201 return PTR_ERR(dentry); 195 return PTR_ERR(dentry);
202 clnt->cl_dentry = dentry;
203 if (clnt->cl_auth->au_ops->pipes_create) {
204 err = clnt->cl_auth->au_ops->pipes_create(clnt->cl_auth);
205 if (err)
206 __rpc_clnt_remove_pipedir(clnt);
207 }
208 break; 196 break;
209 case RPC_PIPEFS_UMOUNT: 197 case RPC_PIPEFS_UMOUNT:
210 __rpc_clnt_remove_pipedir(clnt); 198 __rpc_clnt_remove_pipedir(clnt);
@@ -237,12 +225,8 @@ static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event)
237 225
238 spin_lock(&sn->rpc_client_lock); 226 spin_lock(&sn->rpc_client_lock);
239 list_for_each_entry(clnt, &sn->all_clients, cl_clients) { 227 list_for_each_entry(clnt, &sn->all_clients, cl_clients) {
240 if (clnt->cl_program->pipe_dir_name == NULL)
241 continue;
242 if (rpc_clnt_skip_event(clnt, event)) 228 if (rpc_clnt_skip_event(clnt, event))
243 continue; 229 continue;
244 if (atomic_inc_not_zero(&clnt->cl_count) == 0)
245 continue;
246 spin_unlock(&sn->rpc_client_lock); 230 spin_unlock(&sn->rpc_client_lock);
247 return clnt; 231 return clnt;
248 } 232 }
@@ -259,7 +243,6 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
259 243
260 while ((clnt = rpc_get_client_for_event(sb->s_fs_info, event))) { 244 while ((clnt = rpc_get_client_for_event(sb->s_fs_info, event))) {
261 error = __rpc_pipefs_event(clnt, event, sb); 245 error = __rpc_pipefs_event(clnt, event, sb);
262 rpc_release_client(clnt);
263 if (error) 246 if (error)
264 break; 247 break;
265 } 248 }
@@ -289,12 +272,72 @@ static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename)
289 memcpy(clnt->cl_nodename, nodename, clnt->cl_nodelen); 272 memcpy(clnt->cl_nodename, nodename, clnt->cl_nodelen);
290} 273}
291 274
292static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) 275static int rpc_client_register(const struct rpc_create_args *args,
276 struct rpc_clnt *clnt)
277{
278 struct rpc_auth_create_args auth_args = {
279 .pseudoflavor = args->authflavor,
280 .target_name = args->client_name,
281 };
282 struct rpc_auth *auth;
283 struct net *net = rpc_net_ns(clnt);
284 struct super_block *pipefs_sb;
285 int err;
286
287 pipefs_sb = rpc_get_sb_net(net);
288 if (pipefs_sb) {
289 err = rpc_setup_pipedir(pipefs_sb, clnt);
290 if (err)
291 goto out;
292 }
293
294 rpc_register_client(clnt);
295 if (pipefs_sb)
296 rpc_put_sb_net(net);
297
298 auth = rpcauth_create(&auth_args, clnt);
299 if (IS_ERR(auth)) {
300 dprintk("RPC: Couldn't create auth handle (flavor %u)\n",
301 args->authflavor);
302 err = PTR_ERR(auth);
303 goto err_auth;
304 }
305 return 0;
306err_auth:
307 pipefs_sb = rpc_get_sb_net(net);
308 rpc_unregister_client(clnt);
309 __rpc_clnt_remove_pipedir(clnt);
310out:
311 if (pipefs_sb)
312 rpc_put_sb_net(net);
313 return err;
314}
315
316static DEFINE_IDA(rpc_clids);
317
318static int rpc_alloc_clid(struct rpc_clnt *clnt)
319{
320 int clid;
321
322 clid = ida_simple_get(&rpc_clids, 0, 0, GFP_KERNEL);
323 if (clid < 0)
324 return clid;
325 clnt->cl_clid = clid;
326 return 0;
327}
328
329static void rpc_free_clid(struct rpc_clnt *clnt)
330{
331 ida_simple_remove(&rpc_clids, clnt->cl_clid);
332}
333
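Client IDs now come from an IDA, which always hands back the smallest free integer and recycles it on ida_simple_remove(), so rpc_free_clid() must be the very last user of the ID. A toy fixed-width analogue using a bitmap (the real IDA is unbounded and internally locked; this sketch is neither):

#include <errno.h>
#include <stdint.h>

static uint64_t clid_map;	/* bit n set => id n in use */

static int alloc_clid(void)
{
	for (int id = 0; id < 64; id++) {
		if (!(clid_map & (1ULL << id))) {
			clid_map |= 1ULL << id;
			return id;	/* smallest free id, like an IDA */
		}
	}
	return -ENOSPC;
}

static void free_clid(int id)
{
	clid_map &= ~(1ULL << id);	/* id may be handed out again */
}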
334static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
335 struct rpc_xprt *xprt,
336 struct rpc_clnt *parent)
293{ 337{
294 const struct rpc_program *program = args->program; 338 const struct rpc_program *program = args->program;
295 const struct rpc_version *version; 339 const struct rpc_version *version;
296 struct rpc_clnt *clnt = NULL; 340 struct rpc_clnt *clnt = NULL;
297 struct rpc_auth *auth;
298 int err; 341 int err;
299 342
300 /* sanity check the name before trying to print it */ 343 /* sanity check the name before trying to print it */
@@ -316,16 +359,20 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
316 clnt = kzalloc(sizeof(*clnt), GFP_KERNEL); 359 clnt = kzalloc(sizeof(*clnt), GFP_KERNEL);
317 if (!clnt) 360 if (!clnt)
318 goto out_err; 361 goto out_err;
319 clnt->cl_parent = clnt; 362 clnt->cl_parent = parent ? : clnt;
363
364 err = rpc_alloc_clid(clnt);
365 if (err)
366 goto out_no_clid;
320 367
321 rcu_assign_pointer(clnt->cl_xprt, xprt); 368 rcu_assign_pointer(clnt->cl_xprt, xprt);
322 clnt->cl_procinfo = version->procs; 369 clnt->cl_procinfo = version->procs;
323 clnt->cl_maxproc = version->nrprocs; 370 clnt->cl_maxproc = version->nrprocs;
324 clnt->cl_protname = program->name;
325 clnt->cl_prog = args->prognumber ? : program->number; 371 clnt->cl_prog = args->prognumber ? : program->number;
326 clnt->cl_vers = version->number; 372 clnt->cl_vers = version->number;
327 clnt->cl_stats = program->stats; 373 clnt->cl_stats = program->stats;
328 clnt->cl_metrics = rpc_alloc_iostats(clnt); 374 clnt->cl_metrics = rpc_alloc_iostats(clnt);
375 rpc_init_pipe_dir_head(&clnt->cl_pipedir_objects);
329 err = -ENOMEM; 376 err = -ENOMEM;
330 if (clnt->cl_metrics == NULL) 377 if (clnt->cl_metrics == NULL)
331 goto out_no_stats; 378 goto out_no_stats;
@@ -345,39 +392,24 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
345 392
346 clnt->cl_rtt = &clnt->cl_rtt_default; 393 clnt->cl_rtt = &clnt->cl_rtt_default;
347 rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval); 394 rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
348 clnt->cl_principal = NULL;
349 if (args->client_name) {
350 clnt->cl_principal = kstrdup(args->client_name, GFP_KERNEL);
351 if (!clnt->cl_principal)
352 goto out_no_principal;
353 }
354 395
355 atomic_set(&clnt->cl_count, 1); 396 atomic_set(&clnt->cl_count, 1);
356 397
357 err = rpc_setup_pipedir(clnt, program->pipe_dir_name);
358 if (err < 0)
359 goto out_no_path;
360
361 auth = rpcauth_create(args->authflavor, clnt);
362 if (IS_ERR(auth)) {
363 dprintk("RPC: Couldn't create auth handle (flavor %u)\n",
364 args->authflavor);
365 err = PTR_ERR(auth);
366 goto out_no_auth;
367 }
368
369 /* save the nodename */ 398 /* save the nodename */
370 rpc_clnt_set_nodename(clnt, utsname()->nodename); 399 rpc_clnt_set_nodename(clnt, utsname()->nodename);
371 rpc_register_client(clnt); 400
401 err = rpc_client_register(args, clnt);
402 if (err)
403 goto out_no_path;
404 if (parent)
405 atomic_inc(&parent->cl_count);
372 return clnt; 406 return clnt;
373 407
374out_no_auth:
375 rpc_clnt_remove_pipedir(clnt);
376out_no_path: 408out_no_path:
377 kfree(clnt->cl_principal);
378out_no_principal:
379 rpc_free_iostats(clnt->cl_metrics); 409 rpc_free_iostats(clnt->cl_metrics);
380out_no_stats: 410out_no_stats:
411 rpc_free_clid(clnt);
412out_no_clid:
381 kfree(clnt); 413 kfree(clnt);
382out_err: 414out_err:
383 rpciod_down(); 415 rpciod_down();
@@ -463,7 +495,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
463 if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) 495 if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
464 xprt->resvport = 0; 496 xprt->resvport = 0;
465 497
466 clnt = rpc_new_client(args, xprt); 498 clnt = rpc_new_client(args, xprt, NULL);
467 if (IS_ERR(clnt)) 499 if (IS_ERR(clnt))
468 return clnt; 500 return clnt;
469 501
@@ -510,15 +542,12 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
510 goto out_err; 542 goto out_err;
511 args->servername = xprt->servername; 543 args->servername = xprt->servername;
512 544
513 new = rpc_new_client(args, xprt); 545 new = rpc_new_client(args, xprt, clnt);
514 if (IS_ERR(new)) { 546 if (IS_ERR(new)) {
515 err = PTR_ERR(new); 547 err = PTR_ERR(new);
516 goto out_err; 548 goto out_err;
517 } 549 }
518 550
519 atomic_inc(&clnt->cl_count);
520 new->cl_parent = clnt;
521
522 /* Turn off autobind on clones */ 551 /* Turn off autobind on clones */
523 new->cl_autobind = 0; 552 new->cl_autobind = 0;
524 new->cl_softrtry = clnt->cl_softrtry; 553 new->cl_softrtry = clnt->cl_softrtry;
@@ -545,7 +574,6 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
545 .prognumber = clnt->cl_prog, 574 .prognumber = clnt->cl_prog,
546 .version = clnt->cl_vers, 575 .version = clnt->cl_vers,
547 .authflavor = clnt->cl_auth->au_flavor, 576 .authflavor = clnt->cl_auth->au_flavor,
548 .client_name = clnt->cl_principal,
549 }; 577 };
550 return __rpc_clone_client(&args, clnt); 578 return __rpc_clone_client(&args, clnt);
551} 579}
@@ -567,7 +595,6 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
567 .prognumber = clnt->cl_prog, 595 .prognumber = clnt->cl_prog,
568 .version = clnt->cl_vers, 596 .version = clnt->cl_vers,
569 .authflavor = flavor, 597 .authflavor = flavor,
570 .client_name = clnt->cl_principal,
571 }; 598 };
572 return __rpc_clone_client(&args, clnt); 599 return __rpc_clone_client(&args, clnt);
573} 600}
@@ -613,7 +640,7 @@ void rpc_shutdown_client(struct rpc_clnt *clnt)
613 might_sleep(); 640 might_sleep();
614 641
615 dprintk_rcu("RPC: shutting down %s client for %s\n", 642 dprintk_rcu("RPC: shutting down %s client for %s\n",
616 clnt->cl_protname, 643 clnt->cl_program->name,
617 rcu_dereference(clnt->cl_xprt)->servername); 644 rcu_dereference(clnt->cl_xprt)->servername);
618 645
619 while (!list_empty(&clnt->cl_tasks)) { 646 while (!list_empty(&clnt->cl_tasks)) {
@@ -633,17 +660,17 @@ static void
633rpc_free_client(struct rpc_clnt *clnt) 660rpc_free_client(struct rpc_clnt *clnt)
634{ 661{
635 dprintk_rcu("RPC: destroying %s client for %s\n", 662 dprintk_rcu("RPC: destroying %s client for %s\n",
636 clnt->cl_protname, 663 clnt->cl_program->name,
637 rcu_dereference(clnt->cl_xprt)->servername); 664 rcu_dereference(clnt->cl_xprt)->servername);
638 if (clnt->cl_parent != clnt) 665 if (clnt->cl_parent != clnt)
639 rpc_release_client(clnt->cl_parent); 666 rpc_release_client(clnt->cl_parent);
640 rpc_unregister_client(clnt);
641 rpc_clnt_remove_pipedir(clnt); 667 rpc_clnt_remove_pipedir(clnt);
668 rpc_unregister_client(clnt);
642 rpc_free_iostats(clnt->cl_metrics); 669 rpc_free_iostats(clnt->cl_metrics);
643 kfree(clnt->cl_principal);
644 clnt->cl_metrics = NULL; 670 clnt->cl_metrics = NULL;
645 xprt_put(rcu_dereference_raw(clnt->cl_xprt)); 671 xprt_put(rcu_dereference_raw(clnt->cl_xprt));
646 rpciod_down(); 672 rpciod_down();
673 rpc_free_clid(clnt);
647 kfree(clnt); 674 kfree(clnt);
648} 675}
649 676
@@ -704,7 +731,6 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
704 .prognumber = program->number, 731 .prognumber = program->number,
705 .version = vers, 732 .version = vers,
706 .authflavor = old->cl_auth->au_flavor, 733 .authflavor = old->cl_auth->au_flavor,
707 .client_name = old->cl_principal,
708 }; 734 };
709 struct rpc_clnt *clnt; 735 struct rpc_clnt *clnt;
710 int err; 736 int err;
@@ -1283,7 +1309,7 @@ call_start(struct rpc_task *task)
1283 struct rpc_clnt *clnt = task->tk_client; 1309 struct rpc_clnt *clnt = task->tk_client;
1284 1310
1285 dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid, 1311 dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
1286 clnt->cl_protname, clnt->cl_vers, 1312 clnt->cl_program->name, clnt->cl_vers,
1287 rpc_proc_name(task), 1313 rpc_proc_name(task),
1288 (RPC_IS_ASYNC(task) ? "async" : "sync")); 1314 (RPC_IS_ASYNC(task) ? "async" : "sync"));
1289 1315
@@ -1407,9 +1433,9 @@ call_refreshresult(struct rpc_task *task)
1407 return; 1433 return;
1408 case -ETIMEDOUT: 1434 case -ETIMEDOUT:
1409 rpc_delay(task, 3*HZ); 1435 rpc_delay(task, 3*HZ);
1410 case -EKEYEXPIRED:
1411 case -EAGAIN: 1436 case -EAGAIN:
1412 status = -EACCES; 1437 status = -EACCES;
1438 case -EKEYEXPIRED:
1413 if (!task->tk_cred_retry) 1439 if (!task->tk_cred_retry)
1414 break; 1440 break;
1415 task->tk_cred_retry--; 1441 task->tk_cred_retry--;
@@ -1644,6 +1670,10 @@ call_connect(struct rpc_task *task)
1644 task->tk_action = call_connect_status; 1670 task->tk_action = call_connect_status;
1645 if (task->tk_status < 0) 1671 if (task->tk_status < 0)
1646 return; 1672 return;
1673 if (task->tk_flags & RPC_TASK_NOCONNECT) {
1674 rpc_exit(task, -ENOTCONN);
1675 return;
1676 }
1647 xprt_connect(task); 1677 xprt_connect(task);
1648 } 1678 }
1649} 1679}
@@ -1892,7 +1922,7 @@ call_status(struct rpc_task *task)
1892 default: 1922 default:
1893 if (clnt->cl_chatty) 1923 if (clnt->cl_chatty)
1894 printk("%s: RPC call returned error %d\n", 1924 printk("%s: RPC call returned error %d\n",
1895 clnt->cl_protname, -status); 1925 clnt->cl_program->name, -status);
1896 rpc_exit(task, status); 1926 rpc_exit(task, status);
1897 } 1927 }
1898} 1928}
@@ -1923,7 +1953,7 @@ call_timeout(struct rpc_task *task)
1923 if (clnt->cl_chatty) { 1953 if (clnt->cl_chatty) {
1924 rcu_read_lock(); 1954 rcu_read_lock();
1925 printk(KERN_NOTICE "%s: server %s not responding, timed out\n", 1955 printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
1926 clnt->cl_protname, 1956 clnt->cl_program->name,
1927 rcu_dereference(clnt->cl_xprt)->servername); 1957 rcu_dereference(clnt->cl_xprt)->servername);
1928 rcu_read_unlock(); 1958 rcu_read_unlock();
1929 } 1959 }
@@ -1939,7 +1969,7 @@ call_timeout(struct rpc_task *task)
1939 if (clnt->cl_chatty) { 1969 if (clnt->cl_chatty) {
1940 rcu_read_lock(); 1970 rcu_read_lock();
1941 printk(KERN_NOTICE "%s: server %s not responding, still trying\n", 1971 printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
1942 clnt->cl_protname, 1972 clnt->cl_program->name,
1943 rcu_dereference(clnt->cl_xprt)->servername); 1973 rcu_dereference(clnt->cl_xprt)->servername);
1944 rcu_read_unlock(); 1974 rcu_read_unlock();
1945 } 1975 }
@@ -1974,7 +2004,7 @@ call_decode(struct rpc_task *task)
1974 if (clnt->cl_chatty) { 2004 if (clnt->cl_chatty) {
1975 rcu_read_lock(); 2005 rcu_read_lock();
1976 printk(KERN_NOTICE "%s: server %s OK\n", 2006 printk(KERN_NOTICE "%s: server %s OK\n",
1977 clnt->cl_protname, 2007 clnt->cl_program->name,
1978 rcu_dereference(clnt->cl_xprt)->servername); 2008 rcu_dereference(clnt->cl_xprt)->servername);
1979 rcu_read_unlock(); 2009 rcu_read_unlock();
1980 } 2010 }
@@ -1999,7 +2029,7 @@ call_decode(struct rpc_task *task)
1999 goto out_retry; 2029 goto out_retry;
2000 } 2030 }
2001 dprintk("RPC: %s: too small RPC reply size (%d bytes)\n", 2031 dprintk("RPC: %s: too small RPC reply size (%d bytes)\n",
2002 clnt->cl_protname, task->tk_status); 2032 clnt->cl_program->name, task->tk_status);
2003 task->tk_action = call_timeout; 2033 task->tk_action = call_timeout;
2004 goto out_retry; 2034 goto out_retry;
2005 } 2035 }
@@ -2071,7 +2101,8 @@ rpc_verify_header(struct rpc_task *task)
2071 dprintk("RPC: %5u %s: XDR representation not a multiple of" 2101 dprintk("RPC: %5u %s: XDR representation not a multiple of"
2072 " 4 bytes: 0x%x\n", task->tk_pid, __func__, 2102 " 4 bytes: 0x%x\n", task->tk_pid, __func__,
2073 task->tk_rqstp->rq_rcv_buf.len); 2103 task->tk_rqstp->rq_rcv_buf.len);
2074 goto out_eio; 2104 error = -EIO;
2105 goto out_err;
2075 } 2106 }
2076 if ((len -= 3) < 0) 2107 if ((len -= 3) < 0)
2077 goto out_overflow; 2108 goto out_overflow;
@@ -2080,6 +2111,7 @@ rpc_verify_header(struct rpc_task *task)
2080 if ((n = ntohl(*p++)) != RPC_REPLY) { 2111 if ((n = ntohl(*p++)) != RPC_REPLY) {
2081 dprintk("RPC: %5u %s: not an RPC reply: %x\n", 2112 dprintk("RPC: %5u %s: not an RPC reply: %x\n",
2082 task->tk_pid, __func__, n); 2113 task->tk_pid, __func__, n);
2114 error = -EIO;
2083 goto out_garbage; 2115 goto out_garbage;
2084 } 2116 }
2085 2117
@@ -2098,7 +2130,8 @@ rpc_verify_header(struct rpc_task *task)
2098 dprintk("RPC: %5u %s: RPC call rejected, " 2130 dprintk("RPC: %5u %s: RPC call rejected, "
2099 "unknown error: %x\n", 2131 "unknown error: %x\n",
2100 task->tk_pid, __func__, n); 2132 task->tk_pid, __func__, n);
2101 goto out_eio; 2133 error = -EIO;
2134 goto out_err;
2102 } 2135 }
2103 if (--len < 0) 2136 if (--len < 0)
2104 goto out_overflow; 2137 goto out_overflow;
@@ -2143,9 +2176,11 @@ rpc_verify_header(struct rpc_task *task)
2143 task->tk_pid, __func__, n); 2176 task->tk_pid, __func__, n);
2144 goto out_err; 2177 goto out_err;
2145 } 2178 }
2146 if (!(p = rpcauth_checkverf(task, p))) { 2179 p = rpcauth_checkverf(task, p);
2147 dprintk("RPC: %5u %s: auth check failed\n", 2180 if (IS_ERR(p)) {
2148 task->tk_pid, __func__); 2181 error = PTR_ERR(p);
2182 dprintk("RPC: %5u %s: auth check failed with %d\n",
2183 task->tk_pid, __func__, error);
2149 goto out_garbage; /* bad verifier, retry */ 2184 goto out_garbage; /* bad verifier, retry */
2150 } 2185 }
2151 len = p - (__be32 *)iov->iov_base - 1; 2186 len = p - (__be32 *)iov->iov_base - 1;
@@ -2198,8 +2233,6 @@ out_garbage:
2198out_retry: 2233out_retry:
2199 return ERR_PTR(-EAGAIN); 2234 return ERR_PTR(-EAGAIN);
2200 } 2235 }
2201out_eio:
2202 error = -EIO;
2203out_err: 2236out_err:
2204 rpc_exit(task, error); 2237 rpc_exit(task, error);
2205 dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid, 2238 dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid,
@@ -2271,7 +2304,7 @@ static void rpc_show_task(const struct rpc_clnt *clnt,
2271 printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n", 2304 printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n",
2272 task->tk_pid, task->tk_flags, task->tk_status, 2305 task->tk_pid, task->tk_flags, task->tk_status,
2273 clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops, 2306 clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
2274 clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task), 2307 clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
2275 task->tk_action, rpc_waitq); 2308 task->tk_action, rpc_waitq);
2276} 2309}
2277 2310
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index 74d948f5d5a1..779742cfc1ff 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -23,6 +23,7 @@ struct sunrpc_net {
23 struct rpc_clnt *rpcb_local_clnt4; 23 struct rpc_clnt *rpcb_local_clnt4;
24 spinlock_t rpcb_clnt_lock; 24 spinlock_t rpcb_clnt_lock;
25 unsigned int rpcb_users; 25 unsigned int rpcb_users;
26 unsigned int rpcb_is_af_local : 1;
26 27
27 struct mutex gssp_lock; 28 struct mutex gssp_lock;
28 wait_queue_head_t gssp_wq; 29 wait_queue_head_t gssp_wq;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index e7ce4b3eb0bd..f94567b45bb3 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -409,7 +409,7 @@ rpc_show_info(struct seq_file *m, void *v)
409 rcu_read_lock(); 409 rcu_read_lock();
410 seq_printf(m, "RPC server: %s\n", 410 seq_printf(m, "RPC server: %s\n",
411 rcu_dereference(clnt->cl_xprt)->servername); 411 rcu_dereference(clnt->cl_xprt)->servername);
412 seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, 412 seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_program->name,
413 clnt->cl_prog, clnt->cl_vers); 413 clnt->cl_prog, clnt->cl_vers);
414 seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); 414 seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
415 seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); 415 seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
@@ -656,20 +656,17 @@ static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry)
656} 656}
657 657
658static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, 658static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent,
659 struct qstr *name) 659 const char *name)
660{ 660{
661 struct dentry *dentry; 661 struct qstr q = QSTR_INIT(name, strlen(name));
662 662 struct dentry *dentry = d_hash_and_lookup(parent, &q);
663 dentry = d_lookup(parent, name);
664 if (!dentry) { 663 if (!dentry) {
665 dentry = d_alloc(parent, name); 664 dentry = d_alloc(parent, &q);
666 if (!dentry) 665 if (!dentry)
667 return ERR_PTR(-ENOMEM); 666 return ERR_PTR(-ENOMEM);
668 } 667 }
669 if (dentry->d_inode == NULL) { 668 if (dentry->d_inode == NULL)
670 d_set_d_op(dentry, &rpc_dentry_operations);
671 return dentry; 669 return dentry;
672 }
673 dput(dentry); 670 dput(dentry);
674 return ERR_PTR(-EEXIST); 671 return ERR_PTR(-EEXIST);
675} 672}
@@ -689,8 +686,7 @@ static void __rpc_depopulate(struct dentry *parent,
689 for (i = start; i < eof; i++) { 686 for (i = start; i < eof; i++) {
690 name.name = files[i].name; 687 name.name = files[i].name;
691 name.len = strlen(files[i].name); 688 name.len = strlen(files[i].name);
692 name.hash = full_name_hash(name.name, name.len); 689 dentry = d_hash_and_lookup(parent, &name);
693 dentry = d_lookup(parent, &name);
694 690
695 if (dentry == NULL) 691 if (dentry == NULL)
696 continue; 692 continue;
@@ -732,12 +728,7 @@ static int rpc_populate(struct dentry *parent,
732 728
733 mutex_lock(&dir->i_mutex); 729 mutex_lock(&dir->i_mutex);
734 for (i = start; i < eof; i++) { 730 for (i = start; i < eof; i++) {
735 struct qstr q; 731 dentry = __rpc_lookup_create_exclusive(parent, files[i].name);
736
737 q.name = files[i].name;
738 q.len = strlen(files[i].name);
739 q.hash = full_name_hash(q.name, q.len);
740 dentry = __rpc_lookup_create_exclusive(parent, &q);
741 err = PTR_ERR(dentry); 732 err = PTR_ERR(dentry);
742 if (IS_ERR(dentry)) 733 if (IS_ERR(dentry))
743 goto out_bad; 734 goto out_bad;
@@ -770,7 +761,7 @@ out_bad:
770} 761}
771 762
772static struct dentry *rpc_mkdir_populate(struct dentry *parent, 763static struct dentry *rpc_mkdir_populate(struct dentry *parent,
773 struct qstr *name, umode_t mode, void *private, 764 const char *name, umode_t mode, void *private,
774 int (*populate)(struct dentry *, void *), void *args_populate) 765 int (*populate)(struct dentry *, void *), void *args_populate)
775{ 766{
776 struct dentry *dentry; 767 struct dentry *dentry;
@@ -841,7 +832,6 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
841 struct dentry *dentry; 832 struct dentry *dentry;
842 struct inode *dir = parent->d_inode; 833 struct inode *dir = parent->d_inode;
843 umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR; 834 umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR;
844 struct qstr q;
845 int err; 835 int err;
846 836
847 if (pipe->ops->upcall == NULL) 837 if (pipe->ops->upcall == NULL)
@@ -849,12 +839,8 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
849 if (pipe->ops->downcall == NULL) 839 if (pipe->ops->downcall == NULL)
850 umode &= ~S_IWUGO; 840 umode &= ~S_IWUGO;
851 841
852 q.name = name;
853 q.len = strlen(name);
854 q.hash = full_name_hash(q.name, q.len),
855
856 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); 842 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
857 dentry = __rpc_lookup_create_exclusive(parent, &q); 843 dentry = __rpc_lookup_create_exclusive(parent, name);
858 if (IS_ERR(dentry)) 844 if (IS_ERR(dentry))
859 goto out; 845 goto out;
860 err = __rpc_mkpipe_dentry(dir, dentry, umode, &rpc_pipe_fops, 846 err = __rpc_mkpipe_dentry(dir, dentry, umode, &rpc_pipe_fops,
@@ -898,6 +884,159 @@ rpc_unlink(struct dentry *dentry)
898} 884}
899EXPORT_SYMBOL_GPL(rpc_unlink); 885EXPORT_SYMBOL_GPL(rpc_unlink);
900 886
887/**
888 * rpc_init_pipe_dir_head - initialise a struct rpc_pipe_dir_head
889 * @pdh: pointer to struct rpc_pipe_dir_head
890 */
891void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh)
892{
893 INIT_LIST_HEAD(&pdh->pdh_entries);
894 pdh->pdh_dentry = NULL;
895}
896EXPORT_SYMBOL_GPL(rpc_init_pipe_dir_head);
897
898/**
899 * rpc_init_pipe_dir_object - initialise a struct rpc_pipe_dir_object
900 * @pdo: pointer to struct rpc_pipe_dir_object
901 * @pdo_ops: pointer to const struct rpc_pipe_dir_object_ops
902 * @pdo_data: pointer to caller-defined data
903 */
904void rpc_init_pipe_dir_object(struct rpc_pipe_dir_object *pdo,
905 const struct rpc_pipe_dir_object_ops *pdo_ops,
906 void *pdo_data)
907{
908 INIT_LIST_HEAD(&pdo->pdo_head);
909 pdo->pdo_ops = pdo_ops;
910 pdo->pdo_data = pdo_data;
911}
912EXPORT_SYMBOL_GPL(rpc_init_pipe_dir_object);
913
914static int
915rpc_add_pipe_dir_object_locked(struct net *net,
916 struct rpc_pipe_dir_head *pdh,
917 struct rpc_pipe_dir_object *pdo)
918{
919 int ret = 0;
920
921 if (pdh->pdh_dentry)
922 ret = pdo->pdo_ops->create(pdh->pdh_dentry, pdo);
923 if (ret == 0)
924 list_add_tail(&pdo->pdo_head, &pdh->pdh_entries);
925 return ret;
926}
927
928static void
929rpc_remove_pipe_dir_object_locked(struct net *net,
930 struct rpc_pipe_dir_head *pdh,
931 struct rpc_pipe_dir_object *pdo)
932{
933 if (pdh->pdh_dentry)
934 pdo->pdo_ops->destroy(pdh->pdh_dentry, pdo);
935 list_del_init(&pdo->pdo_head);
936}
937
938/**
939 * rpc_add_pipe_dir_object - associate an rpc_pipe_dir_object with a directory
940 * @net: pointer to struct net
941 * @pdh: pointer to struct rpc_pipe_dir_head
942 * @pdo: pointer to struct rpc_pipe_dir_object
943 *
944 */
945int
946rpc_add_pipe_dir_object(struct net *net,
947 struct rpc_pipe_dir_head *pdh,
948 struct rpc_pipe_dir_object *pdo)
949{
950 int ret = 0;
951
952 if (list_empty(&pdo->pdo_head)) {
953 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
954
955 mutex_lock(&sn->pipefs_sb_lock);
956 ret = rpc_add_pipe_dir_object_locked(net, pdh, pdo);
957 mutex_unlock(&sn->pipefs_sb_lock);
958 }
959 return ret;
960}
961EXPORT_SYMBOL_GPL(rpc_add_pipe_dir_object);
962
963/**
964 * rpc_remove_pipe_dir_object - remove an rpc_pipe_dir_object from a directory
965 * @net: pointer to struct net
966 * @pdh: pointer to struct rpc_pipe_dir_head
967 * @pdo: pointer to struct rpc_pipe_dir_object
968 *
969 */
970void
971rpc_remove_pipe_dir_object(struct net *net,
972 struct rpc_pipe_dir_head *pdh,
973 struct rpc_pipe_dir_object *pdo)
974{
975 if (!list_empty(&pdo->pdo_head)) {
976 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
977
978 mutex_lock(&sn->pipefs_sb_lock);
979 rpc_remove_pipe_dir_object_locked(net, pdh, pdo);
980 mutex_unlock(&sn->pipefs_sb_lock);
981 }
982}
983EXPORT_SYMBOL_GPL(rpc_remove_pipe_dir_object);
984
985/**
986 * rpc_find_or_alloc_pipe_dir_object - find a matching pipe dir object, or allocate and add one
987 * @net: pointer to struct net
988 * @pdh: pointer to struct rpc_pipe_dir_head
989 * @match: test whether an existing struct rpc_pipe_dir_object matches @data
990 * @alloc: allocate a new struct rpc_pipe_dir_object from @data
991 * @data: user defined data for match() and alloc()
992 *
993 */
994struct rpc_pipe_dir_object *
995rpc_find_or_alloc_pipe_dir_object(struct net *net,
996 struct rpc_pipe_dir_head *pdh,
997 int (*match)(struct rpc_pipe_dir_object *, void *),
998 struct rpc_pipe_dir_object *(*alloc)(void *),
999 void *data)
1000{
1001 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1002 struct rpc_pipe_dir_object *pdo;
1003
1004 mutex_lock(&sn->pipefs_sb_lock);
1005 list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head) {
1006 if (!match(pdo, data))
1007 continue;
1008 goto out;
1009 }
1010 pdo = alloc(data);
1011 if (!pdo)
1012 goto out;
1013 rpc_add_pipe_dir_object_locked(net, pdh, pdo);
1014out:
1015 mutex_unlock(&sn->pipefs_sb_lock);
1016 return pdo;
1017}
1018EXPORT_SYMBOL_GPL(rpc_find_or_alloc_pipe_dir_object);
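
Taken together, the helpers added above form a small registration API. A hypothetical caller might use it as below; my_pdo_ops, my_create, my_destroy and my_attach are illustrative names, not part of the patch:

    static int my_create(struct dentry *dir, struct rpc_pipe_dir_object *pdo)
    {
    	/* create pipes or files under @dir here */
    	return 0;
    }

    static void my_destroy(struct dentry *dir, struct rpc_pipe_dir_object *pdo)
    {
    	/* undo whatever my_create() set up */
    }

    static const struct rpc_pipe_dir_object_ops my_pdo_ops = {
    	.create  = my_create,
    	.destroy = my_destroy,
    };

    static struct rpc_pipe_dir_object my_pdo;

    static int my_attach(struct net *net, struct rpc_clnt *clnt)
    {
    	rpc_init_pipe_dir_object(&my_pdo, &my_pdo_ops, NULL);
    	/* ->create() runs immediately if the client dir already exists */
    	return rpc_add_pipe_dir_object(net, &clnt->cl_pipedir_objects, &my_pdo);
    }
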
1019
1020static void
1021rpc_create_pipe_dir_objects(struct rpc_pipe_dir_head *pdh)
1022{
1023 struct rpc_pipe_dir_object *pdo;
1024 struct dentry *dir = pdh->pdh_dentry;
1025
1026 list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head)
1027 pdo->pdo_ops->create(dir, pdo);
1028}
1029
1030static void
1031rpc_destroy_pipe_dir_objects(struct rpc_pipe_dir_head *pdh)
1032{
1033 struct rpc_pipe_dir_object *pdo;
1034 struct dentry *dir = pdh->pdh_dentry;
1035
1036 list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head)
1037 pdo->pdo_ops->destroy(dir, pdo);
1038}
1039
901enum { 1040enum {
902 RPCAUTH_info, 1041 RPCAUTH_info,
903 RPCAUTH_EOF 1042 RPCAUTH_EOF
@@ -925,8 +1064,8 @@ static void rpc_clntdir_depopulate(struct dentry *dentry)
925 1064
926/** 1065/**
927 * rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs 1066 * rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs
928 * @dentry: dentry from the rpc_pipefs root to the new directory 1067 * @dentry: the parent of the new directory
929 * @name: &struct qstr for the name 1068 * @name: the name of the new directory
930 * @rpc_client: rpc client to associate with this directory 1069 * @rpc_client: rpc client to associate with this directory
931 * 1070 *
932 * This creates a directory at the given @path associated with 1071 * This creates a directory at the given @path associated with
@@ -935,19 +1074,32 @@ static void rpc_clntdir_depopulate(struct dentry *dentry)
935 * later be created using rpc_mkpipe(). 1074 * later be created using rpc_mkpipe().
936 */ 1075 */
937struct dentry *rpc_create_client_dir(struct dentry *dentry, 1076struct dentry *rpc_create_client_dir(struct dentry *dentry,
938 struct qstr *name, 1077 const char *name,
939 struct rpc_clnt *rpc_client) 1078 struct rpc_clnt *rpc_client)
940{ 1079{
941 return rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL, 1080 struct dentry *ret;
1081
1082 ret = rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL,
942 rpc_clntdir_populate, rpc_client); 1083 rpc_clntdir_populate, rpc_client);
1084 if (!IS_ERR(ret)) {
1085 rpc_client->cl_pipedir_objects.pdh_dentry = ret;
1086 rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
1087 }
1088 return ret;
943} 1089}
944 1090
945/** 1091/**
946 * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() 1092 * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir()
947 * @dentry: dentry for the pipe 1093 * @rpc_client: rpc_client for the pipe
948 */ 1094 */
949int rpc_remove_client_dir(struct dentry *dentry) 1095int rpc_remove_client_dir(struct rpc_clnt *rpc_client)
950{ 1096{
1097 struct dentry *dentry = rpc_client->cl_pipedir_objects.pdh_dentry;
1098
1099 if (dentry == NULL)
1100 return 0;
1101 rpc_destroy_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
1102 rpc_client->cl_pipedir_objects.pdh_dentry = NULL;
951 return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate); 1103 return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate);
952} 1104}
953 1105
@@ -981,7 +1133,7 @@ static void rpc_cachedir_depopulate(struct dentry *dentry)
981 rpc_depopulate(dentry, cache_pipefs_files, 0, 3); 1133 rpc_depopulate(dentry, cache_pipefs_files, 0, 3);
982} 1134}
983 1135
984struct dentry *rpc_create_cache_dir(struct dentry *parent, struct qstr *name, 1136struct dentry *rpc_create_cache_dir(struct dentry *parent, const char *name,
985 umode_t umode, struct cache_detail *cd) 1137 umode_t umode, struct cache_detail *cd)
986{ 1138{
987 return rpc_mkdir_populate(parent, name, umode, NULL, 1139 return rpc_mkdir_populate(parent, name, umode, NULL,
@@ -1061,9 +1213,7 @@ struct dentry *rpc_d_lookup_sb(const struct super_block *sb,
1061 const unsigned char *dir_name) 1213 const unsigned char *dir_name)
1062{ 1214{
1063 struct qstr dir = QSTR_INIT(dir_name, strlen(dir_name)); 1215 struct qstr dir = QSTR_INIT(dir_name, strlen(dir_name));
1064 1216 return d_hash_and_lookup(sb->s_root, &dir);
1065 dir.hash = full_name_hash(dir.name, dir.len);
1066 return d_lookup(sb->s_root, &dir);
1067} 1217}
1068EXPORT_SYMBOL_GPL(rpc_d_lookup_sb); 1218EXPORT_SYMBOL_GPL(rpc_d_lookup_sb);
1069 1219
@@ -1116,6 +1266,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
1116 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 1266 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1117 sb->s_magic = RPCAUTH_GSSMAGIC; 1267 sb->s_magic = RPCAUTH_GSSMAGIC;
1118 sb->s_op = &s_ops; 1268 sb->s_op = &s_ops;
1269 sb->s_d_op = &rpc_dentry_operations;
1119 sb->s_time_gran = 1; 1270 sb->s_time_gran = 1;
1120 1271
1121 inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO); 1272 inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO);
@@ -1126,6 +1277,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
1126 return -ENOMEM; 1277 return -ENOMEM;
1127 dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n", 1278 dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n",
1128 net, NET_NAME(net)); 1279 net, NET_NAME(net));
1280 mutex_lock(&sn->pipefs_sb_lock);
1129 sn->pipefs_sb = sb; 1281 sn->pipefs_sb = sb;
1130 err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list, 1282 err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
1131 RPC_PIPEFS_MOUNT, 1283 RPC_PIPEFS_MOUNT,
@@ -1133,6 +1285,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
1133 if (err) 1285 if (err)
1134 goto err_depopulate; 1286 goto err_depopulate;
1135 sb->s_fs_info = get_net(net); 1287 sb->s_fs_info = get_net(net);
1288 mutex_unlock(&sn->pipefs_sb_lock);
1136 return 0; 1289 return 0;
1137 1290
1138err_depopulate: 1291err_depopulate:
@@ -1141,6 +1294,7 @@ err_depopulate:
1141 sb); 1294 sb);
1142 sn->pipefs_sb = NULL; 1295 sn->pipefs_sb = NULL;
1143 __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF); 1296 __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF);
1297 mutex_unlock(&sn->pipefs_sb_lock);
1144 return err; 1298 return err;
1145} 1299}
1146 1300
@@ -1162,12 +1316,12 @@ static void rpc_kill_sb(struct super_block *sb)
1162 goto out; 1316 goto out;
1163 } 1317 }
1164 sn->pipefs_sb = NULL; 1318 sn->pipefs_sb = NULL;
1165 mutex_unlock(&sn->pipefs_sb_lock);
1166 dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n", 1319 dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n",
1167 net, NET_NAME(net)); 1320 net, NET_NAME(net));
1168 blocking_notifier_call_chain(&rpc_pipefs_notifier_list, 1321 blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
1169 RPC_PIPEFS_UMOUNT, 1322 RPC_PIPEFS_UMOUNT,
1170 sb); 1323 sb);
1324 mutex_unlock(&sn->pipefs_sb_lock);
1171 put_net(net); 1325 put_net(net);
1172out: 1326out:
1173 kill_litter_super(sb); 1327 kill_litter_super(sb);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 3df764dc330c..1891a1022c17 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -204,13 +204,15 @@ void rpcb_put_local(struct net *net)
204} 204}
205 205
206static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, 206static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt,
207 struct rpc_clnt *clnt4) 207 struct rpc_clnt *clnt4,
208 bool is_af_local)
208{ 209{
209 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); 210 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
210 211
211 /* Protected by rpcb_create_local_mutex */ 212 /* Protected by rpcb_create_local_mutex */
212 sn->rpcb_local_clnt = clnt; 213 sn->rpcb_local_clnt = clnt;
213 sn->rpcb_local_clnt4 = clnt4; 214 sn->rpcb_local_clnt4 = clnt4;
215 sn->rpcb_is_af_local = is_af_local ? 1 : 0;
214 smp_wmb(); 216 smp_wmb();
215 sn->rpcb_users = 1; 217 sn->rpcb_users = 1;
216 dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " 218 dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: "
@@ -238,6 +240,14 @@ static int rpcb_create_local_unix(struct net *net)
238 .program = &rpcb_program, 240 .program = &rpcb_program,
239 .version = RPCBVERS_2, 241 .version = RPCBVERS_2,
240 .authflavor = RPC_AUTH_NULL, 242 .authflavor = RPC_AUTH_NULL,
243 /*
244 * We turn off the idle timeout to prevent the kernel
245 * from automatically disconnecting the socket.
246 * Otherwise, we'd have to cache the mount namespace
247 * of the caller and somehow pass that to the socket
248 * reconnect code.
249 */
250 .flags = RPC_CLNT_CREATE_NO_IDLE_TIMEOUT,
241 }; 251 };
242 struct rpc_clnt *clnt, *clnt4; 252 struct rpc_clnt *clnt, *clnt4;
243 int result = 0; 253 int result = 0;
@@ -263,7 +273,7 @@ static int rpcb_create_local_unix(struct net *net)
263 clnt4 = NULL; 273 clnt4 = NULL;
264 } 274 }
265 275
266 rpcb_set_local(net, clnt, clnt4); 276 rpcb_set_local(net, clnt, clnt4, true);
267 277
268out: 278out:
269 return result; 279 return result;
@@ -315,7 +325,7 @@ static int rpcb_create_local_net(struct net *net)
315 clnt4 = NULL; 325 clnt4 = NULL;
316 } 326 }
317 327
318 rpcb_set_local(net, clnt, clnt4); 328 rpcb_set_local(net, clnt, clnt4, false);
319 329
320out: 330out:
321 return result; 331 return result;
@@ -376,13 +386,16 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname,
376 return rpc_create(&args); 386 return rpc_create(&args);
377} 387}
378 388
379static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) 389static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, struct rpc_message *msg, bool is_set)
380{ 390{
381 int result, error = 0; 391 int flags = RPC_TASK_NOCONNECT;
392 int error, result = 0;
382 393
394 if (is_set || !sn->rpcb_is_af_local)
395 flags = RPC_TASK_SOFTCONN;
383 msg->rpc_resp = &result; 396 msg->rpc_resp = &result;
384 397
385 error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN); 398 error = rpc_call_sync(clnt, msg, flags);
386 if (error < 0) { 399 if (error < 0) {
387 dprintk("RPC: failed to contact local rpcbind " 400 dprintk("RPC: failed to contact local rpcbind "
388 "server (errno %d).\n", -error); 401 "server (errno %d).\n", -error);
@@ -439,16 +452,19 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short
439 .rpc_argp = &map, 452 .rpc_argp = &map,
440 }; 453 };
441 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); 454 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
455 bool is_set = false;
442 456
443 dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " 457 dprintk("RPC: %sregistering (%u, %u, %d, %u) with local "
444 "rpcbind\n", (port ? "" : "un"), 458 "rpcbind\n", (port ? "" : "un"),
445 prog, vers, prot, port); 459 prog, vers, prot, port);
446 460
447 msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; 461 msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET];
448 if (port) 462 if (port != 0) {
449 msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; 463 msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET];
464 is_set = true;
465 }
450 466
451 return rpcb_register_call(sn->rpcb_local_clnt, &msg); 467 return rpcb_register_call(sn, sn->rpcb_local_clnt, &msg, is_set);
452} 468}
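
The is_set plumbing added above boils down to one decision. A sketch, under the assumption (suggested by the NO_IDLE_TIMEOUT comment earlier in this patch) that an UNSET sent over the AF_LOCAL rpcbind socket must never trigger a reconnect, since the original caller's mount namespace may be gone:

    static int rpcb_task_flags(bool is_set, bool af_local)
    {
    	/* SET requests, and anything on a non-AF_LOCAL transport,
    	 * may connect once; UNSET over AF_LOCAL must not reconnect. */
    	return (is_set || !af_local) ? RPC_TASK_SOFTCONN : RPC_TASK_NOCONNECT;
    }
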
453 469
454/* 470/*
@@ -461,6 +477,7 @@ static int rpcb_register_inet4(struct sunrpc_net *sn,
461 const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; 477 const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
462 struct rpcbind_args *map = msg->rpc_argp; 478 struct rpcbind_args *map = msg->rpc_argp;
463 unsigned short port = ntohs(sin->sin_port); 479 unsigned short port = ntohs(sin->sin_port);
480 bool is_set = false;
464 int result; 481 int result;
465 482
466 map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); 483 map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL);
@@ -471,10 +488,12 @@ static int rpcb_register_inet4(struct sunrpc_net *sn,
471 map->r_addr, map->r_netid); 488 map->r_addr, map->r_netid);
472 489
473 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; 490 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
474 if (port) 491 if (port != 0) {
475 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; 492 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
493 is_set = true;
494 }
476 495
477 result = rpcb_register_call(sn->rpcb_local_clnt4, msg); 496 result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set);
478 kfree(map->r_addr); 497 kfree(map->r_addr);
479 return result; 498 return result;
480} 499}
@@ -489,6 +508,7 @@ static int rpcb_register_inet6(struct sunrpc_net *sn,
489 const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; 508 const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap;
490 struct rpcbind_args *map = msg->rpc_argp; 509 struct rpcbind_args *map = msg->rpc_argp;
491 unsigned short port = ntohs(sin6->sin6_port); 510 unsigned short port = ntohs(sin6->sin6_port);
511 bool is_set = false;
492 int result; 512 int result;
493 513
494 map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); 514 map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL);
@@ -499,10 +519,12 @@ static int rpcb_register_inet6(struct sunrpc_net *sn,
499 map->r_addr, map->r_netid); 519 map->r_addr, map->r_netid);
500 520
501 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; 521 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
502 if (port) 522 if (port != 0) {
503 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; 523 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
524 is_set = true;
525 }
504 526
505 result = rpcb_register_call(sn->rpcb_local_clnt4, msg); 527 result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set);
506 kfree(map->r_addr); 528 kfree(map->r_addr);
507 return result; 529 return result;
508} 530}
@@ -519,7 +541,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn,
519 map->r_addr = ""; 541 map->r_addr = "";
520 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; 542 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
521 543
522 return rpcb_register_call(sn->rpcb_local_clnt4, msg); 544 return rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, false);
523} 545}
524 546
525/** 547/**
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 5356b120dbf8..ff3cc4bf4b24 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -254,11 +254,11 @@ static int rpc_wait_bit_killable(void *word)
254{ 254{
255 if (fatal_signal_pending(current)) 255 if (fatal_signal_pending(current))
256 return -ERESTARTSYS; 256 return -ERESTARTSYS;
257 freezable_schedule(); 257 freezable_schedule_unsafe();
258 return 0; 258 return 0;
259} 259}
260 260
261#ifdef RPC_DEBUG 261#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS)
262static void rpc_task_set_debuginfo(struct rpc_task *task) 262static void rpc_task_set_debuginfo(struct rpc_task *task)
263{ 263{
264 static atomic_t rpc_pid; 264 static atomic_t rpc_pid;
@@ -446,20 +446,6 @@ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct r
446} 446}
447 447
448/* 448/*
449 * Tests whether rpc queue is empty
450 */
451int rpc_queue_empty(struct rpc_wait_queue *queue)
452{
453 int res;
454
455 spin_lock_bh(&queue->lock);
456 res = queue->qlen;
457 spin_unlock_bh(&queue->lock);
458 return res == 0;
459}
460EXPORT_SYMBOL_GPL(rpc_queue_empty);
461
462/*
463 * Wake up a task on a specific queue 449 * Wake up a task on a specific queue
464 */ 450 */
465void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task) 451void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task)
@@ -804,7 +790,6 @@ static void __rpc_execute(struct rpc_task *task)
804 task->tk_flags |= RPC_TASK_KILLED; 790 task->tk_flags |= RPC_TASK_KILLED;
805 rpc_exit(task, -ERESTARTSYS); 791 rpc_exit(task, -ERESTARTSYS);
806 } 792 }
807 rpc_set_running(task);
808 dprintk("RPC: %5u sync task resuming\n", task->tk_pid); 793 dprintk("RPC: %5u sync task resuming\n", task->tk_pid);
809 } 794 }
810 795
@@ -825,9 +810,11 @@ static void __rpc_execute(struct rpc_task *task)
825 */ 810 */
826void rpc_execute(struct rpc_task *task) 811void rpc_execute(struct rpc_task *task)
827{ 812{
813 bool is_async = RPC_IS_ASYNC(task);
814
828 rpc_set_active(task); 815 rpc_set_active(task);
829 rpc_make_runnable(task); 816 rpc_make_runnable(task);
830 if (!RPC_IS_ASYNC(task)) 817 if (!is_async)
831 __rpc_execute(task); 818 __rpc_execute(task);
832} 819}
833 820
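
The rpc_execute() change caches RPC_IS_ASYNC() before the task becomes runnable. A plausible reading (an assumption; the hunk itself does not say): once rpc_make_runnable() queues an async task, rpciod may run and release it, so testing the flag through @task afterwards would be a use-after-free. In sketch form:

    bool is_async = RPC_IS_ASYNC(task);	/* snapshot while @task is still ours */

    rpc_set_active(task);
    rpc_make_runnable(task);		/* async tasks may now run and vanish */
    if (!is_async)			/* note: does not dereference @task */
    	__rpc_execute(task);
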
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 21b75cb08c03..54530490944e 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -188,7 +188,7 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
188 188
189 seq_printf(seq, "\tRPC iostats version: %s ", RPC_IOSTATS_VERS); 189 seq_printf(seq, "\tRPC iostats version: %s ", RPC_IOSTATS_VERS);
190 seq_printf(seq, "p/v: %u/%u (%s)\n", 190 seq_printf(seq, "p/v: %u/%u (%s)\n",
191 clnt->cl_prog, clnt->cl_vers, clnt->cl_protname); 191 clnt->cl_prog, clnt->cl_vers, clnt->cl_program->name);
192 192
193 rcu_read_lock(); 193 rcu_read_lock();
194 xprt = rcu_dereference(clnt->cl_xprt); 194 xprt = rcu_dereference(clnt->cl_xprt);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 89a588b4478b..b974571126fe 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -740,7 +740,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
740 740
741 __module_get(serv->sv_module); 741 __module_get(serv->sv_module);
742 task = kthread_create_on_node(serv->sv_function, rqstp, 742 task = kthread_create_on_node(serv->sv_function, rqstp,
743 node, serv->sv_name); 743 node, "%s", serv->sv_name);
744 if (IS_ERR(task)) { 744 if (IS_ERR(task)) {
745 error = PTR_ERR(task); 745 error = PTR_ERR(task);
746 module_put(serv->sv_module); 746 module_put(serv->sv_module);
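
The added "%s" is the usual defence against format specifiers leaking into a printf-style argument; compare:

    kthread_create_on_node(fn, data, node, serv->sv_name);       /* sv_name parsed as a format */
    kthread_create_on_node(fn, data, node, "%s", serv->sv_name); /* sv_name taken literally */
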
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 06bdf5a1082c..621ca7b4a155 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -347,13 +347,13 @@ ip_map_cached_get(struct svc_xprt *xprt)
347 spin_lock(&xprt->xpt_lock); 347 spin_lock(&xprt->xpt_lock);
348 ipm = xprt->xpt_auth_cache; 348 ipm = xprt->xpt_auth_cache;
349 if (ipm != NULL) { 349 if (ipm != NULL) {
350 if (!cache_valid(&ipm->h)) { 350 sn = net_generic(xprt->xpt_net, sunrpc_net_id);
351 if (cache_is_expired(sn->ip_map_cache, &ipm->h)) {
351 /* 352 /*
352 * The entry has been invalidated since it was 353 * The entry has been invalidated since it was
353 * remembered, e.g. by a second mount from the 354 * remembered, e.g. by a second mount from the
354 * same IP address. 355 * same IP address.
355 */ 356 */
356 sn = net_generic(xprt->xpt_net, sunrpc_net_id);
357 xprt->xpt_auth_cache = NULL; 357 xprt->xpt_auth_cache = NULL;
358 spin_unlock(&xprt->xpt_lock); 358 spin_unlock(&xprt->xpt_lock);
359 cache_put(&ipm->h, sn->ip_map_cache); 359 cache_put(&ipm->h, sn->ip_map_cache);
@@ -493,8 +493,6 @@ static int unix_gid_parse(struct cache_detail *cd,
493 if (rv) 493 if (rv)
494 return -EINVAL; 494 return -EINVAL;
495 uid = make_kuid(&init_user_ns, id); 495 uid = make_kuid(&init_user_ns, id);
496 if (!uid_valid(uid))
497 return -EINVAL;
498 ug.uid = uid; 496 ug.uid = uid;
499 497
500 expiry = get_expiry(&mesg); 498 expiry = get_expiry(&mesg);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 0f679df7d072..9c9caaa5e0d3 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -442,7 +442,7 @@ static void svc_tcp_write_space(struct sock *sk)
442{ 442{
443 struct socket *sock = sk->sk_socket; 443 struct socket *sock = sk->sk_socket;
444 444
445 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) 445 if (sk_stream_is_writeable(sk) && sock)
446 clear_bit(SOCK_NOSPACE, &sock->flags); 446 clear_bit(SOCK_NOSPACE, &sock->flags);
447 svc_write_space(sk); 447 svc_write_space(sk);
448} 448}
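
sk_stream_is_writeable() packages the open-coded test that both this hunk and the xprtsock.c hunk further down replace; it is equivalent to:

    static inline bool sk_stream_is_writeable_sketch(const struct sock *sk)
    {
    	return sk_stream_wspace(sk) >= sk_stream_min_wspace(sk);
    }
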
@@ -917,7 +917,10 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk)
917 len = svsk->sk_datalen; 917 len = svsk->sk_datalen;
918 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; 918 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
919 for (i = 0; i < npages; i++) { 919 for (i = 0; i < npages; i++) {
920 BUG_ON(svsk->sk_pages[i] == NULL); 920 if (svsk->sk_pages[i] == NULL) {
921 WARN_ON_ONCE(1);
922 continue;
923 }
921 put_page(svsk->sk_pages[i]); 924 put_page(svsk->sk_pages[i]);
922 svsk->sk_pages[i] = NULL; 925 svsk->sk_pages[i] = NULL;
923 } 926 }
@@ -1092,8 +1095,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
1092 goto err_noclose; 1095 goto err_noclose;
1093 } 1096 }
1094 1097
1095 if (svc_sock_reclen(svsk) < 8) 1098 if (svsk->sk_datalen < 8) {
1099 svsk->sk_datalen = 0;
1096 goto err_delete; /* client is nuts. */ 1100 goto err_delete; /* client is nuts. */
1101 }
1097 1102
1098 rqstp->rq_arg.len = svsk->sk_datalen; 1103 rqstp->rq_arg.len = svsk->sk_datalen;
1099 rqstp->rq_arg.page_base = 0; 1104 rqstp->rq_arg.page_base = 0;
@@ -1188,7 +1193,9 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt)
1188 if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) 1193 if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
1189 return 1; 1194 return 1;
1190 required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; 1195 required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
1191 if (sk_stream_wspace(svsk->sk_sk) >= required) 1196 if (sk_stream_wspace(svsk->sk_sk) >= required ||
1197 (sk_stream_min_wspace(svsk->sk_sk) == 0 &&
1198 atomic_read(&xprt->xpt_reserved) == 0))
1192 return 1; 1199 return 1;
1193 set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); 1200 set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
1194 return 0; 1201 return 0;
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index af7d339add9d..c99c58e2ee66 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -40,7 +40,7 @@ EXPORT_SYMBOL_GPL(nlm_debug);
40#ifdef RPC_DEBUG 40#ifdef RPC_DEBUG
41 41
42static struct ctl_table_header *sunrpc_table_header; 42static struct ctl_table_header *sunrpc_table_header;
43static ctl_table sunrpc_table[]; 43static struct ctl_table sunrpc_table[];
44 44
45void 45void
46rpc_register_sysctl(void) 46rpc_register_sysctl(void)
@@ -58,7 +58,7 @@ rpc_unregister_sysctl(void)
58 } 58 }
59} 59}
60 60
61static int proc_do_xprt(ctl_table *table, int write, 61static int proc_do_xprt(struct ctl_table *table, int write,
62 void __user *buffer, size_t *lenp, loff_t *ppos) 62 void __user *buffer, size_t *lenp, loff_t *ppos)
63{ 63{
64 char tmpbuf[256]; 64 char tmpbuf[256];
@@ -73,7 +73,7 @@ static int proc_do_xprt(ctl_table *table, int write,
73} 73}
74 74
75static int 75static int
76proc_dodebug(ctl_table *table, int write, 76proc_dodebug(struct ctl_table *table, int write,
77 void __user *buffer, size_t *lenp, loff_t *ppos) 77 void __user *buffer, size_t *lenp, loff_t *ppos)
78{ 78{
79 char tmpbuf[20], c, *s; 79 char tmpbuf[20], c, *s;
@@ -135,7 +135,7 @@ done:
135} 135}
136 136
137 137
138static ctl_table debug_table[] = { 138static struct ctl_table debug_table[] = {
139 { 139 {
140 .procname = "rpc_debug", 140 .procname = "rpc_debug",
141 .data = &rpc_debug, 141 .data = &rpc_debug,
@@ -173,7 +173,7 @@ static ctl_table debug_table[] = {
173 { } 173 { }
174}; 174};
175 175
176static ctl_table sunrpc_table[] = { 176static struct ctl_table sunrpc_table[] = {
177 { 177 {
178 .procname = "sunrpc", 178 .procname = "sunrpc",
179 .mode = 0555, 179 .mode = 0555,
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 75edcfad6e26..1504bb11e4f3 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -207,10 +207,13 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
207 pgfrom_base -= copy; 207 pgfrom_base -= copy;
208 208
209 vto = kmap_atomic(*pgto); 209 vto = kmap_atomic(*pgto);
210 vfrom = kmap_atomic(*pgfrom); 210 if (*pgto != *pgfrom) {
211 memmove(vto + pgto_base, vfrom + pgfrom_base, copy); 211 vfrom = kmap_atomic(*pgfrom);
212 memcpy(vto + pgto_base, vfrom + pgfrom_base, copy);
213 kunmap_atomic(vfrom);
214 } else
215 memmove(vto + pgto_base, vto + pgfrom_base, copy);
212 flush_dcache_page(*pgto); 216 flush_dcache_page(*pgto);
213 kunmap_atomic(vfrom);
214 kunmap_atomic(vto); 217 kunmap_atomic(vto);
215 218
216 } while ((len -= copy) != 0); 219 } while ((len -= copy) != 0);
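
A sketch of the rule the _shift_data_right_pages() fix enforces: map each distinct page at most once with kmap_atomic(), and reserve memmove() for the one case where source and destination can overlap, i.e. within a single page. The helper name is illustrative:

    static void copy_page_data(struct page *to, size_t to_off,
    			   struct page *from, size_t from_off, size_t n)
    {
    	char *vto = kmap_atomic(to);

    	if (to != from) {
    		char *vfrom = kmap_atomic(from);

    		memcpy(vto + to_off, vfrom + from_off, n);  /* disjoint pages */
    		kunmap_atomic(vfrom);
    	} else {
    		memmove(vto + to_off, vto + from_off, n);   /* may overlap */
    	}
    	flush_dcache_page(to);
    	kunmap_atomic(vto);
    }
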
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 8343737e85f4..c1b6270262c2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -84,7 +84,7 @@ struct workqueue_struct *svc_rdma_wq;
84 * resets the associated statistic to zero. Any read returns its 84 * resets the associated statistic to zero. Any read returns its
85 * current value. 85 * current value.
86 */ 86 */
87static int read_reset_stat(ctl_table *table, int write, 87static int read_reset_stat(struct ctl_table *table, int write,
88 void __user *buffer, size_t *lenp, 88 void __user *buffer, size_t *lenp,
89 loff_t *ppos) 89 loff_t *ppos)
90{ 90{
@@ -119,7 +119,7 @@ static int read_reset_stat(ctl_table *table, int write,
119} 119}
120 120
121static struct ctl_table_header *svcrdma_table_header; 121static struct ctl_table_header *svcrdma_table_header;
122static ctl_table svcrdma_parm_table[] = { 122static struct ctl_table svcrdma_parm_table[] = {
123 { 123 {
124 .procname = "max_requests", 124 .procname = "max_requests",
125 .data = &svcrdma_max_requests, 125 .data = &svcrdma_max_requests,
@@ -214,7 +214,7 @@ static ctl_table svcrdma_parm_table[] = {
214 { }, 214 { },
215}; 215};
216 216
217static ctl_table svcrdma_table[] = { 217static struct ctl_table svcrdma_table[] = {
218 { 218 {
219 .procname = "svc_rdma", 219 .procname = "svc_rdma",
220 .mode = 0555, 220 .mode = 0555,
@@ -223,7 +223,7 @@ static ctl_table svcrdma_table[] = {
223 { }, 223 { },
224}; 224};
225 225
226static ctl_table svcrdma_root_table[] = { 226static struct ctl_table svcrdma_root_table[] = {
227 { 227 {
228 .procname = "sunrpc", 228 .procname = "sunrpc",
229 .mode = 0555, 229 .mode = 0555,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index 8d2edddf48cf..65b146297f5a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -98,6 +98,7 @@ void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch,
98 */ 98 */
99static u32 *decode_write_list(u32 *va, u32 *vaend) 99static u32 *decode_write_list(u32 *va, u32 *vaend)
100{ 100{
101 unsigned long start, end;
101 int nchunks; 102 int nchunks;
102 103
103 struct rpcrdma_write_array *ary = 104 struct rpcrdma_write_array *ary =
@@ -113,9 +114,12 @@ static u32 *decode_write_list(u32 *va, u32 *vaend)
113 return NULL; 114 return NULL;
114 } 115 }
115 nchunks = ntohl(ary->wc_nchunks); 116 nchunks = ntohl(ary->wc_nchunks);
116 if (((unsigned long)&ary->wc_array[0] + 117
117 (sizeof(struct rpcrdma_write_chunk) * nchunks)) > 118 start = (unsigned long)&ary->wc_array[0];
118 (unsigned long)vaend) { 119 end = (unsigned long)vaend;
120 if (nchunks < 0 ||
121 nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
122 (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
119 dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", 123 dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
120 ary, nchunks, vaend); 124 ary, nchunks, vaend);
121 return NULL; 125 return NULL;
@@ -129,6 +133,7 @@ static u32 *decode_write_list(u32 *va, u32 *vaend)
129 133
130static u32 *decode_reply_array(u32 *va, u32 *vaend) 134static u32 *decode_reply_array(u32 *va, u32 *vaend)
131{ 135{
136 unsigned long start, end;
132 int nchunks; 137 int nchunks;
133 struct rpcrdma_write_array *ary = 138 struct rpcrdma_write_array *ary =
134 (struct rpcrdma_write_array *)va; 139 (struct rpcrdma_write_array *)va;
@@ -143,9 +148,12 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend)
143 return NULL; 148 return NULL;
144 } 149 }
145 nchunks = ntohl(ary->wc_nchunks); 150 nchunks = ntohl(ary->wc_nchunks);
146 if (((unsigned long)&ary->wc_array[0] + 151
147 (sizeof(struct rpcrdma_write_chunk) * nchunks)) > 152 start = (unsigned long)&ary->wc_array[0];
148 (unsigned long)vaend) { 153 end = (unsigned long)vaend;
154 if (nchunks < 0 ||
155 nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
156 (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
149 dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", 157 dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
150 ary, nchunks, vaend); 158 ary, nchunks, vaend);
151 return NULL; 159 return NULL;
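
Both decode_write_list() and decode_reply_array() now perform the same overflow-safe bounds check; extracted as a helper it would read (a sketch, helper name illustrative):

    static bool wc_array_fits(unsigned long start, unsigned long end,
    			  int nchunks, size_t elem_size)
    {
    	if (nchunks < 0)
    		return false;
    	/* reject counts whose byte size would wrap past SIZE_MAX */
    	if ((unsigned long)nchunks > (SIZE_MAX - start) / elem_size)
    		return false;
    	return start + nchunks * elem_size <= end;
    }
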
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 794312f22b9b..285dc0884115 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -86,7 +86,7 @@ static unsigned int max_memreg = RPCRDMA_LAST - 1;
86 86
87static struct ctl_table_header *sunrpc_table_header; 87static struct ctl_table_header *sunrpc_table_header;
88 88
89static ctl_table xr_tunables_table[] = { 89static struct ctl_table xr_tunables_table[] = {
90 { 90 {
91 .procname = "rdma_slot_table_entries", 91 .procname = "rdma_slot_table_entries",
92 .data = &xprt_rdma_slot_table_entries, 92 .data = &xprt_rdma_slot_table_entries,
@@ -138,7 +138,7 @@ static ctl_table xr_tunables_table[] = {
138 { }, 138 { },
139}; 139};
140 140
141static ctl_table sunrpc_table[] = { 141static struct ctl_table sunrpc_table[] = {
142 { 142 {
143 .procname = "sunrpc", 143 .procname = "sunrpc",
144 .mode = 0555, 144 .mode = 0555,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ffd50348a509..ee03d35677d9 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -47,6 +47,8 @@
47#include <net/udp.h> 47#include <net/udp.h>
48#include <net/tcp.h> 48#include <net/tcp.h>
49 49
50#include <trace/events/sunrpc.h>
51
50#include "sunrpc.h" 52#include "sunrpc.h"
51 53
52static void xs_close(struct rpc_xprt *xprt); 54static void xs_close(struct rpc_xprt *xprt);
@@ -87,7 +89,7 @@ static struct ctl_table_header *sunrpc_table_header;
87 * FIXME: changing the UDP slot table size should also resize the UDP 89 * FIXME: changing the UDP slot table size should also resize the UDP
88 * socket buffers for existing UDP transports 90 * socket buffers for existing UDP transports
89 */ 91 */
90static ctl_table xs_tunables_table[] = { 92static struct ctl_table xs_tunables_table[] = {
91 { 93 {
92 .procname = "udp_slot_table_entries", 94 .procname = "udp_slot_table_entries",
93 .data = &xprt_udp_slot_table_entries, 95 .data = &xprt_udp_slot_table_entries,
@@ -143,7 +145,7 @@ static ctl_table xs_tunables_table[] = {
143 { }, 145 { },
144}; 146};
145 147
146static ctl_table sunrpc_table[] = { 148static struct ctl_table sunrpc_table[] = {
147 { 149 {
148 .procname = "sunrpc", 150 .procname = "sunrpc",
149 .mode = 0555, 151 .mode = 0555,
@@ -665,8 +667,10 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
665 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 667 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
666 struct socket *sock = transport->sock; 668 struct socket *sock = transport->sock;
667 669
668 if (sock != NULL) 670 if (sock != NULL) {
669 kernel_sock_shutdown(sock, SHUT_WR); 671 kernel_sock_shutdown(sock, SHUT_WR);
672 trace_rpc_socket_shutdown(xprt, sock);
673 }
670} 674}
671 675
672/** 676/**
@@ -811,6 +815,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
811 815
812 sk->sk_no_check = 0; 816 sk->sk_no_check = 0;
813 817
818 trace_rpc_socket_close(&transport->xprt, sock);
814 sock_release(sock); 819 sock_release(sock);
815} 820}
816 821
@@ -1492,6 +1497,7 @@ static void xs_tcp_state_change(struct sock *sk)
1492 sock_flag(sk, SOCK_ZAPPED), 1497 sock_flag(sk, SOCK_ZAPPED),
1493 sk->sk_shutdown); 1498 sk->sk_shutdown);
1494 1499
1500 trace_rpc_socket_state_change(xprt, sk->sk_socket);
1495 switch (sk->sk_state) { 1501 switch (sk->sk_state) {
1496 case TCP_ESTABLISHED: 1502 case TCP_ESTABLISHED:
1497 spin_lock(&xprt->transport_lock); 1503 spin_lock(&xprt->transport_lock);
@@ -1602,7 +1608,7 @@ static void xs_tcp_write_space(struct sock *sk)
1602 read_lock_bh(&sk->sk_callback_lock); 1608 read_lock_bh(&sk->sk_callback_lock);
1603 1609
1604 /* from net/core/stream.c:sk_stream_write_space */ 1610 /* from net/core/stream.c:sk_stream_write_space */
1605 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) 1611 if (sk_stream_is_writeable(sk))
1606 xs_write_space(sk); 1612 xs_write_space(sk);
1607 1613
1608 read_unlock_bh(&sk->sk_callback_lock); 1614 read_unlock_bh(&sk->sk_callback_lock);
@@ -1896,6 +1902,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
1896 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); 1902 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
1897 1903
1898 status = xs_local_finish_connecting(xprt, sock); 1904 status = xs_local_finish_connecting(xprt, sock);
1905 trace_rpc_socket_connect(xprt, sock, status);
1899 switch (status) { 1906 switch (status) {
1900 case 0: 1907 case 0:
1901 dprintk("RPC: xprt %p connected to %s\n", 1908 dprintk("RPC: xprt %p connected to %s\n",
@@ -2039,6 +2046,7 @@ static void xs_udp_setup_socket(struct work_struct *work)
2039 xprt->address_strings[RPC_DISPLAY_PORT]); 2046 xprt->address_strings[RPC_DISPLAY_PORT]);
2040 2047
2041 xs_udp_finish_connecting(xprt, sock); 2048 xs_udp_finish_connecting(xprt, sock);
2049 trace_rpc_socket_connect(xprt, sock, 0);
2042 status = 0; 2050 status = 0;
2043out: 2051out:
2044 xprt_clear_connecting(xprt); 2052 xprt_clear_connecting(xprt);
@@ -2064,6 +2072,8 @@ static void xs_abort_connection(struct sock_xprt *transport)
2064 memset(&any, 0, sizeof(any)); 2072 memset(&any, 0, sizeof(any));
2065 any.sa_family = AF_UNSPEC; 2073 any.sa_family = AF_UNSPEC;
2066 result = kernel_connect(transport->sock, &any, sizeof(any), 0); 2074 result = kernel_connect(transport->sock, &any, sizeof(any), 0);
2075 trace_rpc_socket_reset_connection(&transport->xprt,
2076 transport->sock, result);
2067 if (!result) 2077 if (!result)
2068 xs_sock_reset_connection_flags(&transport->xprt); 2078 xs_sock_reset_connection_flags(&transport->xprt);
2069 dprintk("RPC: AF_UNSPEC connect return code %d\n", result); 2079 dprintk("RPC: AF_UNSPEC connect return code %d\n", result);
@@ -2194,6 +2204,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2194 xprt->address_strings[RPC_DISPLAY_PORT]); 2204 xprt->address_strings[RPC_DISPLAY_PORT]);
2195 2205
2196 status = xs_tcp_finish_connecting(xprt, sock); 2206 status = xs_tcp_finish_connecting(xprt, sock);
2207 trace_rpc_socket_connect(xprt, sock, status);
2197 dprintk("RPC: %p connect status %d connected %d sock state %d\n", 2208 dprintk("RPC: %p connect status %d connected %d sock state %d\n",
2198 xprt, -status, xprt_connected(xprt), 2209 xprt, -status, xprt_connected(xprt),
2199 sock->sk->sk_state); 2210 sock->sk->sk_state);
@@ -2534,7 +2545,6 @@ static struct rpc_xprt_ops bc_tcp_ops = {
2534 .reserve_xprt = xprt_reserve_xprt, 2545 .reserve_xprt = xprt_reserve_xprt,
2535 .release_xprt = xprt_release_xprt, 2546 .release_xprt = xprt_release_xprt,
2536 .alloc_slot = xprt_alloc_slot, 2547 .alloc_slot = xprt_alloc_slot,
2537 .rpcbind = xs_local_rpcbind,
2538 .buf_alloc = bc_malloc, 2548 .buf_alloc = bc_malloc,
2539 .buf_free = bc_free, 2549 .buf_free = bc_free,
2540 .send_request = bc_send_request, 2550 .send_request = bc_send_request,
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 9bc6db04be3e..e7000be321b0 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -47,12 +47,12 @@ static int net_ctl_permissions(struct ctl_table_header *head,
47 47
48 /* Allow network administrator to have same access as root. */ 48 /* Allow network administrator to have same access as root. */
49 if (ns_capable(net->user_ns, CAP_NET_ADMIN) || 49 if (ns_capable(net->user_ns, CAP_NET_ADMIN) ||
50 uid_eq(root_uid, current_uid())) { 50 uid_eq(root_uid, current_euid())) {
51 int mode = (table->mode >> 6) & 7; 51 int mode = (table->mode >> 6) & 7;
52 return (mode << 6) | (mode << 3) | mode; 52 return (mode << 6) | (mode << 3) | mode;
53 } 53 }
54 /* Allow netns root group to have the same access as the root group */ 54 /* Allow netns root group to have the same access as the root group */
55 if (gid_eq(root_gid, current_gid())) { 55 if (in_egroup_p(root_gid)) {
56 int mode = (table->mode >> 3) & 7; 56 int mode = (table->mode >> 3) & 7;
57 return (mode << 3) | mode; 57 return (mode << 3) | mode;
58 } 58 }
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 4df8e02d9008..b282f7130d2b 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -8,6 +8,7 @@ tipc-y += addr.o bcast.o bearer.o config.o \
8 core.o handler.o link.o discover.o msg.o \ 8 core.o handler.o link.o discover.o msg.o \
9 name_distr.o subscr.o name_table.o net.o \ 9 name_distr.o subscr.o name_table.o net.o \
10 netlink.o node.o node_subscr.o port.o ref.o \ 10 netlink.o node.o node_subscr.o port.o ref.o \
11 socket.o log.o eth_media.o 11 socket.o log.o eth_media.o server.o
12 12
13tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o 13tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
14tipc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index e5f3da507823..716de1ac6cb5 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -578,8 +578,7 @@ u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr)
578 * Returns 0 (packet sent successfully) under all circumstances, 578 * Returns 0 (packet sent successfully) under all circumstances,
579 * since the broadcast link's pseudo-bearer never blocks 579 * since the broadcast link's pseudo-bearer never blocks
580 */ 580 */
581static int tipc_bcbearer_send(struct sk_buff *buf, 581static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1,
582 struct tipc_bearer *unused1,
583 struct tipc_media_addr *unused2) 582 struct tipc_media_addr *unused2)
584{ 583{
585 int bp_index; 584 int bp_index;
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index a93306557e00..6ee587b469fd 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -75,7 +75,8 @@ void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node);
75/** 75/**
76 * tipc_nmap_equal - test for equality of node maps 76 * tipc_nmap_equal - test for equality of node maps
77 */ 77 */
78static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b) 78static inline int tipc_nmap_equal(struct tipc_node_map *nm_a,
79 struct tipc_node_map *nm_b)
79{ 80{
80 return !memcmp(nm_a, nm_b, sizeof(*nm_a)); 81 return !memcmp(nm_a, nm_b, sizeof(*nm_a));
81} 82}
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index cb29ef7ba2f0..609c30c80816 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -460,6 +460,7 @@ static void bearer_disable(struct tipc_bearer *b_ptr)
460{ 460{
461 struct tipc_link *l_ptr; 461 struct tipc_link *l_ptr;
462 struct tipc_link *temp_l_ptr; 462 struct tipc_link *temp_l_ptr;
463 struct tipc_link_req *temp_req;
463 464
464 pr_info("Disabling bearer <%s>\n", b_ptr->name); 465 pr_info("Disabling bearer <%s>\n", b_ptr->name);
465 spin_lock_bh(&b_ptr->lock); 466 spin_lock_bh(&b_ptr->lock);
@@ -468,9 +469,13 @@ static void bearer_disable(struct tipc_bearer *b_ptr)
468 list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { 469 list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
469 tipc_link_delete(l_ptr); 470 tipc_link_delete(l_ptr);
470 } 471 }
471 if (b_ptr->link_req) 472 temp_req = b_ptr->link_req;
472 tipc_disc_delete(b_ptr->link_req); 473 b_ptr->link_req = NULL;
473 spin_unlock_bh(&b_ptr->lock); 474 spin_unlock_bh(&b_ptr->lock);
475
476 if (temp_req)
477 tipc_disc_delete(temp_req);
478
474 memset(b_ptr, 0, sizeof(struct tipc_bearer)); 479 memset(b_ptr, 0, sizeof(struct tipc_bearer));
475} 480}
476 481
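
The bearer_disable() rework is the classic detach-then-free pattern: unlink the object while the spinlock is held, then call the destructor outside it, since tipc_disc_delete() may sleep. Sketched:

    spin_lock_bh(&b_ptr->lock);
    temp_req = b_ptr->link_req;
    b_ptr->link_req = NULL;		/* no other CPU can reach it now */
    spin_unlock_bh(&b_ptr->lock);

    if (temp_req)
    	tipc_disc_delete(temp_req);	/* safe to sleep here */
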
diff --git a/net/tipc/config.c b/net/tipc/config.c
index f67866c765dd..c301a9a592d8 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -2,7 +2,7 @@
2 * net/tipc/config.c: TIPC configuration management code 2 * net/tipc/config.c: TIPC configuration management code
3 * 3 *
4 * Copyright (c) 2002-2006, Ericsson AB 4 * Copyright (c) 2002-2006, Ericsson AB
5 * Copyright (c) 2004-2007, 2010-2012, Wind River Systems 5 * Copyright (c) 2004-2007, 2010-2013, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -38,12 +38,12 @@
38#include "port.h" 38#include "port.h"
39#include "name_table.h" 39#include "name_table.h"
40#include "config.h" 40#include "config.h"
41#include "server.h"
41 42
42#define REPLY_TRUNCATED "<truncated>\n" 43#define REPLY_TRUNCATED "<truncated>\n"
43 44
44static u32 config_port_ref; 45static DEFINE_MUTEX(config_mutex);
45 46static struct tipc_server cfgsrv;
46static DEFINE_SPINLOCK(config_lock);
47 47
48static const void *req_tlv_area; /* request message TLV area */ 48static const void *req_tlv_area; /* request message TLV area */
49static int req_tlv_space; /* request message TLV area size */ 49static int req_tlv_space; /* request message TLV area size */
@@ -181,18 +181,7 @@ static struct sk_buff *cfg_set_own_addr(void)
181 if (tipc_own_addr) 181 if (tipc_own_addr)
182 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED 182 return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
183 " (cannot change node address once assigned)"); 183 " (cannot change node address once assigned)");
184
185 /*
186 * Must temporarily release configuration spinlock while switching into
187 * networking mode as it calls tipc_eth_media_start(), which may sleep.
188 * Releasing the lock is harmless as other locally-issued configuration
189 * commands won't occur until this one completes, and remotely-issued
190 * configuration commands can't be received until a local configuration
191 * command to enable the first bearer is received and processed.
192 */
193 spin_unlock_bh(&config_lock);
194 tipc_core_start_net(addr); 184 tipc_core_start_net(addr);
195 spin_lock_bh(&config_lock);
196 return tipc_cfg_reply_none(); 185 return tipc_cfg_reply_none();
197} 186}
198 187
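
With config_lock converted to a mutex, the unlock/relock dance the deleted comment described becomes unnecessary: sleeping calls such as tipc_core_start_net() are legal under a mutex. In sketch form:

    mutex_lock(&config_mutex);
    tipc_core_start_net(addr);	/* may sleep; fine under a mutex */
    mutex_unlock(&config_mutex);
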
@@ -248,7 +237,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
248{ 237{
249 struct sk_buff *rep_tlv_buf; 238 struct sk_buff *rep_tlv_buf;
250 239
251 spin_lock_bh(&config_lock); 240 mutex_lock(&config_mutex);
252 241
253 /* Save request and reply details in a well-known location */ 242 /* Save request and reply details in a well-known location */
254 req_tlv_area = request_area; 243 req_tlv_area = request_area;
@@ -377,37 +366,31 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
377 366
378 /* Return reply buffer */ 367 /* Return reply buffer */
379exit: 368exit:
380 spin_unlock_bh(&config_lock); 369 mutex_unlock(&config_mutex);
381 return rep_tlv_buf; 370 return rep_tlv_buf;
382} 371}
383 372
384static void cfg_named_msg_event(void *userdata, 373static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr,
385 u32 port_ref, 374 void *usr_data, void *buf, size_t len)
386 struct sk_buff **buf,
387 const unchar *msg,
388 u32 size,
389 u32 importance,
390 struct tipc_portid const *orig,
391 struct tipc_name_seq const *dest)
392{ 375{
393 struct tipc_cfg_msg_hdr *req_hdr; 376 struct tipc_cfg_msg_hdr *req_hdr;
394 struct tipc_cfg_msg_hdr *rep_hdr; 377 struct tipc_cfg_msg_hdr *rep_hdr;
395 struct sk_buff *rep_buf; 378 struct sk_buff *rep_buf;
379 int ret;
396 380
397 /* Validate configuration message header (ignore invalid message) */ 381 /* Validate configuration message header (ignore invalid message) */
398 req_hdr = (struct tipc_cfg_msg_hdr *)msg; 382 req_hdr = (struct tipc_cfg_msg_hdr *)buf;
399 if ((size < sizeof(*req_hdr)) || 383 if ((len < sizeof(*req_hdr)) ||
400 (size != TCM_ALIGN(ntohl(req_hdr->tcm_len))) || 384 (len != TCM_ALIGN(ntohl(req_hdr->tcm_len))) ||
401 (ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) { 385 (ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) {
402 pr_warn("Invalid configuration message discarded\n"); 386 pr_warn("Invalid configuration message discarded\n");
403 return; 387 return;
404 } 388 }
405 389
406 /* Generate reply for request (if can't, return request) */ 390 /* Generate reply for request (if can't, return request) */
407 rep_buf = tipc_cfg_do_cmd(orig->node, 391 rep_buf = tipc_cfg_do_cmd(addr->addr.id.node, ntohs(req_hdr->tcm_type),
408 ntohs(req_hdr->tcm_type), 392 buf + sizeof(*req_hdr),
409 msg + sizeof(*req_hdr), 393 len - sizeof(*req_hdr),
410 size - sizeof(*req_hdr),
411 BUF_HEADROOM + MAX_H_SIZE + sizeof(*rep_hdr)); 394 BUF_HEADROOM + MAX_H_SIZE + sizeof(*rep_hdr));
412 if (rep_buf) { 395 if (rep_buf) {
413 skb_push(rep_buf, sizeof(*rep_hdr)); 396 skb_push(rep_buf, sizeof(*rep_hdr));
@@ -415,57 +398,51 @@ static void cfg_named_msg_event(void *userdata,
415 memcpy(rep_hdr, req_hdr, sizeof(*rep_hdr)); 398 memcpy(rep_hdr, req_hdr, sizeof(*rep_hdr));
416 rep_hdr->tcm_len = htonl(rep_buf->len); 399 rep_hdr->tcm_len = htonl(rep_buf->len);
417 rep_hdr->tcm_flags &= htons(~TCM_F_REQUEST); 400 rep_hdr->tcm_flags &= htons(~TCM_F_REQUEST);
418 } else {
419 rep_buf = *buf;
420 *buf = NULL;
421 }
422 401
423 /* NEED TO ADD CODE TO HANDLE FAILED SEND (SUCH AS CONGESTION) */ 402 ret = tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data,
424 tipc_send_buf2port(port_ref, orig, rep_buf, rep_buf->len); 403 rep_buf->len);
404 if (ret < 0)
405 pr_err("Sending cfg reply message failed, no memory\n");
406
407 kfree_skb(rep_buf);
408 }
425} 409}
426 410
411static struct sockaddr_tipc cfgsrv_addr __read_mostly = {
412 .family = AF_TIPC,
413 .addrtype = TIPC_ADDR_NAMESEQ,
414 .addr.nameseq.type = TIPC_CFG_SRV,
415 .addr.nameseq.lower = 0,
416 .addr.nameseq.upper = 0,
417 .scope = TIPC_ZONE_SCOPE
418};
419
420static struct tipc_server cfgsrv __read_mostly = {
421 .saddr = &cfgsrv_addr,
422 .imp = TIPC_CRITICAL_IMPORTANCE,
423 .type = SOCK_RDM,
424 .max_rcvbuf_size = 64 * 1024,
425 .name = "cfg_server",
426 .tipc_conn_recvmsg = cfg_conn_msg_event,
427 .tipc_conn_new = NULL,
428 .tipc_conn_shutdown = NULL
429};
430
427int tipc_cfg_init(void) 431int tipc_cfg_init(void)
428{ 432{
429 struct tipc_name_seq seq; 433 return tipc_server_start(&cfgsrv);
430 int res;
431
432 res = tipc_createport(NULL, TIPC_CRITICAL_IMPORTANCE,
433 NULL, NULL, NULL,
434 NULL, cfg_named_msg_event, NULL,
435 NULL, &config_port_ref);
436 if (res)
437 goto failed;
438
439 seq.type = TIPC_CFG_SRV;
440 seq.lower = seq.upper = tipc_own_addr;
441 res = tipc_publish(config_port_ref, TIPC_ZONE_SCOPE, &seq);
442 if (res)
443 goto failed;
444
445 return 0;
446
447failed:
448 pr_err("Unable to create configuration service\n");
449 return res;
450} 434}
451 435
452void tipc_cfg_reinit(void) 436void tipc_cfg_reinit(void)
453{ 437{
454 struct tipc_name_seq seq; 438 tipc_server_stop(&cfgsrv);
455 int res;
456
457 seq.type = TIPC_CFG_SRV;
458 seq.lower = seq.upper = 0;
459 tipc_withdraw(config_port_ref, TIPC_ZONE_SCOPE, &seq);
460 439
461 seq.lower = seq.upper = tipc_own_addr; 440 cfgsrv_addr.addr.nameseq.lower = tipc_own_addr;
462 res = tipc_publish(config_port_ref, TIPC_ZONE_SCOPE, &seq); 441 cfgsrv_addr.addr.nameseq.upper = tipc_own_addr;
463 if (res) 442 tipc_server_start(&cfgsrv);
464 pr_err("Unable to reinitialize configuration service\n");
465} 443}
466 444
467void tipc_cfg_stop(void) 445void tipc_cfg_stop(void)
468{ 446{
469 tipc_deleteport(config_port_ref); 447 tipc_server_stop(&cfgsrv);
470 config_port_ref = 0;
471} 448}
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 7ec2c1eb94f1..fd4eeeaa972a 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -2,7 +2,7 @@
2 * net/tipc/core.c: TIPC module code 2 * net/tipc/core.c: TIPC module code
3 * 3 *
4 * Copyright (c) 2003-2006, Ericsson AB 4 * Copyright (c) 2003-2006, Ericsson AB
5 * Copyright (c) 2005-2006, 2010-2011, Wind River Systems 5 * Copyright (c) 2005-2006, 2010-2013, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -39,6 +39,7 @@
39#include "name_table.h" 39#include "name_table.h"
40#include "subscr.h" 40#include "subscr.h"
41#include "config.h" 41#include "config.h"
42#include "port.h"
42 43
43#include <linux/module.h> 44#include <linux/module.h>
44 45
@@ -50,7 +51,7 @@ u32 tipc_own_addr __read_mostly;
50int tipc_max_ports __read_mostly; 51int tipc_max_ports __read_mostly;
51int tipc_net_id __read_mostly; 52int tipc_net_id __read_mostly;
52int tipc_remote_management __read_mostly; 53int tipc_remote_management __read_mostly;
53 54int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */
54 55
55/** 56/**
56 * tipc_buf_acquire - creates a TIPC message buffer 57 * tipc_buf_acquire - creates a TIPC message buffer
@@ -118,6 +119,7 @@ static void tipc_core_stop(void)
118 tipc_nametbl_stop(); 119 tipc_nametbl_stop();
119 tipc_ref_table_stop(); 120 tipc_ref_table_stop();
120 tipc_socket_stop(); 121 tipc_socket_stop();
122 tipc_unregister_sysctl();
121} 123}
122 124
123/** 125/**
@@ -135,20 +137,21 @@ static int tipc_core_start(void)
135 if (!res) 137 if (!res)
136 res = tipc_nametbl_init(); 138 res = tipc_nametbl_init();
137 if (!res) 139 if (!res)
138 res = tipc_subscr_start();
139 if (!res)
140 res = tipc_cfg_init();
141 if (!res)
142 res = tipc_netlink_start(); 140 res = tipc_netlink_start();
143 if (!res) 141 if (!res)
144 res = tipc_socket_init(); 142 res = tipc_socket_init();
143 if (!res)
144 res = tipc_register_sysctl();
145 if (!res)
146 res = tipc_subscr_start();
147 if (!res)
148 res = tipc_cfg_init();
145 if (res) 149 if (res)
146 tipc_core_stop(); 150 tipc_core_stop();
147 151
148 return res; 152 return res;
149} 153}
150 154
151
152static int __init tipc_init(void) 155static int __init tipc_init(void)
153{ 156{
154 int res; 157 int res;
@@ -160,6 +163,11 @@ static int __init tipc_init(void)
160 tipc_max_ports = CONFIG_TIPC_PORTS; 163 tipc_max_ports = CONFIG_TIPC_PORTS;
161 tipc_net_id = 4711; 164 tipc_net_id = 4711;
162 165
166 sysctl_tipc_rmem[0] = CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE;
167 sysctl_tipc_rmem[1] = CONN_OVERLOAD_LIMIT >> 4 <<
168 TIPC_CRITICAL_IMPORTANCE;
169 sysctl_tipc_rmem[2] = CONN_OVERLOAD_LIMIT;
170
163 res = tipc_core_start(); 171 res = tipc_core_start();
164 if (res) 172 if (res)
165 pr_err("Unable to start in single node mode\n"); 173 pr_err("Unable to start in single node mode\n");
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 0207db04179a..be72f8cebc53 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -1,8 +1,8 @@
1/* 1/*
2 * net/tipc/core.h: Include file for TIPC global declarations 2 * net/tipc/core.h: Include file for TIPC global declarations
3 * 3 *
4 * Copyright (c) 2005-2006, Ericsson AB 4 * Copyright (c) 2005-2006, 2013 Ericsson AB
5 * Copyright (c) 2005-2007, 2010-2011, Wind River Systems 5 * Copyright (c) 2005-2007, 2010-2013, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -80,6 +80,7 @@ extern u32 tipc_own_addr __read_mostly;
80extern int tipc_max_ports __read_mostly; 80extern int tipc_max_ports __read_mostly;
81extern int tipc_net_id __read_mostly; 81extern int tipc_net_id __read_mostly;
82extern int tipc_remote_management __read_mostly; 82extern int tipc_remote_management __read_mostly;
83extern int sysctl_tipc_rmem[3] __read_mostly;
83 84
84/* 85/*
85 * Other global variables 86 * Other global variables
@@ -96,6 +97,18 @@ extern int tipc_netlink_start(void);
96extern void tipc_netlink_stop(void); 97extern void tipc_netlink_stop(void);
97extern int tipc_socket_init(void); 98extern int tipc_socket_init(void);
98extern void tipc_socket_stop(void); 99extern void tipc_socket_stop(void);
100extern int tipc_sock_create_local(int type, struct socket **res);
101extern void tipc_sock_release_local(struct socket *sock);
102extern int tipc_sock_accept_local(struct socket *sock,
103 struct socket **newsock, int flags);
104
105#ifdef CONFIG_SYSCTL
106extern int tipc_register_sysctl(void);
107extern void tipc_unregister_sysctl(void);
108#else
109#define tipc_register_sysctl() 0
110#define tipc_unregister_sysctl()
111#endif
99 112
100/* 113/*
101 * TIPC timer and signal code 114 * TIPC timer and signal code
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index eedff58d0387..ecc758c6eacf 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -70,8 +70,7 @@ struct tipc_link_req {
70 * @dest_domain: network domain of node(s) which should respond to message 70 * @dest_domain: network domain of node(s) which should respond to message
71 * @b_ptr: ptr to bearer issuing message 71 * @b_ptr: ptr to bearer issuing message
72 */ 72 */
73static struct sk_buff *tipc_disc_init_msg(u32 type, 73static struct sk_buff *tipc_disc_init_msg(u32 type, u32 dest_domain,
74 u32 dest_domain,
75 struct tipc_bearer *b_ptr) 74 struct tipc_bearer *b_ptr)
76{ 75{
77 struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE); 76 struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE);
@@ -346,8 +345,8 @@ exit:
346 * 345 *
347 * Returns 0 if successful, otherwise -errno. 346 * Returns 0 if successful, otherwise -errno.
348 */ 347 */
349int tipc_disc_create(struct tipc_bearer *b_ptr, 348int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest,
350 struct tipc_media_addr *dest, u32 dest_domain) 349 u32 dest_domain)
351{ 350{
352 struct tipc_link_req *req; 351 struct tipc_link_req *req;
353 352
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 120a676a3360..40ea40cf6204 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -62,7 +62,7 @@ static struct eth_bearer eth_bearers[MAX_ETH_BEARERS];
62static int eth_started; 62static int eth_started;
63 63
64static int recv_notification(struct notifier_block *nb, unsigned long evt, 64static int recv_notification(struct notifier_block *nb, unsigned long evt,
65 void *dv); 65 void *dv);
66/* 66/*
67 * Network device notifier info 67 * Network device notifier info
68 */ 68 */
@@ -162,8 +162,7 @@ static void setup_bearer(struct work_struct *work)
162 */ 162 */
163static int enable_bearer(struct tipc_bearer *tb_ptr) 163static int enable_bearer(struct tipc_bearer *tb_ptr)
164{ 164{
165 struct net_device *dev = NULL; 165 struct net_device *dev;
166 struct net_device *pdev = NULL;
167 struct eth_bearer *eb_ptr = &eth_bearers[0]; 166 struct eth_bearer *eb_ptr = &eth_bearers[0];
168 struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; 167 struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
169 char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; 168 char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1;
@@ -178,15 +177,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
178 } 177 }
179 178
180 /* Find device with specified name */ 179 /* Find device with specified name */
181 read_lock(&dev_base_lock); 180 dev = dev_get_by_name(&init_net, driver_name);
182 for_each_netdev(&init_net, pdev) {
183 if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) {
184 dev = pdev;
185 dev_hold(dev);
186 break;
187 }
188 }
189 read_unlock(&dev_base_lock);
190 if (!dev) 181 if (!dev)
191 return -ENODEV; 182 return -ENODEV;
192 183
@@ -251,9 +242,9 @@ static void disable_bearer(struct tipc_bearer *tb_ptr)
251 * specified device. 242 * specified device.
252 */ 243 */
253static int recv_notification(struct notifier_block *nb, unsigned long evt, 244static int recv_notification(struct notifier_block *nb, unsigned long evt,
254 void *dv) 245 void *ptr)
255{ 246{
256 struct net_device *dev = (struct net_device *)dv; 247 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
257 struct eth_bearer *eb_ptr = &eth_bearers[0]; 248 struct eth_bearer *eb_ptr = &eth_bearers[0];
258 struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; 249 struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
259 250
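
Two core helpers carry the eth_media.c changes: dev_get_by_name() does the name lookup with its own locking and takes a device reference that the caller must later drop with dev_put(), and netdev notifier callbacks now receive a wrapped info structure instead of a bare net_device pointer. A minimal sketch of the resulting pattern, with hypothetical demo_* names:

	static int demo_notify(struct notifier_block *nb, unsigned long evt,
			       void *ptr)
	{
		/* unwrap struct netdev_notifier_info to get the device */
		struct net_device *dev = netdev_notifier_info_to_dev(ptr);

		pr_info("event %lu on %s\n", evt, dev->name);
		return NOTIFY_DONE;
	}

	static struct net_device *demo_find(const char *name)
	{
		/* takes a reference; pair with dev_put() when done */
		return dev_get_by_name(&init_net, name);
	}
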
diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c
index 2a2864c25e15..9934a32bfa87 100644
--- a/net/tipc/ib_media.c
+++ b/net/tipc/ib_media.c
@@ -155,8 +155,7 @@ static void setup_bearer(struct work_struct *work)
155 */ 155 */
156static int enable_bearer(struct tipc_bearer *tb_ptr) 156static int enable_bearer(struct tipc_bearer *tb_ptr)
157{ 157{
158 struct net_device *dev = NULL; 158 struct net_device *dev;
159 struct net_device *pdev = NULL;
160 struct ib_bearer *ib_ptr = &ib_bearers[0]; 159 struct ib_bearer *ib_ptr = &ib_bearers[0];
161 struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; 160 struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS];
162 char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; 161 char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1;
@@ -171,15 +170,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
171 } 170 }
172 171
173 /* Find device with specified name */ 172 /* Find device with specified name */
174 read_lock(&dev_base_lock); 173 dev = dev_get_by_name(&init_net, driver_name);
175 for_each_netdev(&init_net, pdev) {
176 if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) {
177 dev = pdev;
178 dev_hold(dev);
179 break;
180 }
181 }
182 read_unlock(&dev_base_lock);
183 if (!dev) 174 if (!dev)
184 return -ENODEV; 175 return -ENODEV;
185 176
@@ -244,9 +235,9 @@ static void disable_bearer(struct tipc_bearer *tb_ptr)
244 * specified device. 235 * specified device.
245 */ 236 */
246static int recv_notification(struct notifier_block *nb, unsigned long evt, 237static int recv_notification(struct notifier_block *nb, unsigned long evt,
247 void *dv) 238 void *ptr)
248{ 239{
249 struct net_device *dev = (struct net_device *)dv; 240 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
250 struct ib_bearer *ib_ptr = &ib_bearers[0]; 241 struct ib_bearer *ib_ptr = &ib_bearers[0];
251 struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; 242 struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS];
252 243
@@ -301,13 +292,7 @@ static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size)
301 if (str_size < 60) /* 60 = 19 * strlen("xx:") + strlen("xx\0") */ 292 if (str_size < 60) /* 60 = 19 * strlen("xx:") + strlen("xx\0") */
302 return 1; 293 return 1;
303 294
304 sprintf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:" 295 sprintf(str_buf, "%20phC", a->value);
305 "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x",
306 a->value[0], a->value[1], a->value[2], a->value[3],
307 a->value[4], a->value[5], a->value[6], a->value[7],
308 a->value[8], a->value[9], a->value[10], a->value[11],
309 a->value[12], a->value[13], a->value[14], a->value[15],
310 a->value[16], a->value[17], a->value[18], a->value[19]);
311 296
312 return 0; 297 return 0;
313} 298}
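
The kernel's %*phC printf extension dumps a small buffer (up to 64 bytes) as colon-separated hex, with the field width giving the byte count, which is what lets the 20-byte InfiniBand address dump collapse into a single specifier. For illustration, the same specifier on a 6-byte buffer:

	u8 addr[6] = { 0x00, 0x1b, 0x21, 0x3c, 0x4d, 0x5e };

	pr_info("addr=%6phC\n", addr);	/* prints addr=00:1b:21:3c:4d:5e */
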
diff --git a/net/tipc/link.c b/net/tipc/link.c
index a80feee5197a..0cc3d9015c5d 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2,7 +2,7 @@
2 * net/tipc/link.c: TIPC link code 2 * net/tipc/link.c: TIPC link code
3 * 3 *
4 * Copyright (c) 1996-2007, 2012, Ericsson AB 4 * Copyright (c) 1996-2007, 2012, Ericsson AB
5 * Copyright (c) 2004-2007, 2010-2011, Wind River Systems 5 * Copyright (c) 2004-2007, 2010-2013, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -41,6 +41,8 @@
41#include "discover.h" 41#include "discover.h"
42#include "config.h" 42#include "config.h"
43 43
44#include <linux/pkt_sched.h>
45
44/* 46/*
45 * Error message prefixes 47 * Error message prefixes
46 */ 48 */
@@ -771,8 +773,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
771 * link_bundle_buf(): Append contents of a buffer to 773 * link_bundle_buf(): Append contents of a buffer to
772 * the tail of an existing one. 774 * the tail of an existing one.
773 */ 775 */
774static int link_bundle_buf(struct tipc_link *l_ptr, 776static int link_bundle_buf(struct tipc_link *l_ptr, struct sk_buff *bundler,
775 struct sk_buff *bundler,
776 struct sk_buff *buf) 777 struct sk_buff *buf)
777{ 778{
778 struct tipc_msg *bundler_msg = buf_msg(bundler); 779 struct tipc_msg *bundler_msg = buf_msg(bundler);
@@ -1057,40 +1058,6 @@ static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf,
1057} 1058}
1058 1059
1059/* 1060/*
1060 * tipc_send_buf_fast: Entry for data messages where the
1061 * destination node is known and the header is complete,
1062 * inclusive total message length.
1063 * Returns user data length.
1064 */
1065int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode)
1066{
1067 struct tipc_link *l_ptr;
1068 struct tipc_node *n_ptr;
1069 int res;
1070 u32 selector = msg_origport(buf_msg(buf)) & 1;
1071 u32 dummy;
1072
1073 read_lock_bh(&tipc_net_lock);
1074 n_ptr = tipc_node_find(destnode);
1075 if (likely(n_ptr)) {
1076 tipc_node_lock(n_ptr);
1077 l_ptr = n_ptr->active_links[selector];
1078 if (likely(l_ptr)) {
1079 res = link_send_buf_fast(l_ptr, buf, &dummy);
1080 tipc_node_unlock(n_ptr);
1081 read_unlock_bh(&tipc_net_lock);
1082 return res;
1083 }
1084 tipc_node_unlock(n_ptr);
1085 }
1086 read_unlock_bh(&tipc_net_lock);
1087 res = msg_data_sz(buf_msg(buf));
1088 tipc_reject_msg(buf, TIPC_ERR_NO_NODE);
1089 return res;
1090}
1091
1092
1093/*
1094 * tipc_link_send_sections_fast: Entry for messages where the 1061 * tipc_link_send_sections_fast: Entry for messages where the
1095 * destination processor is known and the header is complete, 1062 * destination processor is known and the header is complete,
1096 * except for total message length. 1063 * except for total message length.
@@ -1098,8 +1065,7 @@ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode)
1098 */ 1065 */
1099int tipc_link_send_sections_fast(struct tipc_port *sender, 1066int tipc_link_send_sections_fast(struct tipc_port *sender,
1100 struct iovec const *msg_sect, 1067 struct iovec const *msg_sect,
1101 const u32 num_sect, 1068 const u32 num_sect, unsigned int total_len,
1102 unsigned int total_len,
1103 u32 destaddr) 1069 u32 destaddr)
1104{ 1070{
1105 struct tipc_msg *hdr = &sender->phdr; 1071 struct tipc_msg *hdr = &sender->phdr;
@@ -1115,7 +1081,10 @@ again:
1115 * (Must not hold any locks while building message.) 1081 * (Must not hold any locks while building message.)
1116 */ 1082 */
1117 res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, 1083 res = tipc_msg_build(hdr, msg_sect, num_sect, total_len,
1118 sender->max_pkt, !sender->user_port, &buf); 1084 sender->max_pkt, &buf);
1085 /* Exit if build request was invalid */
1086 if (unlikely(res < 0))
1087 return res;
1119 1088
1120 read_lock_bh(&tipc_net_lock); 1089 read_lock_bh(&tipc_net_lock);
1121 node = tipc_node_find(destaddr); 1090 node = tipc_node_find(destaddr);
@@ -1132,10 +1101,6 @@ exit:
1132 return res; 1101 return res;
1133 } 1102 }
1134 1103
1135 /* Exit if build request was invalid */
1136 if (unlikely(res < 0))
1137 goto exit;
1138
1139 /* Exit if link (or bearer) is congested */ 1104 /* Exit if link (or bearer) is congested */
1140 if (link_congested(l_ptr) || 1105 if (link_congested(l_ptr) ||
1141 tipc_bearer_blocked(l_ptr->b_ptr)) { 1106 tipc_bearer_blocked(l_ptr->b_ptr)) {
@@ -1189,8 +1154,7 @@ exit:
1189 */ 1154 */
1190static int link_send_sections_long(struct tipc_port *sender, 1155static int link_send_sections_long(struct tipc_port *sender,
1191 struct iovec const *msg_sect, 1156 struct iovec const *msg_sect,
1192 u32 num_sect, 1157 u32 num_sect, unsigned int total_len,
1193 unsigned int total_len,
1194 u32 destaddr) 1158 u32 destaddr)
1195{ 1159{
1196 struct tipc_link *l_ptr; 1160 struct tipc_link *l_ptr;
@@ -1204,6 +1168,7 @@ static int link_send_sections_long(struct tipc_port *sender,
1204 const unchar *sect_crs; 1168 const unchar *sect_crs;
1205 int curr_sect; 1169 int curr_sect;
1206 u32 fragm_no; 1170 u32 fragm_no;
1171 int res = 0;
1207 1172
1208again: 1173again:
1209 fragm_no = 1; 1174 fragm_no = 1;
@@ -1250,18 +1215,15 @@ again:
1250 else 1215 else
1251 sz = fragm_rest; 1216 sz = fragm_rest;
1252 1217
1253 if (likely(!sender->user_port)) { 1218 if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) {
1254 if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { 1219 res = -EFAULT;
1255error: 1220error:
1256 for (; buf_chain; buf_chain = buf) { 1221 for (; buf_chain; buf_chain = buf) {
1257 buf = buf_chain->next; 1222 buf = buf_chain->next;
1258 kfree_skb(buf_chain); 1223 kfree_skb(buf_chain);
1259 }
1260 return -EFAULT;
1261 } 1224 }
1262 } else 1225 return res;
1263 skb_copy_to_linear_data_offset(buf, fragm_crs, 1226 }
1264 sect_crs, sz);
1265 sect_crs += sz; 1227 sect_crs += sz;
1266 sect_rest -= sz; 1228 sect_rest -= sz;
1267 fragm_crs += sz; 1229 fragm_crs += sz;
@@ -1281,8 +1243,10 @@ error:
1281 msg_set_fragm_no(&fragm_hdr, ++fragm_no); 1243 msg_set_fragm_no(&fragm_hdr, ++fragm_no);
1282 prev = buf; 1244 prev = buf;
1283 buf = tipc_buf_acquire(fragm_sz + INT_H_SIZE); 1245 buf = tipc_buf_acquire(fragm_sz + INT_H_SIZE);
1284 if (!buf) 1246 if (!buf) {
1247 res = -ENOMEM;
1285 goto error; 1248 goto error;
1249 }
1286 1250
1287 buf->next = NULL; 1251 buf->next = NULL;
1288 prev->next = buf; 1252 prev->next = buf;
@@ -1446,7 +1410,7 @@ static void link_reset_all(unsigned long addr)
1446} 1410}
1447 1411
1448static void link_retransmit_failure(struct tipc_link *l_ptr, 1412static void link_retransmit_failure(struct tipc_link *l_ptr,
1449 struct sk_buff *buf) 1413 struct sk_buff *buf)
1450{ 1414{
1451 struct tipc_msg *msg = buf_msg(buf); 1415 struct tipc_msg *msg = buf_msg(buf);
1452 1416
@@ -1901,8 +1865,8 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
1901 * Send protocol message to the other endpoint. 1865 * Send protocol message to the other endpoint.
1902 */ 1866 */
1903void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, 1867void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ,
1904 int probe_msg, u32 gap, u32 tolerance, 1868 int probe_msg, u32 gap, u32 tolerance,
1905 u32 priority, u32 ack_mtu) 1869 u32 priority, u32 ack_mtu)
1906{ 1870{
1907 struct sk_buff *buf = NULL; 1871 struct sk_buff *buf = NULL;
1908 struct tipc_msg *msg = l_ptr->pmsg; 1872 struct tipc_msg *msg = l_ptr->pmsg;
@@ -1988,6 +1952,7 @@ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ,
1988 return; 1952 return;
1989 1953
1990 skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); 1954 skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
1955 buf->priority = TC_PRIO_CONTROL;
1991 1956
1992 /* Defer message if bearer is already blocked */ 1957 /* Defer message if bearer is already blocked */
1993 if (tipc_bearer_blocked(l_ptr->b_ptr)) { 1958 if (tipc_bearer_blocked(l_ptr->b_ptr)) {
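
Setting buf->priority to TC_PRIO_CONTROL (hence the new <linux/pkt_sched.h> include earlier in this file) lets the default pfifo_fast qdisc place link protocol traffic in its highest-priority band, so supervision messages are not starved behind bulk data. A minimal sketch of the idiom:

	#include <linux/pkt_sched.h>

	static void mark_as_control(struct sk_buff *skb)
	{
		/* TC_PRIO_CONTROL (7) maps to pfifo_fast band 0 */
		skb->priority = TC_PRIO_CONTROL;
	}
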
@@ -2145,8 +2110,7 @@ exit:
2145 * another bearer. Owner node is locked. 2110 * another bearer. Owner node is locked.
2146 */ 2111 */
2147static void tipc_link_tunnel(struct tipc_link *l_ptr, 2112static void tipc_link_tunnel(struct tipc_link *l_ptr,
2148 struct tipc_msg *tunnel_hdr, 2113 struct tipc_msg *tunnel_hdr, struct tipc_msg *msg,
2149 struct tipc_msg *msg,
2150 u32 selector) 2114 u32 selector)
2151{ 2115{
2152 struct tipc_link *tunnel; 2116 struct tipc_link *tunnel;
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index f2db8a87d9c5..ced60e2fc4f7 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -51,8 +51,8 @@ u32 tipc_msg_tot_importance(struct tipc_msg *m)
51} 51}
52 52
53 53
54void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, 54void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize,
55 u32 hsize, u32 destnode) 55 u32 destnode)
56{ 56{
57 memset(m, 0, hsize); 57 memset(m, 0, hsize);
58 msg_set_version(m); 58 msg_set_version(m);
@@ -73,8 +73,8 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type,
73 * Returns message data size or errno 73 * Returns message data size or errno
74 */ 74 */
75int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, 75int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
76 u32 num_sect, unsigned int total_len, 76 u32 num_sect, unsigned int total_len, int max_size,
77 int max_size, int usrmem, struct sk_buff **buf) 77 struct sk_buff **buf)
78{ 78{
79 int dsz, sz, hsz, pos, res, cnt; 79 int dsz, sz, hsz, pos, res, cnt;
80 80
@@ -92,14 +92,9 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
92 return -ENOMEM; 92 return -ENOMEM;
93 skb_copy_to_linear_data(*buf, hdr, hsz); 93 skb_copy_to_linear_data(*buf, hdr, hsz);
94 for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) { 94 for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) {
95 if (likely(usrmem)) 95 skb_copy_to_linear_data_offset(*buf, pos,
96 res = !copy_from_user((*buf)->data + pos, 96 msg_sect[cnt].iov_base,
97 msg_sect[cnt].iov_base, 97 msg_sect[cnt].iov_len);
98 msg_sect[cnt].iov_len);
99 else
100 skb_copy_to_linear_data_offset(*buf, pos,
101 msg_sect[cnt].iov_base,
102 msg_sect[cnt].iov_len);
103 pos += msg_sect[cnt].iov_len; 98 pos += msg_sect[cnt].iov_len;
104 } 99 }
105 if (likely(res)) 100 if (likely(res))
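
With the usrmem flag gone, every call site reduces to the same shortened shape. A representative caller, mirroring the updated sites elsewhere in this diff:

	struct sk_buff *buf;
	int res;

	res = tipc_msg_build(hdr, msg_sect, num_sect, total_len,
			     MAX_MSG_SIZE, &buf);
	if (unlikely(res < 0))	/* invalid build request */
		return res;
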
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index ba2a72beea68..5e4ccf5c27df 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -719,9 +719,9 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n)
719} 719}
720 720
721u32 tipc_msg_tot_importance(struct tipc_msg *m); 721u32 tipc_msg_tot_importance(struct tipc_msg *m);
722void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, 722void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize,
723 u32 hsize, u32 destnode); 723 u32 destnode);
724int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, 724int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
725 u32 num_sect, unsigned int total_len, 725 u32 num_sect, unsigned int total_len, int max_size,
726 int max_size, int usrmem, struct sk_buff **buf); 726 struct sk_buff **buf);
727#endif 727#endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 24b167914311..09dcd54b04e1 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -440,7 +440,7 @@ found:
440 * sequence overlapping with the requested sequence 440 * sequence overlapping with the requested sequence
441 */ 441 */
442static void tipc_nameseq_subscribe(struct name_seq *nseq, 442static void tipc_nameseq_subscribe(struct name_seq *nseq,
443 struct tipc_subscription *s) 443 struct tipc_subscription *s)
444{ 444{
445 struct sub_seq *sseq = nseq->sseqs; 445 struct sub_seq *sseq = nseq->sseqs;
446 446
@@ -662,7 +662,7 @@ exit:
662 * tipc_nametbl_publish - add name publication to network name tables 662 * tipc_nametbl_publish - add name publication to network name tables
663 */ 663 */
664struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, 664struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
665 u32 scope, u32 port_ref, u32 key) 665 u32 scope, u32 port_ref, u32 key)
666{ 666{
667 struct publication *publ; 667 struct publication *publ;
668 668
@@ -753,7 +753,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
753 * subseq_list - print specified sub-sequence contents into the given buffer 753 * subseq_list - print specified sub-sequence contents into the given buffer
754 */ 754 */
755static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth, 755static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth,
756 u32 index) 756 u32 index)
757{ 757{
758 char portIdStr[27]; 758 char portIdStr[27];
759 const char *scope_str[] = {"", " zone", " cluster", " node"}; 759 const char *scope_str[] = {"", " zone", " cluster", " node"};
@@ -792,7 +792,7 @@ static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth,
792 * nameseq_list - print specified name sequence contents into the given buffer 792 * nameseq_list - print specified name sequence contents into the given buffer
793 */ 793 */
794static int nameseq_list(struct name_seq *seq, char *buf, int len, u32 depth, 794static int nameseq_list(struct name_seq *seq, char *buf, int len, u32 depth,
795 u32 type, u32 lowbound, u32 upbound, u32 index) 795 u32 type, u32 lowbound, u32 upbound, u32 index)
796{ 796{
797 struct sub_seq *sseq; 797 struct sub_seq *sseq;
798 char typearea[11]; 798 char typearea[11];
@@ -849,7 +849,7 @@ static int nametbl_header(char *buf, int len, u32 depth)
849 * nametbl_list - print specified name table contents into the given buffer 849 * nametbl_list - print specified name table contents into the given buffer
850 */ 850 */
851static int nametbl_list(char *buf, int len, u32 depth_info, 851static int nametbl_list(char *buf, int len, u32 depth_info,
852 u32 type, u32 lowbound, u32 upbound) 852 u32 type, u32 lowbound, u32 upbound)
853{ 853{
854 struct hlist_head *seq_head; 854 struct hlist_head *seq_head;
855 struct name_seq *seq; 855 struct name_seq *seq;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 71cb4dc712df..f02f48b9a216 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -87,14 +87,15 @@ extern rwlock_t tipc_nametbl_lock;
87struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space); 87struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space);
88u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node); 88u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node);
89int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, 89int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit,
90 struct tipc_port_list *dports); 90 struct tipc_port_list *dports);
91struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, 91struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
92 u32 scope, u32 port_ref, u32 key); 92 u32 scope, u32 port_ref, u32 key);
93int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key); 93int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key);
94struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, 94struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
95 u32 scope, u32 node, u32 ref, u32 key); 95 u32 scope, u32 node, u32 ref,
96struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, 96 u32 key);
97 u32 node, u32 ref, u32 key); 97struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, u32 node,
98 u32 ref, u32 key);
98void tipc_nametbl_subscribe(struct tipc_subscription *s); 99void tipc_nametbl_subscribe(struct tipc_subscription *s);
99void tipc_nametbl_unsubscribe(struct tipc_subscription *s); 100void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
100int tipc_nametbl_init(void); 101int tipc_nametbl_init(void);
diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c
index 5e34b015da45..8a7384c04add 100644
--- a/net/tipc/node_subscr.c
+++ b/net/tipc/node_subscr.c
@@ -42,7 +42,7 @@
42 * tipc_nodesub_subscribe - create "node down" subscription for specified node 42 * tipc_nodesub_subscribe - create "node down" subscription for specified node
43 */ 43 */
44void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr, 44void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr,
45 void *usr_handle, net_ev_handler handle_down) 45 void *usr_handle, net_ev_handler handle_down)
46{ 46{
47 if (in_own_node(addr)) { 47 if (in_own_node(addr)) {
48 node_sub->node = NULL; 48 node_sub->node = NULL;
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 18098cac62f2..b3ed2fcab4fb 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -2,7 +2,7 @@
2 * net/tipc/port.c: TIPC port code 2 * net/tipc/port.c: TIPC port code
3 * 3 *
4 * Copyright (c) 1992-2007, Ericsson AB 4 * Copyright (c) 1992-2007, Ericsson AB
5 * Copyright (c) 2004-2008, 2010-2011, Wind River Systems 5 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -46,11 +46,7 @@
46 46
47#define MAX_REJECT_SIZE 1024 47#define MAX_REJECT_SIZE 1024
48 48
49static struct sk_buff *msg_queue_head;
50static struct sk_buff *msg_queue_tail;
51
52DEFINE_SPINLOCK(tipc_port_list_lock); 49DEFINE_SPINLOCK(tipc_port_list_lock);
53static DEFINE_SPINLOCK(queue_lock);
54 50
55static LIST_HEAD(ports); 51static LIST_HEAD(ports);
56static void port_handle_node_down(unsigned long ref); 52static void port_handle_node_down(unsigned long ref);
@@ -119,7 +115,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
119 msg_set_nameupper(hdr, seq->upper); 115 msg_set_nameupper(hdr, seq->upper);
120 msg_set_hdr_sz(hdr, MCAST_H_SIZE); 116 msg_set_hdr_sz(hdr, MCAST_H_SIZE);
121 res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE, 117 res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE,
122 !oport->user_port, &buf); 118 &buf);
123 if (unlikely(!buf)) 119 if (unlikely(!buf))
124 return res; 120 return res;
125 121
@@ -206,14 +202,15 @@ exit:
206} 202}
207 203
208/** 204/**
209 * tipc_createport_raw - create a generic TIPC port 205 * tipc_createport - create a generic TIPC port
210 * 206 *
211 * Returns pointer to (locked) TIPC port, or NULL if unable to create it 207 * Returns pointer to (locked) TIPC port, or NULL if unable to create it
212 */ 208 */
213struct tipc_port *tipc_createport_raw(void *usr_handle, 209struct tipc_port *tipc_createport(struct sock *sk,
214 u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), 210 u32 (*dispatcher)(struct tipc_port *,
215 void (*wakeup)(struct tipc_port *), 211 struct sk_buff *),
216 const u32 importance) 212 void (*wakeup)(struct tipc_port *),
213 const u32 importance)
217{ 214{
218 struct tipc_port *p_ptr; 215 struct tipc_port *p_ptr;
219 struct tipc_msg *msg; 216 struct tipc_msg *msg;
@@ -231,14 +228,13 @@ struct tipc_port *tipc_createport_raw(void *usr_handle,
231 return NULL; 228 return NULL;
232 } 229 }
233 230
234 p_ptr->usr_handle = usr_handle; 231 p_ptr->sk = sk;
235 p_ptr->max_pkt = MAX_PKT_DEFAULT; 232 p_ptr->max_pkt = MAX_PKT_DEFAULT;
236 p_ptr->ref = ref; 233 p_ptr->ref = ref;
237 INIT_LIST_HEAD(&p_ptr->wait_list); 234 INIT_LIST_HEAD(&p_ptr->wait_list);
238 INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); 235 INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list);
239 p_ptr->dispatcher = dispatcher; 236 p_ptr->dispatcher = dispatcher;
240 p_ptr->wakeup = wakeup; 237 p_ptr->wakeup = wakeup;
241 p_ptr->user_port = NULL;
242 k_init_timer(&p_ptr->timer, (Handler)port_timeout, ref); 238 k_init_timer(&p_ptr->timer, (Handler)port_timeout, ref);
243 INIT_LIST_HEAD(&p_ptr->publications); 239 INIT_LIST_HEAD(&p_ptr->publications);
244 INIT_LIST_HEAD(&p_ptr->port_list); 240 INIT_LIST_HEAD(&p_ptr->port_list);
@@ -275,7 +271,6 @@ int tipc_deleteport(u32 ref)
275 buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT); 271 buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT);
276 tipc_nodesub_unsubscribe(&p_ptr->subscription); 272 tipc_nodesub_unsubscribe(&p_ptr->subscription);
277 } 273 }
278 kfree(p_ptr->user_port);
279 274
280 spin_lock_bh(&tipc_port_list_lock); 275 spin_lock_bh(&tipc_port_list_lock);
281 list_del(&p_ptr->port_list); 276 list_del(&p_ptr->port_list);
@@ -448,7 +443,7 @@ int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr,
448 int res; 443 int res;
449 444
450 res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE, 445 res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE,
451 !p_ptr->user_port, &buf); 446 &buf);
452 if (!buf) 447 if (!buf)
453 return res; 448 return res;
454 449
@@ -668,215 +663,6 @@ void tipc_port_reinit(void)
668 spin_unlock_bh(&tipc_port_list_lock); 663 spin_unlock_bh(&tipc_port_list_lock);
669} 664}
670 665
671
672/*
673 * port_dispatcher_sigh(): Signal handler for messages destined
674 * to the tipc_port interface.
675 */
676static void port_dispatcher_sigh(void *dummy)
677{
678 struct sk_buff *buf;
679
680 spin_lock_bh(&queue_lock);
681 buf = msg_queue_head;
682 msg_queue_head = NULL;
683 spin_unlock_bh(&queue_lock);
684
685 while (buf) {
686 struct tipc_port *p_ptr;
687 struct user_port *up_ptr;
688 struct tipc_portid orig;
689 struct tipc_name_seq dseq;
690 void *usr_handle;
691 int connected;
692 int peer_invalid;
693 int published;
694 u32 message_type;
695
696 struct sk_buff *next = buf->next;
697 struct tipc_msg *msg = buf_msg(buf);
698 u32 dref = msg_destport(msg);
699
700 message_type = msg_type(msg);
701 if (message_type > TIPC_DIRECT_MSG)
702 goto reject; /* Unsupported message type */
703
704 p_ptr = tipc_port_lock(dref);
705 if (!p_ptr)
706 goto reject; /* Port deleted while msg in queue */
707
708 orig.ref = msg_origport(msg);
709 orig.node = msg_orignode(msg);
710 up_ptr = p_ptr->user_port;
711 usr_handle = up_ptr->usr_handle;
712 connected = p_ptr->connected;
713 peer_invalid = connected && !tipc_port_peer_msg(p_ptr, msg);
714 published = p_ptr->published;
715
716 if (unlikely(msg_errcode(msg)))
717 goto err;
718
719 switch (message_type) {
720
721 case TIPC_CONN_MSG:{
722 tipc_conn_msg_event cb = up_ptr->conn_msg_cb;
723 u32 dsz;
724
725 tipc_port_unlock(p_ptr);
726 if (unlikely(!cb))
727 goto reject;
728 if (unlikely(!connected)) {
729 if (tipc_connect(dref, &orig))
730 goto reject;
731 } else if (peer_invalid)
732 goto reject;
733 dsz = msg_data_sz(msg);
734 if (unlikely(dsz &&
735 (++p_ptr->conn_unacked >=
736 TIPC_FLOW_CONTROL_WIN)))
737 tipc_acknowledge(dref,
738 p_ptr->conn_unacked);
739 skb_pull(buf, msg_hdr_sz(msg));
740 cb(usr_handle, dref, &buf, msg_data(msg), dsz);
741 break;
742 }
743 case TIPC_DIRECT_MSG:{
744 tipc_msg_event cb = up_ptr->msg_cb;
745
746 tipc_port_unlock(p_ptr);
747 if (unlikely(!cb || connected))
748 goto reject;
749 skb_pull(buf, msg_hdr_sz(msg));
750 cb(usr_handle, dref, &buf, msg_data(msg),
751 msg_data_sz(msg), msg_importance(msg),
752 &orig);
753 break;
754 }
755 case TIPC_MCAST_MSG:
756 case TIPC_NAMED_MSG:{
757 tipc_named_msg_event cb = up_ptr->named_msg_cb;
758
759 tipc_port_unlock(p_ptr);
760 if (unlikely(!cb || connected || !published))
761 goto reject;
762 dseq.type = msg_nametype(msg);
763 dseq.lower = msg_nameinst(msg);
764 dseq.upper = (message_type == TIPC_NAMED_MSG)
765 ? dseq.lower : msg_nameupper(msg);
766 skb_pull(buf, msg_hdr_sz(msg));
767 cb(usr_handle, dref, &buf, msg_data(msg),
768 msg_data_sz(msg), msg_importance(msg),
769 &orig, &dseq);
770 break;
771 }
772 }
773 if (buf)
774 kfree_skb(buf);
775 buf = next;
776 continue;
777err:
778 switch (message_type) {
779
780 case TIPC_CONN_MSG:{
781 tipc_conn_shutdown_event cb =
782 up_ptr->conn_err_cb;
783
784 tipc_port_unlock(p_ptr);
785 if (!cb || !connected || peer_invalid)
786 break;
787 tipc_disconnect(dref);
788 skb_pull(buf, msg_hdr_sz(msg));
789 cb(usr_handle, dref, &buf, msg_data(msg),
790 msg_data_sz(msg), msg_errcode(msg));
791 break;
792 }
793 case TIPC_DIRECT_MSG:{
794 tipc_msg_err_event cb = up_ptr->err_cb;
795
796 tipc_port_unlock(p_ptr);
797 if (!cb || connected)
798 break;
799 skb_pull(buf, msg_hdr_sz(msg));
800 cb(usr_handle, dref, &buf, msg_data(msg),
801 msg_data_sz(msg), msg_errcode(msg), &orig);
802 break;
803 }
804 case TIPC_MCAST_MSG:
805 case TIPC_NAMED_MSG:{
806 tipc_named_msg_err_event cb =
807 up_ptr->named_err_cb;
808
809 tipc_port_unlock(p_ptr);
810 if (!cb || connected)
811 break;
812 dseq.type = msg_nametype(msg);
813 dseq.lower = msg_nameinst(msg);
814 dseq.upper = (message_type == TIPC_NAMED_MSG)
815 ? dseq.lower : msg_nameupper(msg);
816 skb_pull(buf, msg_hdr_sz(msg));
817 cb(usr_handle, dref, &buf, msg_data(msg),
818 msg_data_sz(msg), msg_errcode(msg), &dseq);
819 break;
820 }
821 }
822 if (buf)
823 kfree_skb(buf);
824 buf = next;
825 continue;
826reject:
827 tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
828 buf = next;
829 }
830}
831
832/*
833 * port_dispatcher(): Dispatcher for messages destined
834 * to the tipc_port interface. Called with port locked.
835 */
836static u32 port_dispatcher(struct tipc_port *dummy, struct sk_buff *buf)
837{
838 buf->next = NULL;
839 spin_lock_bh(&queue_lock);
840 if (msg_queue_head) {
841 msg_queue_tail->next = buf;
842 msg_queue_tail = buf;
843 } else {
844 msg_queue_tail = msg_queue_head = buf;
845 tipc_k_signal((Handler)port_dispatcher_sigh, 0);
846 }
847 spin_unlock_bh(&queue_lock);
848 return 0;
849}
850
851/*
852 * Wake up port after congestion: Called with port locked
853 */
854static void port_wakeup_sh(unsigned long ref)
855{
856 struct tipc_port *p_ptr;
857 struct user_port *up_ptr;
858 tipc_continue_event cb = NULL;
859 void *uh = NULL;
860
861 p_ptr = tipc_port_lock(ref);
862 if (p_ptr) {
863 up_ptr = p_ptr->user_port;
864 if (up_ptr) {
865 cb = up_ptr->continue_event_cb;
866 uh = up_ptr->usr_handle;
867 }
868 tipc_port_unlock(p_ptr);
869 }
870 if (cb)
871 cb(uh, ref);
872}
873
874
875static void port_wakeup(struct tipc_port *p_ptr)
876{
877 tipc_k_signal((Handler)port_wakeup_sh, p_ptr->ref);
878}
879
880void tipc_acknowledge(u32 ref, u32 ack) 666void tipc_acknowledge(u32 ref, u32 ack)
881{ 667{
882 struct tipc_port *p_ptr; 668 struct tipc_port *p_ptr;
@@ -893,50 +679,6 @@ void tipc_acknowledge(u32 ref, u32 ack)
893 tipc_net_route_msg(buf); 679 tipc_net_route_msg(buf);
894} 680}
895 681
896/*
897 * tipc_createport(): user level call.
898 */
899int tipc_createport(void *usr_handle,
900 unsigned int importance,
901 tipc_msg_err_event error_cb,
902 tipc_named_msg_err_event named_error_cb,
903 tipc_conn_shutdown_event conn_error_cb,
904 tipc_msg_event msg_cb,
905 tipc_named_msg_event named_msg_cb,
906 tipc_conn_msg_event conn_msg_cb,
907 tipc_continue_event continue_event_cb, /* May be zero */
908 u32 *portref)
909{
910 struct user_port *up_ptr;
911 struct tipc_port *p_ptr;
912
913 up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC);
914 if (!up_ptr) {
915 pr_warn("Port creation failed, no memory\n");
916 return -ENOMEM;
917 }
918 p_ptr = tipc_createport_raw(NULL, port_dispatcher, port_wakeup,
919 importance);
920 if (!p_ptr) {
921 kfree(up_ptr);
922 return -ENOMEM;
923 }
924
925 p_ptr->user_port = up_ptr;
926 up_ptr->usr_handle = usr_handle;
927 up_ptr->ref = p_ptr->ref;
928 up_ptr->err_cb = error_cb;
929 up_ptr->named_err_cb = named_error_cb;
930 up_ptr->conn_err_cb = conn_error_cb;
931 up_ptr->msg_cb = msg_cb;
932 up_ptr->named_msg_cb = named_msg_cb;
933 up_ptr->conn_msg_cb = conn_msg_cb;
934 up_ptr->continue_event_cb = continue_event_cb;
935 *portref = p_ptr->ref;
936 tipc_port_unlock(p_ptr);
937 return 0;
938}
939
940int tipc_portimportance(u32 ref, unsigned int *importance) 682int tipc_portimportance(u32 ref, unsigned int *importance)
941{ 683{
942 struct tipc_port *p_ptr; 684 struct tipc_port *p_ptr;
@@ -1184,7 +926,7 @@ static int tipc_port_recv_sections(struct tipc_port *sender, unsigned int num_se
1184 int res; 926 int res;
1185 927
1186 res = tipc_msg_build(&sender->phdr, msg_sect, num_sect, total_len, 928 res = tipc_msg_build(&sender->phdr, msg_sect, num_sect, total_len,
1187 MAX_MSG_SIZE, !sender->user_port, &buf); 929 MAX_MSG_SIZE, &buf);
1188 if (likely(buf)) 930 if (likely(buf))
1189 tipc_port_recv_msg(buf); 931 tipc_port_recv_msg(buf);
1190 return res; 932 return res;
@@ -1322,43 +1064,3 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest,
1322 } 1064 }
1323 return -ELINKCONG; 1065 return -ELINKCONG;
1324} 1066}
1325
1326/**
1327 * tipc_send_buf2port - send message buffer to port identity
1328 */
1329int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest,
1330 struct sk_buff *buf, unsigned int dsz)
1331{
1332 struct tipc_port *p_ptr;
1333 struct tipc_msg *msg;
1334 int res;
1335
1336 p_ptr = (struct tipc_port *)tipc_ref_deref(ref);
1337 if (!p_ptr || p_ptr->connected)
1338 return -EINVAL;
1339
1340 msg = &p_ptr->phdr;
1341 msg_set_type(msg, TIPC_DIRECT_MSG);
1342 msg_set_destnode(msg, dest->node);
1343 msg_set_destport(msg, dest->ref);
1344 msg_set_hdr_sz(msg, BASIC_H_SIZE);
1345 msg_set_size(msg, BASIC_H_SIZE + dsz);
1346 if (skb_cow(buf, BASIC_H_SIZE))
1347 return -ENOMEM;
1348
1349 skb_push(buf, BASIC_H_SIZE);
1350 skb_copy_to_linear_data(buf, msg, BASIC_H_SIZE);
1351
1352 if (in_own_node(dest->node))
1353 res = tipc_port_recv_msg(buf);
1354 else
1355 res = tipc_send_buf_fast(buf, dest->node);
1356 if (likely(res != -ELINKCONG)) {
1357 if (res > 0)
1358 p_ptr->sent++;
1359 return res;
1360 }
1361 if (port_unreliable(p_ptr))
1362 return dsz;
1363 return -ELINKCONG;
1364}
diff --git a/net/tipc/port.h b/net/tipc/port.h
index fb66e2e5f4d1..5a7026b9c345 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -2,7 +2,7 @@
2 * net/tipc/port.h: Include file for TIPC port code 2 * net/tipc/port.h: Include file for TIPC port code
3 * 3 *
4 * Copyright (c) 1994-2007, Ericsson AB 4 * Copyright (c) 1994-2007, Ericsson AB
5 * Copyright (c) 2004-2007, 2010-2011, Wind River Systems 5 * Copyright (c) 2004-2007, 2010-2013, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -43,60 +43,12 @@
43#include "node_subscr.h" 43#include "node_subscr.h"
44 44
45#define TIPC_FLOW_CONTROL_WIN 512 45#define TIPC_FLOW_CONTROL_WIN 512
46 46#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \
47typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref, 47 SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
48 struct sk_buff **buf, unsigned char const *data,
49 unsigned int size, int reason,
50 struct tipc_portid const *attmpt_destid);
51
52typedef void (*tipc_named_msg_err_event) (void *usr_handle, u32 portref,
53 struct sk_buff **buf, unsigned char const *data,
54 unsigned int size, int reason,
55 struct tipc_name_seq const *attmpt_dest);
56
57typedef void (*tipc_conn_shutdown_event) (void *usr_handle, u32 portref,
58 struct sk_buff **buf, unsigned char const *data,
59 unsigned int size, int reason);
60
61typedef void (*tipc_msg_event) (void *usr_handle, u32 portref,
62 struct sk_buff **buf, unsigned char const *data,
63 unsigned int size, unsigned int importance,
64 struct tipc_portid const *origin);
65
66typedef void (*tipc_named_msg_event) (void *usr_handle, u32 portref,
67 struct sk_buff **buf, unsigned char const *data,
68 unsigned int size, unsigned int importance,
69 struct tipc_portid const *orig,
70 struct tipc_name_seq const *dest);
71
72typedef void (*tipc_conn_msg_event) (void *usr_handle, u32 portref,
73 struct sk_buff **buf, unsigned char const *data,
74 unsigned int size);
75
76typedef void (*tipc_continue_event) (void *usr_handle, u32 portref);
77
78/**
79 * struct user_port - TIPC user port (used with native API)
80 * @usr_handle: user-specified field
81 * @ref: object reference to associated TIPC port
82 *
83 * <various callback routines>
84 */
85struct user_port {
86 void *usr_handle;
87 u32 ref;
88 tipc_msg_err_event err_cb;
89 tipc_named_msg_err_event named_err_cb;
90 tipc_conn_shutdown_event conn_err_cb;
91 tipc_msg_event msg_cb;
92 tipc_named_msg_event named_msg_cb;
93 tipc_conn_msg_event conn_msg_cb;
94 tipc_continue_event continue_event_cb;
95};
96 48
97/** 49/**
98 * struct tipc_port - TIPC port structure 50 * struct tipc_port - TIPC port structure
99 * @usr_handle: pointer to additional user-defined information about port 51 * @sk: pointer to socket handle
100 * @lock: pointer to spinlock for controlling access to port 52 * @lock: pointer to spinlock for controlling access to port
101 * @connected: non-zero if port is currently connected to a peer port 53 * @connected: non-zero if port is currently connected to a peer port
102 * @conn_type: TIPC type used when connection was established 54 * @conn_type: TIPC type used when connection was established
@@ -110,7 +62,6 @@ struct user_port {
110 * @port_list: adjacent ports in TIPC's global list of ports 62 * @port_list: adjacent ports in TIPC's global list of ports
111 * @dispatcher: ptr to routine which handles received messages 63 * @dispatcher: ptr to routine which handles received messages
112 * @wakeup: ptr to routine to call when port is no longer congested 64 * @wakeup: ptr to routine to call when port is no longer congested
113 * @user_port: ptr to user port associated with port (if any)
114 * @wait_list: adjacent ports in list of ports waiting on link congestion 65 * @wait_list: adjacent ports in list of ports waiting on link congestion
115 * @waiting_pkts: 66 * @waiting_pkts:
116 * @sent: # of non-empty messages sent by port 67 * @sent: # of non-empty messages sent by port
@@ -123,7 +74,7 @@ struct user_port {
123 * @subscription: "node down" subscription used to terminate failed connections 74 * @subscription: "node down" subscription used to terminate failed connections
124 */ 75 */
125struct tipc_port { 76struct tipc_port {
126 void *usr_handle; 77 struct sock *sk;
127 spinlock_t *lock; 78 spinlock_t *lock;
128 int connected; 79 int connected;
129 u32 conn_type; 80 u32 conn_type;
@@ -137,7 +88,6 @@ struct tipc_port {
137 struct list_head port_list; 88 struct list_head port_list;
138 u32 (*dispatcher)(struct tipc_port *, struct sk_buff *); 89 u32 (*dispatcher)(struct tipc_port *, struct sk_buff *);
139 void (*wakeup)(struct tipc_port *); 90 void (*wakeup)(struct tipc_port *);
140 struct user_port *user_port;
141 struct list_head wait_list; 91 struct list_head wait_list;
142 u32 waiting_pkts; 92 u32 waiting_pkts;
143 u32 sent; 93 u32 sent;
@@ -156,24 +106,16 @@ struct tipc_port_list;
156/* 106/*
157 * TIPC port manipulation routines 107 * TIPC port manipulation routines
158 */ 108 */
159struct tipc_port *tipc_createport_raw(void *usr_handle, 109struct tipc_port *tipc_createport(struct sock *sk,
160 u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), 110 u32 (*dispatcher)(struct tipc_port *,
161 void (*wakeup)(struct tipc_port *), const u32 importance); 111 struct sk_buff *),
112 void (*wakeup)(struct tipc_port *),
113 const u32 importance);
162 114
163int tipc_reject_msg(struct sk_buff *buf, u32 err); 115int tipc_reject_msg(struct sk_buff *buf, u32 err);
164 116
165int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode);
166
167void tipc_acknowledge(u32 port_ref, u32 ack); 117void tipc_acknowledge(u32 port_ref, u32 ack);
168 118
169int tipc_createport(void *usr_handle,
170 unsigned int importance, tipc_msg_err_event error_cb,
171 tipc_named_msg_err_event named_error_cb,
172 tipc_conn_shutdown_event conn_error_cb, tipc_msg_event msg_cb,
173 tipc_named_msg_event named_msg_cb,
174 tipc_conn_msg_event conn_msg_cb,
175 tipc_continue_event continue_event_cb, u32 *portref);
176
177int tipc_deleteport(u32 portref); 119int tipc_deleteport(u32 portref);
178 120
179int tipc_portimportance(u32 portref, unsigned int *importance); 121int tipc_portimportance(u32 portref, unsigned int *importance);
@@ -186,9 +128,9 @@ int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable);
186int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable); 128int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable);
187 129
188int tipc_publish(u32 portref, unsigned int scope, 130int tipc_publish(u32 portref, unsigned int scope,
189 struct tipc_name_seq const *name_seq); 131 struct tipc_name_seq const *name_seq);
190int tipc_withdraw(u32 portref, unsigned int scope, 132int tipc_withdraw(u32 portref, unsigned int scope,
191 struct tipc_name_seq const *name_seq); 133 struct tipc_name_seq const *name_seq);
192 134
193int tipc_connect(u32 portref, struct tipc_portid const *port); 135int tipc_connect(u32 portref, struct tipc_portid const *port);
194 136
@@ -220,9 +162,6 @@ int tipc_send2port(u32 portref, struct tipc_portid const *dest,
220 unsigned int num_sect, struct iovec const *msg_sect, 162 unsigned int num_sect, struct iovec const *msg_sect,
221 unsigned int total_len); 163 unsigned int total_len);
222 164
223int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest,
224 struct sk_buff *buf, unsigned int dsz);
225
226int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, 165int tipc_multicast(u32 portref, struct tipc_name_seq const *seq,
227 unsigned int section_count, struct iovec const *msg, 166 unsigned int section_count, struct iovec const *msg,
228 unsigned int total_len); 167 unsigned int total_len);
diff --git a/net/tipc/server.c b/net/tipc/server.c
new file mode 100644
index 000000000000..fd3fa57a410e
--- /dev/null
+++ b/net/tipc/server.c
@@ -0,0 +1,605 @@
1/*
2 * net/tipc/server.c: TIPC server infrastructure
3 *
4 * Copyright (c) 2012-2013, Wind River Systems
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the names of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18 *
19 * Alternatively, this software may be distributed under the terms of the
20 * GNU General Public License ("GPL") version 2 as published by the Free
21 * Software Foundation.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36#include "server.h"
37#include "core.h"
38#include <net/sock.h>
39
40/* Number of messages to send before rescheduling */
41#define MAX_SEND_MSG_COUNT 25
42#define MAX_RECV_MSG_COUNT 25
43#define CF_CONNECTED 1
44
45#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data)
46
47/**
48 * struct tipc_conn - TIPC connection structure
49 * @kref: reference counter to connection object
50 * @conid: connection identifier
51 * @sock: socket handler associated with connection
52 * @flags: indicates connection state
53 * @server: pointer to connected server
54 * @rwork: receive work item
55 * @usr_data: user-specified field
56 * @rx_action: what to do when connection socket is active
57 * @outqueue: pointer to first outbound message in queue
58 * @outqueue_lock: control access to the outqueue
60 * @swork: send work item
61 */
62struct tipc_conn {
63 struct kref kref;
64 int conid;
65 struct socket *sock;
66 unsigned long flags;
67 struct tipc_server *server;
68 struct work_struct rwork;
69 int (*rx_action) (struct tipc_conn *con);
70 void *usr_data;
71 struct list_head outqueue;
72 spinlock_t outqueue_lock;
73 struct work_struct swork;
74};
75
76/* An entry waiting to be sent */
77struct outqueue_entry {
78 struct list_head list;
79 struct kvec iov;
80 struct sockaddr_tipc dest;
81};
82
83static void tipc_recv_work(struct work_struct *work);
84static void tipc_send_work(struct work_struct *work);
85static void tipc_clean_outqueues(struct tipc_conn *con);
86
87static void tipc_conn_kref_release(struct kref *kref)
88{
89 struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
90 struct tipc_server *s = con->server;
91
92 if (con->sock) {
93 tipc_sock_release_local(con->sock);
94 con->sock = NULL;
95 }
96
97 tipc_clean_outqueues(con);
98
99 if (con->conid)
100 s->tipc_conn_shutdown(con->conid, con->usr_data);
101
102 kfree(con);
103}
104
105static void conn_put(struct tipc_conn *con)
106{
107 kref_put(&con->kref, tipc_conn_kref_release);
108}
109
110static void conn_get(struct tipc_conn *con)
111{
112 kref_get(&con->kref);
113}
114
115static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
116{
117 struct tipc_conn *con;
118
119 spin_lock_bh(&s->idr_lock);
120 con = idr_find(&s->conn_idr, conid);
121 if (con)
122 conn_get(con);
123 spin_unlock_bh(&s->idr_lock);
124 return con;
125}
126
127static void sock_data_ready(struct sock *sk, int unused)
128{
129 struct tipc_conn *con;
130
131 read_lock(&sk->sk_callback_lock);
132 con = sock2con(sk);
133 if (con && test_bit(CF_CONNECTED, &con->flags)) {
134 conn_get(con);
135 if (!queue_work(con->server->rcv_wq, &con->rwork))
136 conn_put(con);
137 }
138 read_unlock(&sk->sk_callback_lock);
139}
140
141static void sock_write_space(struct sock *sk)
142{
143 struct tipc_conn *con;
144
145 read_lock(&sk->sk_callback_lock);
146 con = sock2con(sk);
147 if (con && test_bit(CF_CONNECTED, &con->flags)) {
148 conn_get(con);
149 if (!queue_work(con->server->send_wq, &con->swork))
150 conn_put(con);
151 }
152 read_unlock(&sk->sk_callback_lock);
153}
154
155static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con)
156{
157 struct sock *sk = sock->sk;
158
159 write_lock_bh(&sk->sk_callback_lock);
160
161 sk->sk_data_ready = sock_data_ready;
162 sk->sk_write_space = sock_write_space;
163 sk->sk_user_data = con;
164
165 con->sock = sock;
166
167 write_unlock_bh(&sk->sk_callback_lock);
168}
169
170static void tipc_unregister_callbacks(struct tipc_conn *con)
171{
172 struct sock *sk = con->sock->sk;
173
174 write_lock_bh(&sk->sk_callback_lock);
175 sk->sk_user_data = NULL;
176 write_unlock_bh(&sk->sk_callback_lock);
177}
178
179static void tipc_close_conn(struct tipc_conn *con)
180{
181 struct tipc_server *s = con->server;
182
183 if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
184 spin_lock_bh(&s->idr_lock);
185 idr_remove(&s->conn_idr, con->conid);
186 s->idr_in_use--;
187 spin_unlock_bh(&s->idr_lock);
188
189 tipc_unregister_callbacks(con);
190
191		/* We must not flush pending work items here, since we may be
192		 * running inside one of those work threads. The races with
193		 * pending rx/tx work structs are harmless: this connection has
194		 * already been removed from the server's connection list and
195		 * sk->sk_user_data cleared before the connection object is released.
196		 */
197 kernel_sock_shutdown(con->sock, SHUT_RDWR);
198
199 conn_put(con);
200 }
201}
202
203static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
204{
205 struct tipc_conn *con;
206 int ret;
207
208 con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC);
209 if (!con)
210 return ERR_PTR(-ENOMEM);
211
212 kref_init(&con->kref);
213 INIT_LIST_HEAD(&con->outqueue);
214 spin_lock_init(&con->outqueue_lock);
215 INIT_WORK(&con->swork, tipc_send_work);
216 INIT_WORK(&con->rwork, tipc_recv_work);
217
218 spin_lock_bh(&s->idr_lock);
219 ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
220 if (ret < 0) {
221 kfree(con);
222 spin_unlock_bh(&s->idr_lock);
223 return ERR_PTR(-ENOMEM);
224 }
225 con->conid = ret;
226 s->idr_in_use++;
227 spin_unlock_bh(&s->idr_lock);
228
229 set_bit(CF_CONNECTED, &con->flags);
230 con->server = s;
231
232 return con;
233}
234
235static int tipc_receive_from_sock(struct tipc_conn *con)
236{
237 struct msghdr msg = {};
238 struct tipc_server *s = con->server;
239 struct sockaddr_tipc addr;
240 struct kvec iov;
241 void *buf;
242 int ret;
243
244 buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC);
245 if (!buf) {
246 ret = -ENOMEM;
247 goto out_close;
248 }
249
250 iov.iov_base = buf;
251 iov.iov_len = s->max_rcvbuf_size;
252 msg.msg_name = &addr;
253 ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
254 MSG_DONTWAIT);
255 if (ret <= 0) {
256 kmem_cache_free(s->rcvbuf_cache, buf);
257 goto out_close;
258 }
259
260 s->tipc_conn_recvmsg(con->conid, &addr, con->usr_data, buf, ret);
261
262 kmem_cache_free(s->rcvbuf_cache, buf);
263
264 return 0;
265
266out_close:
267 if (ret != -EWOULDBLOCK)
268 tipc_close_conn(con);
269	if (ret == 0)
270 /* Don't return success if we really got EOF */
271 ret = -EAGAIN;
272
273 return ret;
274}
275
276static int tipc_accept_from_sock(struct tipc_conn *con)
277{
278 struct tipc_server *s = con->server;
279 struct socket *sock = con->sock;
280 struct socket *newsock;
281 struct tipc_conn *newcon;
282 int ret;
283
284 ret = tipc_sock_accept_local(sock, &newsock, O_NONBLOCK);
285 if (ret < 0)
286 return ret;
287
288 newcon = tipc_alloc_conn(con->server);
289 if (IS_ERR(newcon)) {
290 ret = PTR_ERR(newcon);
291 sock_release(newsock);
292 return ret;
293 }
294
295 newcon->rx_action = tipc_receive_from_sock;
296 tipc_register_callbacks(newsock, newcon);
297
298 /* Notify that new connection is incoming */
299 newcon->usr_data = s->tipc_conn_new(newcon->conid);
300
301 /* Wake up receive process in case of 'SYN+' message */
302 newsock->sk->sk_data_ready(newsock->sk, 0);
303 return ret;
304}
305
306static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
307{
308 struct tipc_server *s = con->server;
309 struct socket *sock = NULL;
310 int ret;
311
312 ret = tipc_sock_create_local(s->type, &sock);
313 if (ret < 0)
314 return NULL;
315 ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
316 (char *)&s->imp, sizeof(s->imp));
317 if (ret < 0)
318 goto create_err;
319 ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr));
320 if (ret < 0)
321 goto create_err;
322
323 switch (s->type) {
324 case SOCK_STREAM:
325 case SOCK_SEQPACKET:
326 con->rx_action = tipc_accept_from_sock;
327
328 ret = kernel_listen(sock, 0);
329 if (ret < 0)
330 goto create_err;
331 break;
332 case SOCK_DGRAM:
333 case SOCK_RDM:
334 con->rx_action = tipc_receive_from_sock;
335 break;
336 default:
337 pr_err("Unknown socket type %d\n", s->type);
338 goto create_err;
339 }
340 return sock;
341
342create_err:
343 sock_release(sock);
344 con->sock = NULL;
345 return NULL;
346}
347
348static int tipc_open_listening_sock(struct tipc_server *s)
349{
350 struct socket *sock;
351 struct tipc_conn *con;
352
353 con = tipc_alloc_conn(s);
354 if (IS_ERR(con))
355 return PTR_ERR(con);
356
357 sock = tipc_create_listen_sock(con);
358 if (!sock) {
359 idr_remove(&s->conn_idr, con->conid);
360 s->idr_in_use--;
361 kfree(con);
362 return -EINVAL;
363 }
364
365 tipc_register_callbacks(sock, con);
366 return 0;
367}
368
369static struct outqueue_entry *tipc_alloc_entry(void *data, int len)
370{
371 struct outqueue_entry *entry;
372 void *buf;
373
374 entry = kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC);
375 if (!entry)
376 return NULL;
377
378 buf = kmalloc(len, GFP_ATOMIC);
379 if (!buf) {
380 kfree(entry);
381 return NULL;
382 }
383
384 memcpy(buf, data, len);
385 entry->iov.iov_base = buf;
386 entry->iov.iov_len = len;
387
388 return entry;
389}
390
391static void tipc_free_entry(struct outqueue_entry *e)
392{
393 kfree(e->iov.iov_base);
394 kfree(e);
395}
396
397static void tipc_clean_outqueues(struct tipc_conn *con)
398{
399 struct outqueue_entry *e, *safe;
400
401 spin_lock_bh(&con->outqueue_lock);
402 list_for_each_entry_safe(e, safe, &con->outqueue, list) {
403 list_del(&e->list);
404 tipc_free_entry(e);
405 }
406 spin_unlock_bh(&con->outqueue_lock);
407}
408
409int tipc_conn_sendmsg(struct tipc_server *s, int conid,
410 struct sockaddr_tipc *addr, void *data, size_t len)
411{
412 struct outqueue_entry *e;
413 struct tipc_conn *con;
414
415 con = tipc_conn_lookup(s, conid);
416 if (!con)
417 return -EINVAL;
418
419 e = tipc_alloc_entry(data, len);
420 if (!e) {
421 conn_put(con);
422 return -ENOMEM;
423 }
424
425 if (addr)
426 memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc));
427
428 spin_lock_bh(&con->outqueue_lock);
429 list_add_tail(&e->list, &con->outqueue);
430 spin_unlock_bh(&con->outqueue_lock);
431
432 if (test_bit(CF_CONNECTED, &con->flags))
433 if (!queue_work(s->send_wq, &con->swork))
434 conn_put(con);
435
436 return 0;
437}
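
A hedged usage sketch of the queueing API above, e.g. replying from inside a server's receive callback; the demo_* names are hypothetical and not part of this patch:

	static void demo_reply(struct tipc_server *srv, int conid,
			       struct sockaddr_tipc *addr)
	{
		static const char ack[] = "ack";

		/* the data is copied into an outqueue entry, so the
		 * caller keeps ownership of 'ack' */
		if (tipc_conn_sendmsg(srv, conid, addr, (void *)ack,
				      sizeof(ack)))
			pr_warn("demo: failed to queue reply\n");
	}
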
438
439void tipc_conn_terminate(struct tipc_server *s, int conid)
440{
441 struct tipc_conn *con;
442
443 con = tipc_conn_lookup(s, conid);
444 if (con) {
445 tipc_close_conn(con);
446 conn_put(con);
447 }
448}
449
450static void tipc_send_to_sock(struct tipc_conn *con)
451{
452 int count = 0;
453 struct tipc_server *s = con->server;
454 struct outqueue_entry *e;
455 struct msghdr msg;
456 int ret;
457
458 spin_lock_bh(&con->outqueue_lock);
459 while (1) {
460 e = list_entry(con->outqueue.next, struct outqueue_entry,
461 list);
462 if ((struct list_head *) e == &con->outqueue)
463 break;
464 spin_unlock_bh(&con->outqueue_lock);
465
466 memset(&msg, 0, sizeof(msg));
467 msg.msg_flags = MSG_DONTWAIT;
468
469 if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
470 msg.msg_name = &e->dest;
471 msg.msg_namelen = sizeof(struct sockaddr_tipc);
472 }
473 ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
474 e->iov.iov_len);
475 if (ret == -EWOULDBLOCK || ret == 0) {
476 cond_resched();
477 goto out;
478 } else if (ret < 0) {
479 goto send_err;
480 }
481
482 /* Don't starve users filling buffers */
483 if (++count >= MAX_SEND_MSG_COUNT) {
484 cond_resched();
485 count = 0;
486 }
487
488 spin_lock_bh(&con->outqueue_lock);
489 list_del(&e->list);
490 tipc_free_entry(e);
491 }
492 spin_unlock_bh(&con->outqueue_lock);
493out:
494 return;
495
496send_err:
497 tipc_close_conn(con);
498}
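Note the locking pattern in tipc_send_to_sock() above: the head entry is peeked under outqueue_lock, the lock is dropped for the actual kernel_sendmsg() call (which may block on the socket), and the entry is unlinked and freed only after the send succeeded; on -EWOULDBLOCK it simply stays queued for the next run of the send work. A minimal user-space model of that pattern, assuming a single sender thread (which the kernel code guarantees via the single-threaded tipc_send workqueue); all names here are illustrative, not kernel API:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct entry { struct entry *next; int payload; };

static pthread_mutex_t outqueue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *head, *tail;

static int do_send(int payload)               /* stand-in for kernel_sendmsg() */
{
	printf("sent %d\n", payload);
	return 0;                             /* < 0 would mean "close the connection" */
}

static void send_to_sock(void)
{
	pthread_mutex_lock(&outqueue_lock);
	while (head) {
		struct entry *e = head;

		pthread_mutex_unlock(&outqueue_lock);    /* never send under the lock */
		if (do_send(e->payload) < 0)
			return;                          /* kernel: goto send_err */
		pthread_mutex_lock(&outqueue_lock);
		head = e->next;                          /* unlink only after success */
		if (!head)
			tail = NULL;
		free(e);
	}
	pthread_mutex_unlock(&outqueue_lock);
}

int main(void)
{
	for (int i = 1; i <= 3; i++) {               /* models tipc_conn_sendmsg() */
		struct entry *e = calloc(1, sizeof(*e));
		e->payload = i;
		pthread_mutex_lock(&outqueue_lock);
		if (tail)
			tail->next = e;
		else
			head = e;
		tail = e;
		pthread_mutex_unlock(&outqueue_lock);
	}
	send_to_sock();
	return 0;
}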
499
500static void tipc_recv_work(struct work_struct *work)
501{
502 struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
503 int count = 0;
504
505 while (test_bit(CF_CONNECTED, &con->flags)) {
506 if (con->rx_action(con))
507 break;
508
509 /* Don't flood Rx machine */
510 if (++count >= MAX_RECV_MSG_COUNT) {
511 cond_resched();
512 count = 0;
513 }
514 }
515 conn_put(con);
516}
517
518static void tipc_send_work(struct work_struct *work)
519{
520 struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
521
522 if (test_bit(CF_CONNECTED, &con->flags))
523 tipc_send_to_sock(con);
524
525 conn_put(con);
526}
527
528static void tipc_work_stop(struct tipc_server *s)
529{
530 destroy_workqueue(s->rcv_wq);
531 destroy_workqueue(s->send_wq);
532}
533
534static int tipc_work_start(struct tipc_server *s)
535{
536 s->rcv_wq = alloc_workqueue("tipc_rcv", WQ_UNBOUND, 1);
537 if (!s->rcv_wq) {
538 pr_err("can't start tipc receive workqueue\n");
539 return -ENOMEM;
540 }
541
542 s->send_wq = alloc_workqueue("tipc_send", WQ_UNBOUND, 1);
543 if (!s->send_wq) {
544 pr_err("can't start tipc send workqueue\n");
545 destroy_workqueue(s->rcv_wq);
546 return -ENOMEM;
547 }
548
549 return 0;
550}
551
552int tipc_server_start(struct tipc_server *s)
553{
554 int ret;
555
556 spin_lock_init(&s->idr_lock);
557 idr_init(&s->conn_idr);
558 s->idr_in_use = 0;
559
560 s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size,
561 0, SLAB_HWCACHE_ALIGN, NULL);
562 if (!s->rcvbuf_cache)
563 return -ENOMEM;
564
565 ret = tipc_work_start(s);
566 if (ret < 0) {
567 kmem_cache_destroy(s->rcvbuf_cache);
568 return ret;
569 }
570 ret = tipc_open_listening_sock(s);
571 if (ret < 0) {
572 tipc_work_stop(s);
573 kmem_cache_destroy(s->rcvbuf_cache);
574 return ret;
575 }
576 s->enabled = 1;
577 return ret;
578}
579
580void tipc_server_stop(struct tipc_server *s)
581{
582 struct tipc_conn *con;
583 int total = 0;
584 int id;
585
586 if (!s->enabled)
587 return;
588
589 s->enabled = 0;
590 spin_lock_bh(&s->idr_lock);
591 for (id = 0; total < s->idr_in_use; id++) {
592 con = idr_find(&s->conn_idr, id);
593 if (con) {
594 total++;
595 spin_unlock_bh(&s->idr_lock);
596 tipc_close_conn(con);
597 spin_lock_bh(&s->idr_lock);
598 }
599 }
600 spin_unlock_bh(&s->idr_lock);
601
602 tipc_work_stop(s);
603 kmem_cache_destroy(s->rcvbuf_cache);
604 idr_destroy(&s->conn_idr);
605}
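The teardown loop in tipc_server_stop() is also worth noting: idr_lock cannot be held across tipc_close_conn(), which flushes the connection's work items and may sleep, so the loop drops and re-takes the lock around each close and stops once it has closed idr_in_use connections instead of assuming a contiguous id range. A user-space model of that drain, with a fixed array standing in for the idr (the kernel loop is bounded by the in-use count alone); all names are illustrative:

#include <pthread.h>
#include <stdio.h>

#define MAX_ID 8

static pthread_mutex_t idr_lock = PTHREAD_MUTEX_INITIALIZER;
static int conns[MAX_ID] = { 0, 1, 0, 1, 1, 0, 0, 1 };  /* 1 = live connection */
static int in_use = 4;

static void close_conn(int id)          /* stand-in for tipc_close_conn() */
{
	printf("closing conn %d\n", id); /* the real close may flush work and sleep */
	conns[id] = 0;
}

int main(void)
{
	int total = 0;

	pthread_mutex_lock(&idr_lock);
	for (int id = 0; total < in_use && id < MAX_ID; id++) {
		if (conns[id]) {
			total++;
			pthread_mutex_unlock(&idr_lock);  /* can't sleep under the lock */
			close_conn(id);
			pthread_mutex_lock(&idr_lock);
		}
	}
	pthread_mutex_unlock(&idr_lock);
	return 0;
}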
diff --git a/net/tipc/server.h b/net/tipc/server.h
new file mode 100644
index 000000000000..98b23f20bc0f
--- /dev/null
+++ b/net/tipc/server.h
@@ -0,0 +1,94 @@
1/*
2 * net/tipc/server.h: Include file for TIPC server code
3 *
4 * Copyright (c) 2012-2013, Wind River Systems
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the names of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18 *
19 * Alternatively, this software may be distributed under the terms of the
20 * GNU General Public License ("GPL") version 2 as published by the Free
21 * Software Foundation.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36#ifndef _TIPC_SERVER_H
37#define _TIPC_SERVER_H
38
39#include "core.h"
40
41#define TIPC_SERVER_NAME_LEN 32
42
43/**
44 * struct tipc_server - TIPC server structure
45 * @conn_idr: identifier set of connections
46 * @idr_lock: protect the connection identifier set
47 * @idr_in_use: number of allocated identifier entries
48 * @rcvbuf_cache: memory cache of server receive buffer
49 * @rcv_wq: receive workqueue
50 * @send_wq: send workqueue
51 * @max_rcvbuf_size: maximum permitted receive message length
52 * @tipc_conn_new: callback invoked when a new connection arrives
53 * @tipc_conn_shutdown: callback invoked when a connection is shut down
54 * @tipc_conn_recvmsg: callback invoked when a message arrives
55 * @saddr: TIPC server address
56 * @name: server name
57 * @imp: message importance
58 * @type: socket type
59 * @enabled: identifies whether the server has been launched
60 */
61struct tipc_server {
62 struct idr conn_idr;
63 spinlock_t idr_lock;
64 int idr_in_use;
65 struct kmem_cache *rcvbuf_cache;
66 struct workqueue_struct *rcv_wq;
67 struct workqueue_struct *send_wq;
68 int max_rcvbuf_size;
69 void *(*tipc_conn_new) (int conid);
70 void (*tipc_conn_shutdown) (int conid, void *usr_data);
71 void (*tipc_conn_recvmsg) (int conid, struct sockaddr_tipc *addr,
72 void *usr_data, void *buf, size_t len);
73 struct sockaddr_tipc *saddr;
74 const char name[TIPC_SERVER_NAME_LEN];
75 int imp;
76 int type;
77 int enabled;
78};
79
80int tipc_conn_sendmsg(struct tipc_server *s, int conid,
81 struct sockaddr_tipc *addr, void *data, size_t len);
82
83/**
84 * tipc_conn_terminate - terminate connection with server
85 *
86 * Note: Must be called in process context since it might sleep
87 */
88void tipc_conn_terminate(struct tipc_server *s, int conid);
89
90int tipc_server_start(struct tipc_server *s);
91
92void tipc_server_stop(struct tipc_server *s);
93
94#endif
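The struct above doubles as the server's static configuration: a service fills in the address, socket type, and the three callbacks, and tipc_server_start() does the rest. A sketch of a typical instantiation, modelled directly on the topology server definition in net/tipc/subscr.c further down in this patch (kernel context, not a standalone program; the my_* names and the 1024-byte receive limit are placeholders):

static void *my_conn_new(int conid);                 /* returns per-connection data */
static void my_conn_shutdown(int conid, void *usr_data);
static void my_conn_recvmsg(int conid, struct sockaddr_tipc *addr,
			    void *usr_data, void *buf, size_t len);

static struct sockaddr_tipc my_srv_addr = {
	.family             = AF_TIPC,
	.addrtype           = TIPC_ADDR_NAMESEQ,
	.addr.nameseq.type  = TIPC_TOP_SRV,          /* a real service picks its own type */
	.addr.nameseq.lower = TIPC_TOP_SRV,
	.addr.nameseq.upper = TIPC_TOP_SRV,
	.scope              = TIPC_NODE_SCOPE
};

static struct tipc_server my_srv = {
	.saddr              = &my_srv_addr,
	.imp                = TIPC_CRITICAL_IMPORTANCE,
	.type               = SOCK_SEQPACKET,        /* stream types listen and accept */
	.max_rcvbuf_size    = 1024,                  /* largest expected request */
	.name               = "my_server",
	.tipc_conn_new      = my_conn_new,
	.tipc_conn_shutdown = my_conn_shutdown,
	.tipc_conn_recvmsg  = my_conn_recvmsg,
};

/* at init:  err = tipc_server_start(&my_srv);
 * at exit:  tipc_server_stop(&my_srv);        */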
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 515ce38e4f4c..6cc7ddd2fb7c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2,7 +2,7 @@
2 * net/tipc/socket.c: TIPC socket API 2 * net/tipc/socket.c: TIPC socket API
3 * 3 *
4 * Copyright (c) 2001-2007, 2012 Ericsson AB 4 * Copyright (c) 2001-2007, 2012 Ericsson AB
5 * Copyright (c) 2004-2008, 2010-2012, Wind River Systems 5 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -43,8 +43,6 @@
43#define SS_LISTENING -1 /* socket is listening */ 43#define SS_LISTENING -1 /* socket is listening */
44#define SS_READY -2 /* socket is connectionless */ 44#define SS_READY -2 /* socket is connectionless */
45 45
46#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \
47 SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
48#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ 46#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
49 47
50struct tipc_sock { 48struct tipc_sock {
@@ -65,12 +63,15 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf);
65static void wakeupdispatch(struct tipc_port *tport); 63static void wakeupdispatch(struct tipc_port *tport);
66static void tipc_data_ready(struct sock *sk, int len); 64static void tipc_data_ready(struct sock *sk, int len);
67static void tipc_write_space(struct sock *sk); 65static void tipc_write_space(struct sock *sk);
66static int release(struct socket *sock);
67static int accept(struct socket *sock, struct socket *new_sock, int flags);
68 68
69static const struct proto_ops packet_ops; 69static const struct proto_ops packet_ops;
70static const struct proto_ops stream_ops; 70static const struct proto_ops stream_ops;
71static const struct proto_ops msg_ops; 71static const struct proto_ops msg_ops;
72 72
73static struct proto tipc_proto; 73static struct proto tipc_proto;
74static struct proto tipc_proto_kern;
74 75
75static int sockets_enabled; 76static int sockets_enabled;
76 77
@@ -143,7 +144,7 @@ static void reject_rx_queue(struct sock *sk)
143} 144}
144 145
145/** 146/**
146 * tipc_create - create a TIPC socket 147 * tipc_sk_create - create a TIPC socket
147 * @net: network namespace (must be default network) 148 * @net: network namespace (must be default network)
148 * @sock: pre-allocated socket structure 149 * @sock: pre-allocated socket structure
149 * @protocol: protocol indicator (must be 0) 150 * @protocol: protocol indicator (must be 0)
@@ -154,8 +155,8 @@ static void reject_rx_queue(struct sock *sk)
154 * 155 *
155 * Returns 0 on success, errno otherwise 156 * Returns 0 on success, errno otherwise
156 */ 157 */
157static int tipc_create(struct net *net, struct socket *sock, int protocol, 158static int tipc_sk_create(struct net *net, struct socket *sock, int protocol,
158 int kern) 159 int kern)
159{ 160{
160 const struct proto_ops *ops; 161 const struct proto_ops *ops;
161 socket_state state; 162 socket_state state;
@@ -185,13 +186,17 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
185 } 186 }
186 187
187 /* Allocate socket's protocol area */ 188 /* Allocate socket's protocol area */
188 sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); 189 if (!kern)
190 sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
191 else
192 sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto_kern);
193
189 if (sk == NULL) 194 if (sk == NULL)
190 return -ENOMEM; 195 return -ENOMEM;
191 196
192 /* Allocate TIPC port for socket to use */ 197 /* Allocate TIPC port for socket to use */
193 tp_ptr = tipc_createport_raw(sk, &dispatch, &wakeupdispatch, 198 tp_ptr = tipc_createport(sk, &dispatch, &wakeupdispatch,
194 TIPC_LOW_IMPORTANCE); 199 TIPC_LOW_IMPORTANCE);
195 if (unlikely(!tp_ptr)) { 200 if (unlikely(!tp_ptr)) {
196 sk_free(sk); 201 sk_free(sk);
197 return -ENOMEM; 202 return -ENOMEM;
@@ -203,6 +208,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
203 208
204 sock_init_data(sock, sk); 209 sock_init_data(sock, sk);
205 sk->sk_backlog_rcv = backlog_rcv; 210 sk->sk_backlog_rcv = backlog_rcv;
211 sk->sk_rcvbuf = sysctl_tipc_rmem[1];
206 sk->sk_data_ready = tipc_data_ready; 212 sk->sk_data_ready = tipc_data_ready;
207 sk->sk_write_space = tipc_write_space; 213 sk->sk_write_space = tipc_write_space;
208 tipc_sk(sk)->p = tp_ptr; 214 tipc_sk(sk)->p = tp_ptr;
@@ -220,6 +226,78 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
220} 226}
221 227
222/** 228/**
229 * tipc_sock_create_local - create TIPC socket from inside TIPC module
230 * @type: socket type - SOCK_RDM or SOCK_SEQPACKET
231 *
232 * We cannot use sock_create_kern here because it bumps the module user count.
233 * Since the socket owner and creator is the same module, we must make sure
234 * that the module count remains zero for module-local sockets; otherwise
235 * we cannot do rmmod.
236 *
237 * Returns 0 on success, errno otherwise
238 */
239int tipc_sock_create_local(int type, struct socket **res)
240{
241 int rc;
242 struct sock *sk;
243
244 rc = sock_create_lite(AF_TIPC, type, 0, res);
245 if (rc < 0) {
246 pr_err("Failed to create kernel socket\n");
247 return rc;
248 }
249 tipc_sk_create(&init_net, *res, 0, 1);
250
251 sk = (*res)->sk;
252
253 return 0;
254}
255
256/**
257 * tipc_sock_release_local - release socket created by tipc_sock_create_local
258 * @sock: the socket to be released.
259 *
260 * Module reference count is not incremented when such sockets are created,
261 * so we must keep it from being decremented when they are released.
262 */
263void tipc_sock_release_local(struct socket *sock)
264{
265 release(sock);
266 sock->ops = NULL;
267 sock_release(sock);
268}
269
270/**
271 * tipc_sock_accept_local - accept a connection on a socket created
272 * with tipc_sock_create_local. Use this function to avoid
273 * inadvertently incrementing the module reference count.
274 *
275 * @sock: the accepting socket
276 * @newsock: reference to the new socket to be created
277 * @flags: socket flags
278 */
279
280int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
281 int flags)
282{
283 struct sock *sk = sock->sk;
284 int ret;
285
286 ret = sock_create_lite(sk->sk_family, sk->sk_type,
287 sk->sk_protocol, newsock);
288 if (ret < 0)
289 return ret;
290
291 ret = accept(sock, *newsock, flags);
292 if (ret < 0) {
293 sock_release(*newsock);
294 return ret;
295 }
296 (*newsock)->ops = sock->ops;
297 return ret;
298}
299
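These three helpers form a matched set: sockets created with tipc_sock_create_local() take no module reference, so they must be accepted and released only through the corresponding *_local() variants. A sketch of the intended in-module usage (kernel context; error handling abbreviated, the SOCK_SEQPACKET choice and the zero flags argument are illustrative):

static void example_local_socket_user(void)
{
	struct socket *lsock, *newsock;

	if (tipc_sock_create_local(SOCK_SEQPACKET, &lsock))
		return;                            /* no module reference was taken */

	/* ... kernel_bind()/kernel_listen() on lsock, as
	 * tipc_create_listen_sock() in server.c does ... */

	if (tipc_sock_accept_local(lsock, &newsock, 0) == 0) {
		/* ... exchange messages on newsock ... */
		tipc_sock_release_local(newsock);  /* not plain sock_release() */
	}
	tipc_sock_release_local(lsock);
}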
300/**
223 * release - destroy a TIPC socket 301 * release - destroy a TIPC socket
224 * @sock: socket to destroy 302 * @sock: socket to destroy
225 * 303 *
@@ -324,7 +402,9 @@ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len)
324 else if (addr->addrtype != TIPC_ADDR_NAMESEQ) 402 else if (addr->addrtype != TIPC_ADDR_NAMESEQ)
325 return -EAFNOSUPPORT; 403 return -EAFNOSUPPORT;
326 404
327 if (addr->addr.nameseq.type < TIPC_RESERVED_TYPES) 405 if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
406 (addr->addr.nameseq.type != TIPC_TOP_SRV) &&
407 (addr->addr.nameseq.type != TIPC_CFG_SRV))
328 return -EACCES; 408 return -EACCES;
329 409
330 return (addr->scope > 0) ? 410 return (addr->scope > 0) ?
@@ -519,8 +599,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
519 res = -EISCONN; 599 res = -EISCONN;
520 goto exit; 600 goto exit;
521 } 601 }
522 if ((tport->published) || 602 if (tport->published) {
523 ((sock->type == SOCK_STREAM) && (total_len != 0))) {
524 res = -EOPNOTSUPP; 603 res = -EOPNOTSUPP;
525 goto exit; 604 goto exit;
526 } 605 }
@@ -810,7 +889,7 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
810 * Returns 0 if successful, otherwise errno 889 * Returns 0 if successful, otherwise errno
811 */ 890 */
812static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, 891static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
813 struct tipc_port *tport) 892 struct tipc_port *tport)
814{ 893{
815 u32 anc_data[3]; 894 u32 anc_data[3];
816 u32 err; 895 u32 err;
@@ -1011,8 +1090,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
1011 1090
1012 lock_sock(sk); 1091 lock_sock(sk);
1013 1092
1014 if (unlikely((sock->state == SS_UNCONNECTED) || 1093 if (unlikely((sock->state == SS_UNCONNECTED))) {
1015 (sock->state == SS_CONNECTING))) {
1016 res = -ENOTCONN; 1094 res = -ENOTCONN;
1017 goto exit; 1095 goto exit;
1018 } 1096 }
@@ -1179,7 +1257,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
1179 /* Accept only ACK or NACK message */ 1257 /* Accept only ACK or NACK message */
1180 if (unlikely(msg_errcode(msg))) { 1258 if (unlikely(msg_errcode(msg))) {
1181 sock->state = SS_DISCONNECTING; 1259 sock->state = SS_DISCONNECTING;
1182 sk->sk_err = -ECONNREFUSED; 1260 sk->sk_err = ECONNREFUSED;
1183 retval = TIPC_OK; 1261 retval = TIPC_OK;
1184 break; 1262 break;
1185 } 1263 }
@@ -1190,7 +1268,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
1190 res = auto_connect(sock, msg); 1268 res = auto_connect(sock, msg);
1191 if (res) { 1269 if (res) {
1192 sock->state = SS_DISCONNECTING; 1270 sock->state = SS_DISCONNECTING;
1193 sk->sk_err = res; 1271 sk->sk_err = -res;
1194 retval = TIPC_OK; 1272 retval = TIPC_OK;
1195 break; 1273 break;
1196 } 1274 }
@@ -1233,10 +1311,10 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
1233 * For all connectionless messages, by default new queue limits are 1311 * For all connectionless messages, by default new queue limits are
1234 * as below: 1312
1235 * 1313 *
1236 * TIPC_LOW_IMPORTANCE (5MB) 1314 * TIPC_LOW_IMPORTANCE (4 MB)
1237 * TIPC_MEDIUM_IMPORTANCE (10MB) 1315 * TIPC_MEDIUM_IMPORTANCE (8 MB)
1238 * TIPC_HIGH_IMPORTANCE (20MB) 1316 * TIPC_HIGH_IMPORTANCE (16 MB)
1239 * TIPC_CRITICAL_IMPORTANCE (40MB) 1317 * TIPC_CRITICAL_IMPORTANCE (32 MB)
1240 * 1318 *
1241 * Returns overload limit according to corresponding message importance 1319 * Returns overload limit according to corresponding message importance
1242 */ 1320 */
@@ -1246,9 +1324,10 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
1246 unsigned int limit; 1324 unsigned int limit;
1247 1325
1248 if (msg_connected(msg)) 1326 if (msg_connected(msg))
1249 limit = CONN_OVERLOAD_LIMIT; 1327 limit = sysctl_tipc_rmem[2];
1250 else 1328 else
1251 limit = sk->sk_rcvbuf << (msg_importance(msg) + 5); 1329 limit = sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE <<
1330 msg_importance(msg);
1252 return limit; 1331 return limit;
1253} 1332}
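Worked numbers for the new expression: with TIPC_CRITICAL_IMPORTANCE == 3, "sk_rcvbuf >> 3 << msg_importance" scales the socket buffer down to one eighth and back up per importance level, which reproduces the 4/8/16/32 MB ladder in the comment when sk_rcvbuf is 32 MB (the 32 MB figure is inferred from the comment above, not stated in this hunk). A runnable check:

#include <stdio.h>

#define TIPC_CRITICAL_IMPORTANCE 3   /* importance levels run 0..3 */

int main(void)
{
	unsigned int sk_rcvbuf = 32u << 20;     /* assumed default, per the comment */

	for (unsigned int imp = 0; imp <= 3; imp++)
		printf("importance %u -> limit %u MB\n",
		       imp, (sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE << imp) >> 20);
	return 0;   /* prints 4, 8, 16, 32 */
}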
1254 1333
@@ -1327,7 +1406,7 @@ static int backlog_rcv(struct sock *sk, struct sk_buff *buf)
1327 */ 1406 */
1328static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) 1407static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
1329{ 1408{
1330 struct sock *sk = (struct sock *)tport->usr_handle; 1409 struct sock *sk = tport->sk;
1331 u32 res; 1410 u32 res;
1332 1411
1333 /* 1412 /*
@@ -1358,7 +1437,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
1358 */ 1437 */
1359static void wakeupdispatch(struct tipc_port *tport) 1438static void wakeupdispatch(struct tipc_port *tport)
1360{ 1439{
1361 struct sock *sk = (struct sock *)tport->usr_handle; 1440 struct sock *sk = tport->sk;
1362 1441
1363 sk->sk_write_space(sk); 1442 sk->sk_write_space(sk);
1364} 1443}
@@ -1531,7 +1610,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
1531 1610
1532 buf = skb_peek(&sk->sk_receive_queue); 1611 buf = skb_peek(&sk->sk_receive_queue);
1533 1612
1534 res = tipc_create(sock_net(sock->sk), new_sock, 0, 0); 1613 res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1);
1535 if (res) 1614 if (res)
1536 goto exit; 1615 goto exit;
1537 1616
@@ -1657,8 +1736,8 @@ restart:
1657 * 1736 *
1658 * Returns 0 on success, errno otherwise 1737 * Returns 0 on success, errno otherwise
1659 */ 1738 */
1660static int setsockopt(struct socket *sock, 1739static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
1661 int lvl, int opt, char __user *ov, unsigned int ol) 1740 unsigned int ol)
1662{ 1741{
1663 struct sock *sk = sock->sk; 1742 struct sock *sk = sock->sk;
1664 struct tipc_port *tport = tipc_sk_port(sk); 1743 struct tipc_port *tport = tipc_sk_port(sk);
@@ -1716,8 +1795,8 @@ static int setsockopt(struct socket *sock,
1716 * 1795 *
1717 * Returns 0 on success, errno otherwise 1796 * Returns 0 on success, errno otherwise
1718 */ 1797 */
1719static int getsockopt(struct socket *sock, 1798static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
1720 int lvl, int opt, char __user *ov, int __user *ol) 1799 int __user *ol)
1721{ 1800{
1722 struct sock *sk = sock->sk; 1801 struct sock *sk = sock->sk;
1723 struct tipc_port *tport = tipc_sk_port(sk); 1802 struct tipc_port *tport = tipc_sk_port(sk);
@@ -1841,13 +1920,20 @@ static const struct proto_ops stream_ops = {
1841static const struct net_proto_family tipc_family_ops = { 1920static const struct net_proto_family tipc_family_ops = {
1842 .owner = THIS_MODULE, 1921 .owner = THIS_MODULE,
1843 .family = AF_TIPC, 1922 .family = AF_TIPC,
1844 .create = tipc_create 1923 .create = tipc_sk_create
1845}; 1924};
1846 1925
1847static struct proto tipc_proto = { 1926static struct proto tipc_proto = {
1848 .name = "TIPC", 1927 .name = "TIPC",
1849 .owner = THIS_MODULE, 1928 .owner = THIS_MODULE,
1850 .obj_size = sizeof(struct tipc_sock) 1929 .obj_size = sizeof(struct tipc_sock),
1930 .sysctl_rmem = sysctl_tipc_rmem
1931};
1932
1933static struct proto tipc_proto_kern = {
1934 .name = "TIPC",
1935 .obj_size = sizeof(struct tipc_sock),
1936 .sysctl_rmem = sysctl_tipc_rmem
1851}; 1937};
1852 1938
1853/** 1939/**
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 6b42d47029af..d38bb45d82e9 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -2,7 +2,7 @@
2 * net/tipc/subscr.c: TIPC network topology service 2 * net/tipc/subscr.c: TIPC network topology service
3 * 3 *
4 * Copyright (c) 2000-2006, Ericsson AB 4 * Copyright (c) 2000-2006, Ericsson AB
5 * Copyright (c) 2005-2007, 2010-2011, Wind River Systems 5 * Copyright (c) 2005-2007, 2010-2013, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -41,33 +41,42 @@
41 41
42/** 42/**
43 * struct tipc_subscriber - TIPC network topology subscriber 43 * struct tipc_subscriber - TIPC network topology subscriber
44 * @port_ref: object reference to server port connecting to subscriber 44 * @conid: connection identifier to server connecting to subscriber
45 * @lock: pointer to spinlock controlling access to subscriber's server port 45 * @lock: controls access to subscriber
46 * @subscriber_list: adjacent subscribers in top. server's list of subscribers
47 * @subscription_list: list of subscription objects for this subscriber 46 * @subscription_list: list of subscription objects for this subscriber
48 */ 47 */
49struct tipc_subscriber { 48struct tipc_subscriber {
50 u32 port_ref; 49 int conid;
51 spinlock_t *lock; 50 spinlock_t lock;
52 struct list_head subscriber_list;
53 struct list_head subscription_list; 51 struct list_head subscription_list;
54}; 52};
55 53
56/** 54static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr,
57 * struct top_srv - TIPC network topology subscription service 55 void *usr_data, void *buf, size_t len);
58 * @setup_port: reference to TIPC port that handles subscription requests 56static void *subscr_named_msg_event(int conid);
59 * @subscription_count: number of active subscriptions (not subscribers!) 57static void subscr_conn_shutdown_event(int conid, void *usr_data);
60 * @subscriber_list: list of ports subscribing to service 58
61 * @lock: spinlock governing access to subscriber list 59static atomic_t subscription_count = ATOMIC_INIT(0);
62 */ 60
63struct top_srv { 61static struct sockaddr_tipc topsrv_addr __read_mostly = {
64 u32 setup_port; 62 .family = AF_TIPC,
65 atomic_t subscription_count; 63 .addrtype = TIPC_ADDR_NAMESEQ,
66 struct list_head subscriber_list; 64 .addr.nameseq.type = TIPC_TOP_SRV,
67 spinlock_t lock; 65 .addr.nameseq.lower = TIPC_TOP_SRV,
66 .addr.nameseq.upper = TIPC_TOP_SRV,
67 .scope = TIPC_NODE_SCOPE
68}; 68};
69 69
70static struct top_srv topsrv; 70static struct tipc_server topsrv __read_mostly = {
71 .saddr = &topsrv_addr,
72 .imp = TIPC_CRITICAL_IMPORTANCE,
73 .type = SOCK_SEQPACKET,
74 .max_rcvbuf_size = sizeof(struct tipc_subscr),
75 .name = "topology_server",
76 .tipc_conn_recvmsg = subscr_conn_msg_event,
77 .tipc_conn_new = subscr_named_msg_event,
78 .tipc_conn_shutdown = subscr_conn_shutdown_event,
79};
71 80
72/** 81/**
73 * htohl - convert value to endianness used by destination 82 * htohl - convert value to endianness used by destination
@@ -81,20 +90,13 @@ static u32 htohl(u32 in, int swap)
81 return swap ? swab32(in) : in; 90 return swap ? swab32(in) : in;
82} 91}
83 92
84/** 93static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower,
85 * subscr_send_event - send a message containing a tipc_event to the subscriber 94 u32 found_upper, u32 event, u32 port_ref,
86 *
87 * Note: Must not hold subscriber's server port lock, since tipc_send() will
88 * try to take the lock if the message is rejected and returned!
89 */
90static void subscr_send_event(struct tipc_subscription *sub,
91 u32 found_lower,
92 u32 found_upper,
93 u32 event,
94 u32 port_ref,
95 u32 node) 95 u32 node)
96{ 96{
97 struct iovec msg_sect; 97 struct tipc_subscriber *subscriber = sub->subscriber;
98 struct kvec msg_sect;
99 int ret;
98 100
99 msg_sect.iov_base = (void *)&sub->evt; 101 msg_sect.iov_base = (void *)&sub->evt;
100 msg_sect.iov_len = sizeof(struct tipc_event); 102 msg_sect.iov_len = sizeof(struct tipc_event);
@@ -104,7 +106,10 @@ static void subscr_send_event(struct tipc_subscription *sub,
104 sub->evt.found_upper = htohl(found_upper, sub->swap); 106 sub->evt.found_upper = htohl(found_upper, sub->swap);
105 sub->evt.port.ref = htohl(port_ref, sub->swap); 107 sub->evt.port.ref = htohl(port_ref, sub->swap);
106 sub->evt.port.node = htohl(node, sub->swap); 108 sub->evt.port.node = htohl(node, sub->swap);
107 tipc_send(sub->server_ref, 1, &msg_sect, msg_sect.iov_len); 109 ret = tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL,
110 msg_sect.iov_base, msg_sect.iov_len);
111 if (ret < 0)
112 pr_err("Sending subscription event failed, no memory\n");
108} 113}
109 114
110/** 115/**
@@ -112,10 +117,8 @@ static void subscr_send_event(struct tipc_subscription *sub,
112 * 117 *
113 * Returns 1 if there is overlap, otherwise 0. 118 * Returns 1 if there is overlap, otherwise 0.
114 */ 119 */
115int tipc_subscr_overlap(struct tipc_subscription *sub, 120int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower,
116 u32 found_lower,
117 u32 found_upper) 121 u32 found_upper)
118
119{ 122{
120 if (found_lower < sub->seq.lower) 123 if (found_lower < sub->seq.lower)
121 found_lower = sub->seq.lower; 124 found_lower = sub->seq.lower;
@@ -131,13 +134,9 @@ int tipc_subscr_overlap(struct tipc_subscription *sub,
131 * 134 *
132 * Protected by nameseq.lock in name_table.c 135 * Protected by nameseq.lock in name_table.c
133 */ 136 */
134void tipc_subscr_report_overlap(struct tipc_subscription *sub, 137void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower,
135 u32 found_lower, 138 u32 found_upper, u32 event, u32 port_ref,
136 u32 found_upper, 139 u32 node, int must)
137 u32 event,
138 u32 port_ref,
139 u32 node,
140 int must)
141{ 140{
142 if (!tipc_subscr_overlap(sub, found_lower, found_upper)) 141 if (!tipc_subscr_overlap(sub, found_lower, found_upper))
143 return; 142 return;
@@ -147,21 +146,24 @@ void tipc_subscr_report_overlap(struct tipc_subscription *sub,
147 subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); 146 subscr_send_event(sub, found_lower, found_upper, event, port_ref, node);
148} 147}
149 148
150/**
151 * subscr_timeout - subscription timeout has occurred
152 */
153static void subscr_timeout(struct tipc_subscription *sub) 149static void subscr_timeout(struct tipc_subscription *sub)
154{ 150{
155 struct tipc_port *server_port; 151 struct tipc_subscriber *subscriber = sub->subscriber;
152
153 /* The spin lock per subscriber is used to protect its members */
154 spin_lock_bh(&subscriber->lock);
156 155
157 /* Validate server port reference (in case subscriber is terminating) */ 156 /* Check whether the connection to the subscriber has been
158 server_port = tipc_port_lock(sub->server_ref); 157 * closed (in case the subscriber is terminating)
159 if (server_port == NULL) 158 */
159 if (subscriber->conid == 0) {
160 spin_unlock_bh(&subscriber->lock);
160 return; 161 return;
162 }
161 163
162 /* Validate timeout (in case subscription is being cancelled) */ 164 /* Validate timeout (in case subscription is being cancelled) */
163 if (sub->timeout == TIPC_WAIT_FOREVER) { 165 if (sub->timeout == TIPC_WAIT_FOREVER) {
164 tipc_port_unlock(server_port); 166 spin_unlock_bh(&subscriber->lock);
165 return; 167 return;
166 } 168 }
167 169
@@ -171,8 +173,7 @@ static void subscr_timeout(struct tipc_subscription *sub)
171 /* Unlink subscription from subscriber */ 173 /* Unlink subscription from subscriber */
172 list_del(&sub->subscription_list); 174 list_del(&sub->subscription_list);
173 175
174 /* Release subscriber's server port */ 176 spin_unlock_bh(&subscriber->lock);
175 tipc_port_unlock(server_port);
176 177
177 /* Notify subscriber of timeout */ 178 /* Notify subscriber of timeout */
178 subscr_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, 179 subscr_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
@@ -181,64 +182,54 @@ static void subscr_timeout(struct tipc_subscription *sub)
181 /* Now destroy subscription */ 182 /* Now destroy subscription */
182 k_term_timer(&sub->timer); 183 k_term_timer(&sub->timer);
183 kfree(sub); 184 kfree(sub);
184 atomic_dec(&topsrv.subscription_count); 185 atomic_dec(&subscription_count);
185} 186}
186 187
187/** 188/**
188 * subscr_del - delete a subscription within a subscription list 189 * subscr_del - delete a subscription within a subscription list
189 * 190 *
190 * Called with subscriber port locked. 191 * Called with subscriber lock held.
191 */ 192 */
192static void subscr_del(struct tipc_subscription *sub) 193static void subscr_del(struct tipc_subscription *sub)
193{ 194{
194 tipc_nametbl_unsubscribe(sub); 195 tipc_nametbl_unsubscribe(sub);
195 list_del(&sub->subscription_list); 196 list_del(&sub->subscription_list);
196 kfree(sub); 197 kfree(sub);
197 atomic_dec(&topsrv.subscription_count); 198 atomic_dec(&subscription_count);
198} 199}
199 200
200/** 201/**
201 * subscr_terminate - terminate communication with a subscriber 202 * subscr_terminate - terminate communication with a subscriber
202 * 203 *
203 * Called with subscriber port locked. Routine must temporarily release lock 204 * Note: Must be called in process context since it might sleep.
204 * to enable subscription timeout routine(s) to finish without deadlocking;
205 * the lock is then reclaimed to allow caller to release it upon return.
206 * (This should work even in the unlikely event some other thread creates
207 * a new object reference in the interim that uses this lock; this routine will
208 * simply wait for it to be released, then claim it.)
209 */ 205 */
210static void subscr_terminate(struct tipc_subscriber *subscriber) 206static void subscr_terminate(struct tipc_subscriber *subscriber)
211{ 207{
212 u32 port_ref; 208 tipc_conn_terminate(&topsrv, subscriber->conid);
209}
210
211static void subscr_release(struct tipc_subscriber *subscriber)
212{
213 struct tipc_subscription *sub; 213 struct tipc_subscription *sub;
214 struct tipc_subscription *sub_temp; 214 struct tipc_subscription *sub_temp;
215 215
216 /* Invalidate subscriber reference */ 216 spin_lock_bh(&subscriber->lock);
217 port_ref = subscriber->port_ref;
218 subscriber->port_ref = 0;
219 spin_unlock_bh(subscriber->lock);
220 217
221 /* Sever connection to subscriber */ 218 /* Invalidate subscriber reference */
222 tipc_shutdown(port_ref); 219 subscriber->conid = 0;
223 tipc_deleteport(port_ref);
224 220
225 /* Destroy any existing subscriptions for subscriber */ 221 /* Destroy any existing subscriptions for subscriber */
226 list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, 222 list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
227 subscription_list) { 223 subscription_list) {
228 if (sub->timeout != TIPC_WAIT_FOREVER) { 224 if (sub->timeout != TIPC_WAIT_FOREVER) {
225 spin_unlock_bh(&subscriber->lock);
229 k_cancel_timer(&sub->timer); 226 k_cancel_timer(&sub->timer);
230 k_term_timer(&sub->timer); 227 k_term_timer(&sub->timer);
228 spin_lock_bh(&subscriber->lock);
231 } 229 }
232 subscr_del(sub); 230 subscr_del(sub);
233 } 231 }
234 232 spin_unlock_bh(&subscriber->lock);
235 /* Remove subscriber from topology server's subscriber list */
236 spin_lock_bh(&topsrv.lock);
237 list_del(&subscriber->subscriber_list);
238 spin_unlock_bh(&topsrv.lock);
239
240 /* Reclaim subscriber lock */
241 spin_lock_bh(subscriber->lock);
242 233
243 /* Now destroy subscriber */ 234 /* Now destroy subscriber */
244 kfree(subscriber); 235 kfree(subscriber);
@@ -247,7 +238,7 @@ static void subscr_terminate(struct tipc_subscriber *subscriber)
247/** 238/**
248 * subscr_cancel - handle subscription cancellation request 239 * subscr_cancel - handle subscription cancellation request
249 * 240 *
250 * Called with subscriber port locked. Routine must temporarily release lock 241 * Called with subscriber lock held. Routine must temporarily release lock
251 * to enable the subscription timeout routine to finish without deadlocking; 242 * to enable the subscription timeout routine to finish without deadlocking;
252 * the lock is then reclaimed to allow caller to release it upon return. 243 * the lock is then reclaimed to allow caller to release it upon return.
253 * 244 *
@@ -274,10 +265,10 @@ static void subscr_cancel(struct tipc_subscr *s,
274 /* Cancel subscription timer (if used), then delete subscription */ 265 /* Cancel subscription timer (if used), then delete subscription */
275 if (sub->timeout != TIPC_WAIT_FOREVER) { 266 if (sub->timeout != TIPC_WAIT_FOREVER) {
276 sub->timeout = TIPC_WAIT_FOREVER; 267 sub->timeout = TIPC_WAIT_FOREVER;
277 spin_unlock_bh(subscriber->lock); 268 spin_unlock_bh(&subscriber->lock);
278 k_cancel_timer(&sub->timer); 269 k_cancel_timer(&sub->timer);
279 k_term_timer(&sub->timer); 270 k_term_timer(&sub->timer);
280 spin_lock_bh(subscriber->lock); 271 spin_lock_bh(&subscriber->lock);
281 } 272 }
282 subscr_del(sub); 273 subscr_del(sub);
283} 274}
@@ -285,7 +276,7 @@ static void subscr_cancel(struct tipc_subscr *s,
285/** 276/**
286 * subscr_subscribe - create subscription for subscriber 277 * subscr_subscribe - create subscription for subscriber
287 * 278 *
288 * Called with subscriber port locked. 279 * Called with subscriber lock held.
289 */ 280 */
290static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, 281static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s,
291 struct tipc_subscriber *subscriber) 282 struct tipc_subscriber *subscriber)
@@ -304,7 +295,7 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s,
304 } 295 }
305 296
306 /* Refuse subscription if global limit exceeded */ 297 /* Refuse subscription if global limit exceeded */
307 if (atomic_read(&topsrv.subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { 298 if (atomic_read(&subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) {
308 pr_warn("Subscription rejected, limit reached (%u)\n", 299 pr_warn("Subscription rejected, limit reached (%u)\n",
309 TIPC_MAX_SUBSCRIPTIONS); 300 TIPC_MAX_SUBSCRIPTIONS);
310 subscr_terminate(subscriber); 301 subscr_terminate(subscriber);
@@ -335,10 +326,10 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s,
335 } 326 }
336 INIT_LIST_HEAD(&sub->nameseq_list); 327 INIT_LIST_HEAD(&sub->nameseq_list);
337 list_add(&sub->subscription_list, &subscriber->subscription_list); 328 list_add(&sub->subscription_list, &subscriber->subscription_list);
338 sub->server_ref = subscriber->port_ref; 329 sub->subscriber = subscriber;
339 sub->swap = swap; 330 sub->swap = swap;
340 memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); 331 memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr));
341 atomic_inc(&topsrv.subscription_count); 332 atomic_inc(&subscription_count);
342 if (sub->timeout != TIPC_WAIT_FOREVER) { 333 if (sub->timeout != TIPC_WAIT_FOREVER) {
343 k_init_timer(&sub->timer, 334 k_init_timer(&sub->timer,
344 (Handler)subscr_timeout, (unsigned long)sub); 335 (Handler)subscr_timeout, (unsigned long)sub);
@@ -348,196 +339,51 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s,
348 return sub; 339 return sub;
349} 340}
350 341
351/** 342/* Handle one termination request for the subscriber */
352 * subscr_conn_shutdown_event - handle termination request from subscriber 343static void subscr_conn_shutdown_event(int conid, void *usr_data)
353 *
354 * Called with subscriber's server port unlocked.
355 */
356static void subscr_conn_shutdown_event(void *usr_handle,
357 u32 port_ref,
358 struct sk_buff **buf,
359 unsigned char const *data,
360 unsigned int size,
361 int reason)
362{ 344{
363 struct tipc_subscriber *subscriber = usr_handle; 345 subscr_release((struct tipc_subscriber *)usr_data);
364 spinlock_t *subscriber_lock;
365
366 if (tipc_port_lock(port_ref) == NULL)
367 return;
368
369 subscriber_lock = subscriber->lock;
370 subscr_terminate(subscriber);
371 spin_unlock_bh(subscriber_lock);
372} 346}
373 347
374/** 348/* Handle one request to create a new subscription for the subscriber */
375 * subscr_conn_msg_event - handle new subscription request from subscriber 349static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr,
376 * 350 void *usr_data, void *buf, size_t len)
377 * Called with subscriber's server port unlocked.
378 */
379static void subscr_conn_msg_event(void *usr_handle,
380 u32 port_ref,
381 struct sk_buff **buf,
382 const unchar *data,
383 u32 size)
384{ 351{
385 struct tipc_subscriber *subscriber = usr_handle; 352 struct tipc_subscriber *subscriber = usr_data;
386 spinlock_t *subscriber_lock;
387 struct tipc_subscription *sub; 353 struct tipc_subscription *sub;
388 354
389 /* 355 spin_lock_bh(&subscriber->lock);
390 * Lock subscriber's server port (& make a local copy of lock pointer, 356 sub = subscr_subscribe((struct tipc_subscr *)buf, subscriber);
391 * in case subscriber is deleted while processing subscription request) 357 if (sub)
392 */ 358 tipc_nametbl_subscribe(sub);
393 if (tipc_port_lock(port_ref) == NULL) 359 spin_unlock_bh(&subscriber->lock);
394 return;
395
396 subscriber_lock = subscriber->lock;
397
398 if (size != sizeof(struct tipc_subscr)) {
399 subscr_terminate(subscriber);
400 spin_unlock_bh(subscriber_lock);
401 } else {
402 sub = subscr_subscribe((struct tipc_subscr *)data, subscriber);
403 spin_unlock_bh(subscriber_lock);
404 if (sub != NULL) {
405
406 /*
407 * We must release the server port lock before adding a
408 * subscription to the name table since TIPC needs to be
409 * able to (re)acquire the port lock if an event message
410 * issued by the subscription process is rejected and
411 * returned. The subscription cannot be deleted while
412 * it is being added to the name table because:
413 * a) the single-threading of the native API port code
414 * ensures the subscription cannot be cancelled and
415 * the subscriber connection cannot be broken, and
416 * b) the name table lock ensures the subscription
417 * timeout code cannot delete the subscription,
418 * so the subscription object is still protected.
419 */
420 tipc_nametbl_subscribe(sub);
421 }
422 }
423} 360}
424 361
425/** 362
426 * subscr_named_msg_event - handle request to establish a new subscriber 363/* Handle one request to establish a new subscriber */
427 */ 364static void *subscr_named_msg_event(int conid)
428static void subscr_named_msg_event(void *usr_handle,
429 u32 port_ref,
430 struct sk_buff **buf,
431 const unchar *data,
432 u32 size,
433 u32 importance,
434 struct tipc_portid const *orig,
435 struct tipc_name_seq const *dest)
436{ 365{
437 struct tipc_subscriber *subscriber; 366 struct tipc_subscriber *subscriber;
438 u32 server_port_ref;
439 367
440 /* Create subscriber object */ 368 /* Create subscriber object */
441 subscriber = kzalloc(sizeof(struct tipc_subscriber), GFP_ATOMIC); 369 subscriber = kzalloc(sizeof(struct tipc_subscriber), GFP_ATOMIC);
442 if (subscriber == NULL) { 370 if (subscriber == NULL) {
443 pr_warn("Subscriber rejected, no memory\n"); 371 pr_warn("Subscriber rejected, no memory\n");
444 return; 372 return NULL;
445 } 373 }
446 INIT_LIST_HEAD(&subscriber->subscription_list); 374 INIT_LIST_HEAD(&subscriber->subscription_list);
447 INIT_LIST_HEAD(&subscriber->subscriber_list); 375 subscriber->conid = conid;
448 376 spin_lock_init(&subscriber->lock);
449 /* Create server port & establish connection to subscriber */
450 tipc_createport(subscriber,
451 importance,
452 NULL,
453 NULL,
454 subscr_conn_shutdown_event,
455 NULL,
456 NULL,
457 subscr_conn_msg_event,
458 NULL,
459 &subscriber->port_ref);
460 if (subscriber->port_ref == 0) {
461 pr_warn("Subscriber rejected, unable to create port\n");
462 kfree(subscriber);
463 return;
464 }
465 tipc_connect(subscriber->port_ref, orig);
466
467 /* Lock server port (& save lock address for future use) */
468 subscriber->lock = tipc_port_lock(subscriber->port_ref)->lock;
469
470 /* Add subscriber to topology server's subscriber list */
471 spin_lock_bh(&topsrv.lock);
472 list_add(&subscriber->subscriber_list, &topsrv.subscriber_list);
473 spin_unlock_bh(&topsrv.lock);
474
475 /* Unlock server port */
476 server_port_ref = subscriber->port_ref;
477 spin_unlock_bh(subscriber->lock);
478
479 /* Send an ACK- to complete connection handshaking */
480 tipc_send(server_port_ref, 0, NULL, 0);
481 377
482 /* Handle optional subscription request */ 378 return (void *)subscriber;
483 if (size != 0) {
484 subscr_conn_msg_event(subscriber, server_port_ref,
485 buf, data, size);
486 }
487} 379}
488 380
489int tipc_subscr_start(void) 381int tipc_subscr_start(void)
490{ 382{
491 struct tipc_name_seq seq = {TIPC_TOP_SRV, TIPC_TOP_SRV, TIPC_TOP_SRV}; 383 return tipc_server_start(&topsrv);
492 int res;
493
494 spin_lock_init(&topsrv.lock);
495 INIT_LIST_HEAD(&topsrv.subscriber_list);
496
497 res = tipc_createport(NULL,
498 TIPC_CRITICAL_IMPORTANCE,
499 NULL,
500 NULL,
501 NULL,
502 NULL,
503 subscr_named_msg_event,
504 NULL,
505 NULL,
506 &topsrv.setup_port);
507 if (res)
508 goto failed;
509
510 res = tipc_publish(topsrv.setup_port, TIPC_NODE_SCOPE, &seq);
511 if (res) {
512 tipc_deleteport(topsrv.setup_port);
513 topsrv.setup_port = 0;
514 goto failed;
515 }
516
517 return 0;
518
519failed:
520 pr_err("Failed to create subscription service\n");
521 return res;
522} 384}
523 385
524void tipc_subscr_stop(void) 386void tipc_subscr_stop(void)
525{ 387{
526 struct tipc_subscriber *subscriber; 388 tipc_server_stop(&topsrv);
527 struct tipc_subscriber *subscriber_temp;
528 spinlock_t *subscriber_lock;
529
530 if (topsrv.setup_port) {
531 tipc_deleteport(topsrv.setup_port);
532 topsrv.setup_port = 0;
533
534 list_for_each_entry_safe(subscriber, subscriber_temp,
535 &topsrv.subscriber_list,
536 subscriber_list) {
537 subscriber_lock = subscriber->lock;
538 spin_lock_bh(subscriber_lock);
539 subscr_terminate(subscriber);
540 spin_unlock_bh(subscriber_lock);
541 }
542 }
543} 389}
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 218d2e07f0cc..393e417bee3f 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -2,7 +2,7 @@
2 * net/tipc/subscr.h: Include file for TIPC network topology service 2 * net/tipc/subscr.h: Include file for TIPC network topology service
3 * 3 *
4 * Copyright (c) 2003-2006, Ericsson AB 4 * Copyright (c) 2003-2006, Ericsson AB
5 * Copyright (c) 2005-2007, Wind River Systems 5 * Copyright (c) 2005-2007, 2012-2013, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -37,10 +37,14 @@
37#ifndef _TIPC_SUBSCR_H 37#ifndef _TIPC_SUBSCR_H
38#define _TIPC_SUBSCR_H 38#define _TIPC_SUBSCR_H
39 39
40#include "server.h"
41
40struct tipc_subscription; 42struct tipc_subscription;
43struct tipc_subscriber;
41 44
42/** 45/**
43 * struct tipc_subscription - TIPC network topology subscription object 46 * struct tipc_subscription - TIPC network topology subscription object
47 * @subscriber: pointer to its subscriber
44 * @seq: name sequence associated with subscription 48 * @seq: name sequence associated with subscription
45 * @timeout: duration of subscription (in ms) 49 * @timeout: duration of subscription (in ms)
46 * @filter: event filtering to be done for subscription 50 * @filter: event filtering to be done for subscription
@@ -52,28 +56,23 @@ struct tipc_subscription;
52 * @evt: template for events generated by subscription 56 * @evt: template for events generated by subscription
53 */ 57 */
54struct tipc_subscription { 58struct tipc_subscription {
59 struct tipc_subscriber *subscriber;
55 struct tipc_name_seq seq; 60 struct tipc_name_seq seq;
56 u32 timeout; 61 u32 timeout;
57 u32 filter; 62 u32 filter;
58 struct timer_list timer; 63 struct timer_list timer;
59 struct list_head nameseq_list; 64 struct list_head nameseq_list;
60 struct list_head subscription_list; 65 struct list_head subscription_list;
61 u32 server_ref;
62 int swap; 66 int swap;
63 struct tipc_event evt; 67 struct tipc_event evt;
64}; 68};
65 69
66int tipc_subscr_overlap(struct tipc_subscription *sub, 70int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower,
67 u32 found_lower,
68 u32 found_upper); 71 u32 found_upper);
69 72
70void tipc_subscr_report_overlap(struct tipc_subscription *sub, 73void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower,
71 u32 found_lower, 74 u32 found_upper, u32 event, u32 port_ref,
72 u32 found_upper, 75 u32 node, int must);
73 u32 event,
74 u32 port_ref,
75 u32 node,
76 int must_report);
77 76
78int tipc_subscr_start(void); 77int tipc_subscr_start(void);
79 78
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
new file mode 100644
index 000000000000..f3fef93325a8
--- /dev/null
+++ b/net/tipc/sysctl.c
@@ -0,0 +1,64 @@
1/*
2 * net/tipc/sysctl.c: sysctl interface to TIPC subsystem
3 *
4 * Copyright (c) 2013, Wind River Systems
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the names of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18 *
19 * Alternatively, this software may be distributed under the terms of the
20 * GNU General Public License ("GPL") version 2 as published by the Free
21 * Software Foundation.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36#include "core.h"
37
38#include <linux/sysctl.h>
39
40static struct ctl_table_header *tipc_ctl_hdr;
41
42static struct ctl_table tipc_table[] = {
43 {
44 .procname = "tipc_rmem",
45 .data = &sysctl_tipc_rmem,
46 .maxlen = sizeof(sysctl_tipc_rmem),
47 .mode = 0644,
48 .proc_handler = proc_dointvec,
49 },
50 {}
51};
52
53int tipc_register_sysctl(void)
54{
55 tipc_ctl_hdr = register_net_sysctl(&init_net, "net/tipc", tipc_table);
56 if (tipc_ctl_hdr == NULL)
57 return -ENOMEM;
58 return 0;
59}
60
61void tipc_unregister_sysctl(void)
62{
63 unregister_net_sysctl_table(tipc_ctl_hdr);
64}
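This registers /proc/sys/net/tipc/tipc_rmem, a min/default/max triple in the spirit of tcp_rmem (the socket.c changes above read sysctl_tipc_rmem[1] as the default sk_rcvbuf and [2] as the connected-socket limit). A small user-space reader, assuming the usual three-integer sysctl layout:

#include <stdio.h>

int main(void)
{
	unsigned int rmem[3];                 /* min / default / max, as with tcp_rmem */
	FILE *f = fopen("/proc/sys/net/tipc/tipc_rmem", "r");

	if (!f) {
		perror("tipc_rmem");
		return 1;
	}
	if (fscanf(f, "%u %u %u", &rmem[0], &rmem[1], &rmem[2]) != 3) {
		fclose(f);
		return 1;
	}
	printf("min=%u default=%u max=%u\n", rmem[0], rmem[1], rmem[2]);
	fclose(f);
	return 0;
}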
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 826e09938bff..86de99ad2976 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -114,6 +114,7 @@
114#include <linux/mount.h> 114#include <linux/mount.h>
115#include <net/checksum.h> 115#include <net/checksum.h>
116#include <linux/security.h> 116#include <linux/security.h>
117#include <linux/freezer.h>
117 118
118struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; 119struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
119EXPORT_SYMBOL_GPL(unix_socket_table); 120EXPORT_SYMBOL_GPL(unix_socket_table);
@@ -1478,7 +1479,8 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1478 MAX_SKB_FRAGS * PAGE_SIZE); 1479 MAX_SKB_FRAGS * PAGE_SIZE);
1479 1480
1480 skb = sock_alloc_send_pskb(sk, len - data_len, data_len, 1481 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1481 msg->msg_flags & MSG_DONTWAIT, &err); 1482 msg->msg_flags & MSG_DONTWAIT, &err,
1483 PAGE_ALLOC_COSTLY_ORDER);
1482 if (skb == NULL) 1484 if (skb == NULL)
1483 goto out; 1485 goto out;
1484 1486
@@ -1595,6 +1597,10 @@ out:
1595 return err; 1597 return err;
1596} 1598}
1597 1599
1600/* We use paged skbs for stream sockets, and limit occupancy to 32768
1601 * bytes, and a minimum of a full page.
1602 */
1603#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1598 1604
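With 4 KB pages, get_order(32768) is 3, so UNIX_SKB_FRAGS_SZ allows 32 KB of page fragments on top of whatever fits in the skb's linear head; the rewritten loop below clamps the request to SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ (after the existing half-sndbuf clamp) and puts the overflow into data_len. A rough user-space model of that split; the SKB_MAX_HEAD(0) value used here is an assumption (about a page minus skb overhead), not the exact kernel constant:

#include <stdio.h>

#define PAGE_SIZE         4096u
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << 3)   /* get_order(32768) == 3 -> 32 KB */
#define SKB_MAX_HEAD0     3776u              /* assumed: ~PAGE_SIZE minus skb overhead */

static unsigned int min_u(unsigned int a, unsigned int b) { return a < b ? a : b; }

int main(void)
{
	unsigned int sizes[] = { 512, 8192, 200000 };

	for (int i = 0; i < 3; i++) {
		unsigned int size = min_u(sizes[i], SKB_MAX_HEAD0 + UNIX_SKB_FRAGS_SZ);
		unsigned int data_len = size > SKB_MAX_HEAD0 ? size - SKB_MAX_HEAD0 : 0;

		printf("want %-6u -> skb linear %u + paged %u\n",
		       sizes[i], size - data_len, data_len);
	}
	return 0;
}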
1599static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, 1605static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1600 struct msghdr *msg, size_t len) 1606 struct msghdr *msg, size_t len)
@@ -1608,6 +1614,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1608 struct scm_cookie tmp_scm; 1614 struct scm_cookie tmp_scm;
1609 bool fds_sent = false; 1615 bool fds_sent = false;
1610 int max_level; 1616 int max_level;
1617 int data_len;
1611 1618
1612 if (NULL == siocb->scm) 1619 if (NULL == siocb->scm)
1613 siocb->scm = &tmp_scm; 1620 siocb->scm = &tmp_scm;
@@ -1634,40 +1641,22 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1634 goto pipe_err; 1641 goto pipe_err;
1635 1642
1636 while (sent < len) { 1643 while (sent < len) {
1637 /* 1644 size = len - sent;
1638 * Optimisation for the fact that under 0.01% of X
1639 * messages typically need breaking up.
1640 */
1641
1642 size = len-sent;
1643 1645
1644 /* Keep two messages in the pipe so it schedules better */ 1646 /* Keep two messages in the pipe so it schedules better */
1645 if (size > ((sk->sk_sndbuf >> 1) - 64)) 1647 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1646 size = (sk->sk_sndbuf >> 1) - 64;
1647 1648
1648 if (size > SKB_MAX_ALLOC) 1649 /* allow fallback to order-0 allocations */
1649 size = SKB_MAX_ALLOC; 1650 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1650 1651
1651 /* 1652 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1652 * Grab a buffer
1653 */
1654 1653
1655 skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT, 1654 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1656 &err); 1655 msg->msg_flags & MSG_DONTWAIT, &err,
1657 1656 get_order(UNIX_SKB_FRAGS_SZ));
1658 if (skb == NULL) 1657 if (!skb)
1659 goto out_err; 1658 goto out_err;
1660 1659
1661 /*
1662 * If you pass two values to the sock_alloc_send_skb
1663 * it tries to grab the large buffer with GFP_NOFS
1664 * (which can fail easily), and if it fails grab the
1665 * fallback size buffer which is under a page and will
1666 * succeed. [Alan]
1667 */
1668 size = min_t(int, size, skb_tailroom(skb));
1669
1670
1671 /* Only send the fds in the first buffer */ 1660 /* Only send the fds in the first buffer */
1672 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); 1661 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1673 if (err < 0) { 1662 if (err < 0) {
@@ -1677,7 +1666,11 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1677 max_level = err + 1; 1666 max_level = err + 1;
1678 fds_sent = true; 1667 fds_sent = true;
1679 1668
1680 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 1669 skb_put(skb, size - data_len);
1670 skb->data_len = data_len;
1671 skb->len = size;
1672 err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov,
1673 sent, size);
1681 if (err) { 1674 if (err) {
1682 kfree_skb(skb); 1675 kfree_skb(skb);
1683 goto out_err; 1676 goto out_err;
@@ -1879,7 +1872,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
1879 1872
1880 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1873 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1881 unix_state_unlock(sk); 1874 unix_state_unlock(sk);
1882 timeo = schedule_timeout(timeo); 1875 timeo = freezable_schedule_timeout(timeo);
1883 unix_state_lock(sk); 1876 unix_state_lock(sk);
1884 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1877 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1885 } 1878 }
@@ -1889,6 +1882,11 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
1889 return timeo; 1882 return timeo;
1890} 1883}
1891 1884
1885static unsigned int unix_skb_len(const struct sk_buff *skb)
1886{
1887 return skb->len - UNIXCB(skb).consumed;
1888}
1889
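unix_skb_len() is the companion to the new UNIXCB(skb).consumed accounting: instead of skb_pull()ing read data out of a queued skb, which is no longer safe once stream skbs carry page fragments and may still be seen by MSG_PEEK readers, the receiver just advances a consumed offset and the skb stays intact until fully drained. A user-space model of the idea; the struct and field names are illustrative:

#include <stdio.h>
#include <string.h>

struct fake_skb {                 /* models skb->len plus UNIXCB(skb).consumed */
	const char *data;
	unsigned int len;
	unsigned int consumed;
};

static unsigned int skb_len(const struct fake_skb *skb)
{
	return skb->len - skb->consumed;   /* mirrors unix_skb_len() */
}

static unsigned int do_recv(struct fake_skb *skb, char *out,
			    unsigned int want, int peek)
{
	unsigned int chunk = want < skb_len(skb) ? want : skb_len(skb);

	memcpy(out, skb->data + skb->consumed, chunk);
	if (!peek)
		skb->consumed += chunk;    /* instead of skb_pull() */
	return chunk;
}

int main(void)
{
	struct fake_skb skb = { "hello world", 11, 0 };
	char buf[16] = "";

	do_recv(&skb, buf, 5, 1);          /* MSG_PEEK leaves consumed at 0 */
	printf("peek '%s', remaining %u\n", buf, skb_len(&skb));
	do_recv(&skb, buf, 5, 0);          /* a real read advances consumed */
	printf("read '%s', remaining %u\n", buf, skb_len(&skb));
	return 0;
}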
1892static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, 1890static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1893 struct msghdr *msg, size_t size, 1891 struct msghdr *msg, size_t size,
1894 int flags) 1892 int flags)
@@ -1976,8 +1974,8 @@ again:
1976 } 1974 }
1977 1975
1978 skip = sk_peek_offset(sk, flags); 1976 skip = sk_peek_offset(sk, flags);
1979 while (skip >= skb->len) { 1977 while (skip >= unix_skb_len(skb)) {
1980 skip -= skb->len; 1978 skip -= unix_skb_len(skb);
1981 last = skb; 1979 last = skb;
1982 skb = skb_peek_next(skb, &sk->sk_receive_queue); 1980 skb = skb_peek_next(skb, &sk->sk_receive_queue);
1983 if (!skb) 1981 if (!skb)
@@ -2004,8 +2002,9 @@ again:
2004 sunaddr = NULL; 2002 sunaddr = NULL;
2005 } 2003 }
2006 2004
2007 chunk = min_t(unsigned int, skb->len - skip, size); 2005 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2008 if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) { 2006 if (skb_copy_datagram_iovec(skb, UNIXCB(skb).consumed + skip,
2007 msg->msg_iov, chunk)) {
2009 if (copied == 0) 2008 if (copied == 0)
2010 copied = -EFAULT; 2009 copied = -EFAULT;
2011 break; 2010 break;
@@ -2015,14 +2014,14 @@ again:
2015 2014
2016 /* Mark read part of skb as used */ 2015 /* Mark read part of skb as used */
2017 if (!(flags & MSG_PEEK)) { 2016 if (!(flags & MSG_PEEK)) {
2018 skb_pull(skb, chunk); 2017 UNIXCB(skb).consumed += chunk;
2019 2018
2020 sk_peek_offset_bwd(sk, chunk); 2019 sk_peek_offset_bwd(sk, chunk);
2021 2020
2022 if (UNIXCB(skb).fp) 2021 if (UNIXCB(skb).fp)
2023 unix_detach_fds(siocb->scm, skb); 2022 unix_detach_fds(siocb->scm, skb);
2024 2023
2025 if (skb->len) 2024 if (unix_skb_len(skb))
2026 break; 2025 break;
2027 2026
2028 skb_unlink(skb, &sk->sk_receive_queue); 2027 skb_unlink(skb, &sk->sk_receive_queue);
@@ -2106,7 +2105,7 @@ long unix_inq_len(struct sock *sk)
2106 if (sk->sk_type == SOCK_STREAM || 2105 if (sk->sk_type == SOCK_STREAM ||
2107 sk->sk_type == SOCK_SEQPACKET) { 2106 sk->sk_type == SOCK_SEQPACKET) {
2108 skb_queue_walk(&sk->sk_receive_queue, skb) 2107 skb_queue_walk(&sk->sk_receive_queue, skb)
2109 amount += skb->len; 2108 amount += unix_skb_len(skb);
2110 } else { 2109 } else {
2111 skb = skb_peek(&sk->sk_receive_queue); 2110 skb = skb_peek(&sk->sk_receive_queue);
2112 if (skb) 2111 if (skb)
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index 8800604c93f4..b3d515021b74 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -15,7 +15,7 @@
15 15
16#include <net/af_unix.h> 16#include <net/af_unix.h>
17 17
18static ctl_table unix_table[] = { 18static struct ctl_table unix_table[] = {
19 { 19 {
20 .procname = "max_dgram_qlen", 20 .procname = "max_dgram_qlen",
21 .data = &init_net.unx.sysctl_max_dgram_qlen, 21 .data = &init_net.unx.sysctl_max_dgram_qlen,
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 3f77f42a3b58..545c08b8a1d4 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -96,8 +96,7 @@
96#include <linux/wait.h> 96#include <linux/wait.h>
97#include <linux/workqueue.h> 97#include <linux/workqueue.h>
98#include <net/sock.h> 98#include <net/sock.h>
99 99#include <net/af_vsock.h>
100#include "af_vsock.h"
101 100
102static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); 101static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
103static void vsock_sk_destruct(struct sock *sk); 102static void vsock_sk_destruct(struct sock *sk);
@@ -144,18 +143,18 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
144 * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through 143 * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through
145 * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and 144 * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and
146 * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function 145 * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function
147 * mods with VSOCK_HASH_SIZE - 1 to ensure this. 146 * mods with VSOCK_HASH_SIZE to ensure this.
148 */ 147 */
149#define VSOCK_HASH_SIZE 251 148#define VSOCK_HASH_SIZE 251
150#define MAX_PORT_RETRIES 24 149#define MAX_PORT_RETRIES 24
151 150
152#define VSOCK_HASH(addr) ((addr)->svm_port % (VSOCK_HASH_SIZE - 1)) 151#define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE)
153#define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) 152#define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)])
154#define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) 153#define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE])
155 154
156/* XXX This can probably be implemented in a better way. */ 155/* XXX This can probably be implemented in a better way. */
157#define VSOCK_CONN_HASH(src, dst) \ 156#define VSOCK_CONN_HASH(src, dst) \
158 (((src)->svm_cid ^ (dst)->svm_port) % (VSOCK_HASH_SIZE - 1)) 157 (((src)->svm_cid ^ (dst)->svm_port) % VSOCK_HASH_SIZE)
159#define vsock_connected_sockets(src, dst) \ 158#define vsock_connected_sockets(src, dst) \
160 (&vsock_connected_table[VSOCK_CONN_HASH(src, dst)]) 159 (&vsock_connected_table[VSOCK_CONN_HASH(src, dst)])
161#define vsock_connected_sockets_vsk(vsk) \ 160#define vsock_connected_sockets_vsk(vsk) \
@@ -165,6 +164,18 @@ static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
165static struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; 164static struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
166static DEFINE_SPINLOCK(vsock_table_lock); 165static DEFINE_SPINLOCK(vsock_table_lock);
167 166
167/* Autobind this socket to the local address if necessary. */
168static int vsock_auto_bind(struct vsock_sock *vsk)
169{
170 struct sock *sk = sk_vsock(vsk);
171 struct sockaddr_vm local_addr;
172
173 if (vsock_addr_bound(&vsk->local_addr))
174 return 0;
175 vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
176 return __vsock_bind(sk, &local_addr);
177}
178
168static void vsock_init_tables(void) 179static void vsock_init_tables(void)
169{ 180{
170 int i; 181 int i;
@@ -335,7 +346,7 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
335 for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { 346 for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
336 struct vsock_sock *vsk; 347 struct vsock_sock *vsk;
337 list_for_each_entry(vsk, &vsock_connected_table[i], 348 list_for_each_entry(vsk, &vsock_connected_table[i],
338 connected_table); 349 connected_table)
339 fn(sk_vsock(vsk)); 350 fn(sk_vsock(vsk));
340 } 351 }
341 352
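
The change in vsock_for_each_connected_socket() above is a one-character bug fix: the old trailing semicolon after list_for_each_entry(...) closed the loop with an empty body, so fn() ran exactly once, with the iterator pointing past the end of the list, instead of once per connected socket. The bug class in self-contained form:

#include <stdio.h>

/* Stand-in for a list_for_each_entry()-style iteration macro. */
#define for_each_int(i, n) for ((i) = 0; (i) < (n); (i)++)

int main(void)
{
	int i;

	for_each_int(i, 3);             /* stray ';' = empty loop body */
		printf("buggy: i=%d\n", i); /* runs once, with i == 3 */

	for_each_int(i, 3)              /* fixed: statement is the body */
		printf("fixed: i=%d\n", i); /* runs for i = 0, 1, 2 */

	return 0;
}
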
@@ -956,15 +967,10 @@ static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
956 967
957 lock_sock(sk); 968 lock_sock(sk);
958 969
959 if (!vsock_addr_bound(&vsk->local_addr)) { 970 err = vsock_auto_bind(vsk);
960 struct sockaddr_vm local_addr; 971 if (err)
961 972 goto out;
962 vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
963 err = __vsock_bind(sk, &local_addr);
964 if (err != 0)
965 goto out;
966 973
967 }
968 974
969 /* If the provided message contains an address, use that. Otherwise 975 /* If the provided message contains an address, use that. Otherwise
970 * fall back on the socket's remote handle (if it has been connected). 976 * fall back on the socket's remote handle (if it has been connected).
@@ -1038,15 +1044,9 @@ static int vsock_dgram_connect(struct socket *sock,
1038 1044
1039 lock_sock(sk); 1045 lock_sock(sk);
1040 1046
1041 if (!vsock_addr_bound(&vsk->local_addr)) { 1047 err = vsock_auto_bind(vsk);
1042 struct sockaddr_vm local_addr; 1048 if (err)
1043 1049 goto out;
1044 vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
1045 err = __vsock_bind(sk, &local_addr);
1046 if (err != 0)
1047 goto out;
1048
1049 }
1050 1050
1051 if (!transport->dgram_allow(remote_addr->svm_cid, 1051 if (!transport->dgram_allow(remote_addr->svm_cid,
1052 remote_addr->svm_port)) { 1052 remote_addr->svm_port)) {
@@ -1163,17 +1163,9 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
1163 memcpy(&vsk->remote_addr, remote_addr, 1163 memcpy(&vsk->remote_addr, remote_addr,
1164 sizeof(vsk->remote_addr)); 1164 sizeof(vsk->remote_addr));
1165 1165
1166 /* Autobind this socket to the local address if necessary. */ 1166 err = vsock_auto_bind(vsk);
1167 if (!vsock_addr_bound(&vsk->local_addr)) { 1167 if (err)
1168 struct sockaddr_vm local_addr; 1168 goto out;
1169
1170 vsock_addr_init(&local_addr, VMADDR_CID_ANY,
1171 VMADDR_PORT_ANY);
1172 err = __vsock_bind(sk, &local_addr);
1173 if (err != 0)
1174 goto out;
1175
1176 }
1177 1169
1178 sk->sk_state = SS_CONNECTING; 1170 sk->sk_state = SS_CONNECTING;
1179 1171
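
The VSOCK_HASH and VSOCK_CONN_HASH changes above are fixes rather than cleanups: vsock_bind_table has VSOCK_HASH_SIZE + 1 entries, buckets 0..250 for bound sockets plus the unbound list at index 251, but the old modulus of VSOCK_HASH_SIZE - 1 could only yield 0..249, so bucket 250 sat unused and the comment's claim didn't hold. A quick check of the arithmetic:

#include <stdio.h>

#define VSOCK_HASH_SIZE 251

int main(void)
{
	unsigned int port = 250; /* any port congruent to 250 shows it */

	/* Old: % (VSOCK_HASH_SIZE - 1) never reaches bucket 250. */
	printf("old bucket: %u\n", port % (VSOCK_HASH_SIZE - 1)); /* 0 */

	/* New: % VSOCK_HASH_SIZE covers 0..250 and still cannot
	 * collide with the unbound list at index VSOCK_HASH_SIZE. */
	printf("new bucket: %u\n", port % VSOCK_HASH_SIZE); /* 250 */
	return 0;
}
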
diff --git a/net/vmw_vsock/af_vsock.h b/net/vmw_vsock/af_vsock.h
deleted file mode 100644
index 7d64d3609ec9..000000000000
--- a/net/vmw_vsock/af_vsock.h
+++ /dev/null
@@ -1,175 +0,0 @@
1/*
2 * VMware vSockets Driver
3 *
4 * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#ifndef __AF_VSOCK_H__
17#define __AF_VSOCK_H__
18
19#include <linux/kernel.h>
20#include <linux/workqueue.h>
21#include <linux/vm_sockets.h>
22
23#include "vsock_addr.h"
24
25#define LAST_RESERVED_PORT 1023
26
27#define vsock_sk(__sk) ((struct vsock_sock *)__sk)
28#define sk_vsock(__vsk) (&(__vsk)->sk)
29
30struct vsock_sock {
31 /* sk must be the first member. */
32 struct sock sk;
33 struct sockaddr_vm local_addr;
34 struct sockaddr_vm remote_addr;
35 /* Links for the global tables of bound and connected sockets. */
36 struct list_head bound_table;
37 struct list_head connected_table;
38 /* Accessed without the socket lock held. This means it can never be
39 * modified outside of socket create or destruct.
40 */
41 bool trusted;
42 bool cached_peer_allow_dgram; /* Dgram communication allowed to
43 * cached peer?
44 */
45 u32 cached_peer; /* Context ID of last dgram destination check. */
46 const struct cred *owner;
47 /* Rest are SOCK_STREAM only. */
48 long connect_timeout;
49 /* Listening socket that this came from. */
50 struct sock *listener;
51 /* Used for pending list and accept queue during connection handshake.
52 * The listening socket is the head for both lists. Sockets created
53 * for connection requests are placed in the pending list until they
54 * are connected, at which point they are put in the accept queue list
55 * so they can be accepted in accept(). If accept() cannot accept the
56 * connection, it is marked as rejected so the cleanup function knows
57 * to clean up the socket.
58 */
59 struct list_head pending_links;
60 struct list_head accept_queue;
61 bool rejected;
62 struct delayed_work dwork;
63 u32 peer_shutdown;
64 bool sent_request;
65 bool ignore_connecting_rst;
66
67 /* Private to transport. */
68 void *trans;
69};
70
71s64 vsock_stream_has_data(struct vsock_sock *vsk);
72s64 vsock_stream_has_space(struct vsock_sock *vsk);
73void vsock_pending_work(struct work_struct *work);
74struct sock *__vsock_create(struct net *net,
75 struct socket *sock,
76 struct sock *parent,
77 gfp_t priority, unsigned short type);
78
79/**** TRANSPORT ****/
80
81struct vsock_transport_recv_notify_data {
82 u64 data1; /* Transport-defined. */
83 u64 data2; /* Transport-defined. */
84 bool notify_on_block;
85};
86
87struct vsock_transport_send_notify_data {
88 u64 data1; /* Transport-defined. */
89 u64 data2; /* Transport-defined. */
90};
91
92struct vsock_transport {
93 /* Initialize/tear-down socket. */
94 int (*init)(struct vsock_sock *, struct vsock_sock *);
95 void (*destruct)(struct vsock_sock *);
96 void (*release)(struct vsock_sock *);
97
98 /* Connections. */
99 int (*connect)(struct vsock_sock *);
100
101 /* DGRAM. */
102 int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *);
103 int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk,
104 struct msghdr *msg, size_t len, int flags);
105 int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *,
106 struct iovec *, size_t len);
107 bool (*dgram_allow)(u32 cid, u32 port);
108
109 /* STREAM. */
110 /* TODO: stream_bind() */
111 ssize_t (*stream_dequeue)(struct vsock_sock *, struct iovec *,
112 size_t len, int flags);
113 ssize_t (*stream_enqueue)(struct vsock_sock *, struct iovec *,
114 size_t len);
115 s64 (*stream_has_data)(struct vsock_sock *);
116 s64 (*stream_has_space)(struct vsock_sock *);
117 u64 (*stream_rcvhiwat)(struct vsock_sock *);
118 bool (*stream_is_active)(struct vsock_sock *);
119 bool (*stream_allow)(u32 cid, u32 port);
120
121 /* Notification. */
122 int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
123 int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
124 int (*notify_recv_init)(struct vsock_sock *, size_t,
125 struct vsock_transport_recv_notify_data *);
126 int (*notify_recv_pre_block)(struct vsock_sock *, size_t,
127 struct vsock_transport_recv_notify_data *);
128 int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t,
129 struct vsock_transport_recv_notify_data *);
130 int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t,
131 ssize_t, bool, struct vsock_transport_recv_notify_data *);
132 int (*notify_send_init)(struct vsock_sock *,
133 struct vsock_transport_send_notify_data *);
134 int (*notify_send_pre_block)(struct vsock_sock *,
135 struct vsock_transport_send_notify_data *);
136 int (*notify_send_pre_enqueue)(struct vsock_sock *,
137 struct vsock_transport_send_notify_data *);
138 int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t,
139 struct vsock_transport_send_notify_data *);
140
141 /* Shutdown. */
142 int (*shutdown)(struct vsock_sock *, int);
143
144 /* Buffer sizes. */
145 void (*set_buffer_size)(struct vsock_sock *, u64);
146 void (*set_min_buffer_size)(struct vsock_sock *, u64);
147 void (*set_max_buffer_size)(struct vsock_sock *, u64);
148 u64 (*get_buffer_size)(struct vsock_sock *);
149 u64 (*get_min_buffer_size)(struct vsock_sock *);
150 u64 (*get_max_buffer_size)(struct vsock_sock *);
151
152 /* Addressing. */
153 u32 (*get_local_cid)(void);
154};
155
156/**** CORE ****/
157
158int vsock_core_init(const struct vsock_transport *t);
159void vsock_core_exit(void);
160
161/**** UTILS ****/
162
163void vsock_release_pending(struct sock *pending);
164void vsock_add_pending(struct sock *listener, struct sock *pending);
165void vsock_remove_pending(struct sock *listener, struct sock *pending);
166void vsock_enqueue_accept(struct sock *listener, struct sock *connected);
167void vsock_insert_connected(struct vsock_sock *vsk);
168void vsock_remove_bound(struct vsock_sock *vsk);
169void vsock_remove_connected(struct vsock_sock *vsk);
170struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
171struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
172 struct sockaddr_vm *dst);
173void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
174
175#endif /* __AF_VSOCK_H__ */
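
This deletion pairs with the '#include <net/af_vsock.h>' hunks earlier in the patch: the header has evidently moved to include/net/, making the vsock core reachable from outside net/vmw_vsock/. Under that assumption, a hypothetical second transport would register like this (demo_get_local_cid and its CID value are invented for illustration; a real transport must fill in the remaining ops before the core calls them):

#include <linux/module.h>
#include <net/af_vsock.h>	/* was: #include "af_vsock.h" */

static u32 demo_get_local_cid(void)
{
	return 42;	/* placeholder CID, illustration only */
}

static const struct vsock_transport demo_transport = {
	.get_local_cid	= demo_get_local_cid,
};

static int __init demo_transport_init(void)
{
	return vsock_core_init(&demo_transport);
}
module_init(demo_transport_init);
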
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index daff75200e25..9d6986634e0b 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -34,8 +34,8 @@
34#include <linux/wait.h> 34#include <linux/wait.h>
35#include <linux/workqueue.h> 35#include <linux/workqueue.h>
36#include <net/sock.h> 36#include <net/sock.h>
37#include <net/af_vsock.h>
37 38
38#include "af_vsock.h"
39#include "vmci_transport_notify.h" 39#include "vmci_transport_notify.h"
40 40
41static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg); 41static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
@@ -625,13 +625,14 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
625 625
626 /* Attach the packet to the socket's receive queue as an sk_buff. */ 626 /* Attach the packet to the socket's receive queue as an sk_buff. */
627 skb = alloc_skb(size, GFP_ATOMIC); 627 skb = alloc_skb(size, GFP_ATOMIC);
628 if (skb) { 628 if (!skb)
629 /* sk_receive_skb() will do a sock_put(), so hold here. */ 629 return VMCI_ERROR_NO_MEM;
630 sock_hold(sk); 630
631 skb_put(skb, size); 631 /* sk_receive_skb() will do a sock_put(), so hold here. */
632 memcpy(skb->data, dg, size); 632 sock_hold(sk);
633 sk_receive_skb(sk, skb, 0); 633 skb_put(skb, size);
634 } 634 memcpy(skb->data, dg, size);
635 sk_receive_skb(sk, skb, 0);
635 636
636 return VMCI_SUCCESS; 637 return VMCI_SUCCESS;
637} 638}
@@ -939,10 +940,9 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work)
939 * reset to prevent that. 940 * reset to prevent that.
940 */ 941 */
941 vmci_transport_send_reset(sk, pkt); 942 vmci_transport_send_reset(sk, pkt);
942 goto out; 943 break;
943 } 944 }
944 945
945out:
946 release_sock(sk); 946 release_sock(sk);
947 kfree(recv_pkt_info); 947 kfree(recv_pkt_info);
948 /* Release reference obtained in the stream callback when we fetched 948 /* Release reference obtained in the stream callback when we fetched
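
Beyond flattening the nesting into a guard clause, the alloc_skb() hunk above changes the reported status: an allocation failure now returns VMCI_ERROR_NO_MEM where the old code silently dropped the datagram and still returned VMCI_SUCCESS. The same reshaping in a self-contained userspace analogue:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Guard-clause version: failure is an explicit error, and the
 * delivery path below stays at one indentation level. */
static int deliver_copy(const char *payload, size_t size)
{
	char *buf = malloc(size);

	if (!buf)
		return -1;	/* old shape: fell through to "success" */

	memcpy(buf, payload, size);
	printf("delivered %zu bytes\n", size);
	free(buf);
	return 0;
}

int main(void)
{
	return deliver_copy("datagram", 8) ? 1 : 0;
}
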
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index fd88ea8924e4..ce6c9623d5f0 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -19,8 +19,8 @@
19#include <linux/vmw_vmci_defs.h> 19#include <linux/vmw_vmci_defs.h>
20#include <linux/vmw_vmci_api.h> 20#include <linux/vmw_vmci_api.h>
21 21
22#include "vsock_addr.h" 22#include <net/vsock_addr.h>
23#include "af_vsock.h" 23#include <net/af_vsock.h>
24 24
25/* If the packet format changes in a release then this should change too. */ 25/* If the packet format changes in a release then this should change too. */
26#define VMCI_TRANSPORT_PACKET_VERSION 1 26#define VMCI_TRANSPORT_PACKET_VERSION 1
diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c
index ec2611b4ea0e..82486ee55eac 100644
--- a/net/vmw_vsock/vsock_addr.c
+++ b/net/vmw_vsock/vsock_addr.c
@@ -17,8 +17,7 @@
17#include <linux/socket.h> 17#include <linux/socket.h>
18#include <linux/stddef.h> 18#include <linux/stddef.h>
19#include <net/sock.h> 19#include <net/sock.h>
20 20#include <net/vsock_addr.h>
21#include "vsock_addr.h"
22 21
23void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port) 22void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port)
24{ 23{
diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h
deleted file mode 100644
index 9ccd5316eac0..000000000000
--- a/net/vmw_vsock/vsock_addr.h
+++ /dev/null
@@ -1,30 +0,0 @@
1/*
2 * VMware vSockets Driver
3 *
4 * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#ifndef _VSOCK_ADDR_H_
17#define _VSOCK_ADDR_H_
18
19#include <linux/vm_sockets.h>
20
21void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port);
22int vsock_addr_validate(const struct sockaddr_vm *addr);
23bool vsock_addr_bound(const struct sockaddr_vm *addr);
24void vsock_addr_unbind(struct sockaddr_vm *addr);
25bool vsock_addr_equals_addr(const struct sockaddr_vm *addr,
26 const struct sockaddr_vm *other);
27int vsock_addr_cast(const struct sockaddr *addr, size_t len,
28 struct sockaddr_vm **out_addr);
29
30#endif
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index fd556ac05fdb..50f6195c8b70 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -54,6 +54,8 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
54 control_freq = chandef->chan->center_freq; 54 control_freq = chandef->chan->center_freq;
55 55
56 switch (chandef->width) { 56 switch (chandef->width) {
57 case NL80211_CHAN_WIDTH_5:
58 case NL80211_CHAN_WIDTH_10:
57 case NL80211_CHAN_WIDTH_20: 59 case NL80211_CHAN_WIDTH_20:
58 case NL80211_CHAN_WIDTH_20_NOHT: 60 case NL80211_CHAN_WIDTH_20_NOHT:
59 if (chandef->center_freq1 != control_freq) 61 if (chandef->center_freq1 != control_freq)
@@ -152,6 +154,12 @@ static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c)
152 int width; 154 int width;
153 155
154 switch (c->width) { 156 switch (c->width) {
157 case NL80211_CHAN_WIDTH_5:
158 width = 5;
159 break;
160 case NL80211_CHAN_WIDTH_10:
161 width = 10;
162 break;
155 case NL80211_CHAN_WIDTH_20: 163 case NL80211_CHAN_WIDTH_20:
156 case NL80211_CHAN_WIDTH_20_NOHT: 164 case NL80211_CHAN_WIDTH_20_NOHT:
157 width = 20; 165 width = 20;
@@ -194,6 +202,16 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1,
194 if (c1->width == c2->width) 202 if (c1->width == c2->width)
195 return NULL; 203 return NULL;
196 204
205 /*
206 * can't be compatible if one of them is 5 or 10 MHz,
207 * and they don't have the same width.
208 */
209 if (c1->width == NL80211_CHAN_WIDTH_5 ||
210 c1->width == NL80211_CHAN_WIDTH_10 ||
211 c2->width == NL80211_CHAN_WIDTH_5 ||
212 c2->width == NL80211_CHAN_WIDTH_10)
213 return NULL;
214
197 if (c1->width == NL80211_CHAN_WIDTH_20_NOHT || 215 if (c1->width == NL80211_CHAN_WIDTH_20_NOHT ||
198 c1->width == NL80211_CHAN_WIDTH_20) 216 c1->width == NL80211_CHAN_WIDTH_20)
199 return c2; 217 return c2;
@@ -264,11 +282,17 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy,
264 u32 bandwidth) 282 u32 bandwidth)
265{ 283{
266 struct ieee80211_channel *c; 284 struct ieee80211_channel *c;
267 u32 freq; 285 u32 freq, start_freq, end_freq;
286
287 if (bandwidth <= 20) {
288 start_freq = center_freq;
289 end_freq = center_freq;
290 } else {
291 start_freq = center_freq - bandwidth/2 + 10;
292 end_freq = center_freq + bandwidth/2 - 10;
293 }
268 294
269 for (freq = center_freq - bandwidth/2 + 10; 295 for (freq = start_freq; freq <= end_freq; freq += 20) {
270 freq <= center_freq + bandwidth/2 - 10;
271 freq += 20) {
272 c = ieee80211_get_channel(wiphy, freq); 296 c = ieee80211_get_channel(wiphy, freq);
273 if (!c) 297 if (!c)
274 return -EINVAL; 298 return -EINVAL;
@@ -310,11 +334,17 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy,
310 u32 prohibited_flags) 334 u32 prohibited_flags)
311{ 335{
312 struct ieee80211_channel *c; 336 struct ieee80211_channel *c;
313 u32 freq; 337 u32 freq, start_freq, end_freq;
338
339 if (bandwidth <= 20) {
340 start_freq = center_freq;
341 end_freq = center_freq;
342 } else {
343 start_freq = center_freq - bandwidth/2 + 10;
344 end_freq = center_freq + bandwidth/2 - 10;
345 }
314 346
315 for (freq = center_freq - bandwidth/2 + 10; 347 for (freq = start_freq; freq <= end_freq; freq += 20) {
316 freq <= center_freq + bandwidth/2 - 10;
317 freq += 20) {
318 c = ieee80211_get_channel(wiphy, freq); 348 c = ieee80211_get_channel(wiphy, freq);
319 if (!c) 349 if (!c)
320 return false; 350 return false;
@@ -349,6 +379,12 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
349 control_freq = chandef->chan->center_freq; 379 control_freq = chandef->chan->center_freq;
350 380
351 switch (chandef->width) { 381 switch (chandef->width) {
382 case NL80211_CHAN_WIDTH_5:
383 width = 5;
384 break;
385 case NL80211_CHAN_WIDTH_10:
386 width = 10;
387 break;
352 case NL80211_CHAN_WIDTH_20: 388 case NL80211_CHAN_WIDTH_20:
353 if (!ht_cap->ht_supported) 389 if (!ht_cap->ht_supported)
354 return false; 390 return false;
@@ -405,6 +441,11 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
405 if (width > 20) 441 if (width > 20)
406 prohibited_flags |= IEEE80211_CHAN_NO_OFDM; 442 prohibited_flags |= IEEE80211_CHAN_NO_OFDM;
407 443
444 /* 5 and 10 MHz are only defined for the OFDM PHY */
445 if (width < 20)
446 prohibited_flags |= IEEE80211_CHAN_NO_OFDM;
447
448
408 if (!cfg80211_secondary_chans_ok(wiphy, chandef->center_freq1, 449 if (!cfg80211_secondary_chans_ok(wiphy, chandef->center_freq1,
409 width, prohibited_flags)) 450 width, prohibited_flags))
410 return false; 451 return false;
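
The twin start_freq/end_freq hunks above are what actually enable the new 5/10 MHz widths: with the old bounds, center_freq - bandwidth/2 + 10 through center_freq + bandwidth/2 - 10, any bandwidth below 20 gives start > end, the loop body never runs, and the channel would pass without a single subchannel check. Clamping both ends to the control channel yields exactly one iteration instead. A small model of the bounds:

#include <stdio.h>

static void print_checked_centers(unsigned int center_freq,
				  unsigned int bandwidth)
{
	unsigned int freq, start_freq, end_freq;

	if (bandwidth <= 20) {
		start_freq = center_freq;	/* single check, as above */
		end_freq = center_freq;
	} else {
		start_freq = center_freq - bandwidth / 2 + 10;
		end_freq = center_freq + bandwidth / 2 - 10;
	}

	for (freq = start_freq; freq <= end_freq; freq += 20)
		printf("%u MHz wide: check %u MHz\n", bandwidth, freq);
}

int main(void)
{
	print_checked_centers(5190, 40);	/* checks 5180 and 5200 */
	print_checked_centers(5200, 10);	/* checks only 5200 */
	return 0;
}
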
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 73405e00c800..67153964aad2 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -34,13 +34,12 @@
34MODULE_AUTHOR("Johannes Berg"); 34MODULE_AUTHOR("Johannes Berg");
35MODULE_LICENSE("GPL"); 35MODULE_LICENSE("GPL");
36MODULE_DESCRIPTION("wireless configuration support"); 36MODULE_DESCRIPTION("wireless configuration support");
37MODULE_ALIAS_GENL_FAMILY(NL80211_GENL_NAME);
37 38
38/* RCU-protected (and cfg80211_mutex for writers) */ 39/* RCU-protected (and RTNL for writers) */
39LIST_HEAD(cfg80211_rdev_list); 40LIST_HEAD(cfg80211_rdev_list);
40int cfg80211_rdev_list_generation; 41int cfg80211_rdev_list_generation;
41 42
42DEFINE_MUTEX(cfg80211_mutex);
43
44/* for debugfs */ 43/* for debugfs */
45static struct dentry *ieee80211_debugfs_dir; 44static struct dentry *ieee80211_debugfs_dir;
46 45
@@ -52,12 +51,11 @@ module_param(cfg80211_disable_40mhz_24ghz, bool, 0644);
52MODULE_PARM_DESC(cfg80211_disable_40mhz_24ghz, 51MODULE_PARM_DESC(cfg80211_disable_40mhz_24ghz,
53 "Disable 40MHz support in the 2.4GHz band"); 52 "Disable 40MHz support in the 2.4GHz band");
54 53
55/* requires cfg80211_mutex to be held! */
56struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) 54struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx)
57{ 55{
58 struct cfg80211_registered_device *result = NULL, *rdev; 56 struct cfg80211_registered_device *result = NULL, *rdev;
59 57
60 assert_cfg80211_lock(); 58 ASSERT_RTNL();
61 59
62 list_for_each_entry(rdev, &cfg80211_rdev_list, list) { 60 list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
63 if (rdev->wiphy_idx == wiphy_idx) { 61 if (rdev->wiphy_idx == wiphy_idx) {
@@ -76,12 +74,11 @@ int get_wiphy_idx(struct wiphy *wiphy)
76 return rdev->wiphy_idx; 74 return rdev->wiphy_idx;
77} 75}
78 76
79/* requires cfg80211_rdev_mutex to be held! */
80struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) 77struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx)
81{ 78{
82 struct cfg80211_registered_device *rdev; 79 struct cfg80211_registered_device *rdev;
83 80
84 assert_cfg80211_lock(); 81 ASSERT_RTNL();
85 82
86 rdev = cfg80211_rdev_by_wiphy_idx(wiphy_idx); 83 rdev = cfg80211_rdev_by_wiphy_idx(wiphy_idx);
87 if (!rdev) 84 if (!rdev)
@@ -89,35 +86,13 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx)
89 return &rdev->wiphy; 86 return &rdev->wiphy;
90} 87}
91 88
92struct cfg80211_registered_device *
93cfg80211_get_dev_from_ifindex(struct net *net, int ifindex)
94{
95 struct cfg80211_registered_device *rdev = ERR_PTR(-ENODEV);
96 struct net_device *dev;
97
98 mutex_lock(&cfg80211_mutex);
99 dev = dev_get_by_index(net, ifindex);
100 if (!dev)
101 goto out;
102 if (dev->ieee80211_ptr) {
103 rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy);
104 mutex_lock(&rdev->mtx);
105 } else
106 rdev = ERR_PTR(-ENODEV);
107 dev_put(dev);
108 out:
109 mutex_unlock(&cfg80211_mutex);
110 return rdev;
111}
112
113/* requires cfg80211_mutex to be held */
114int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, 89int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
115 char *newname) 90 char *newname)
116{ 91{
117 struct cfg80211_registered_device *rdev2; 92 struct cfg80211_registered_device *rdev2;
118 int wiphy_idx, taken = -1, result, digits; 93 int wiphy_idx, taken = -1, result, digits;
119 94
120 assert_cfg80211_lock(); 95 ASSERT_RTNL();
121 96
122 /* prohibit calling the thing phy%d when %d is not its number */ 97 /* prohibit calling the thing phy%d when %d is not its number */
123 sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken); 98 sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken);
@@ -215,8 +190,7 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data)
215void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, 190void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
216 struct wireless_dev *wdev) 191 struct wireless_dev *wdev)
217{ 192{
218 lockdep_assert_held(&rdev->devlist_mtx); 193 ASSERT_RTNL();
219 lockdep_assert_held(&rdev->sched_scan_mtx);
220 194
221 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)) 195 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE))
222 return; 196 return;
@@ -230,18 +204,15 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
230 rdev->opencount--; 204 rdev->opencount--;
231 205
232 if (rdev->scan_req && rdev->scan_req->wdev == wdev) { 206 if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
233 bool busy = work_busy(&rdev->scan_done_wk);
234
235 /* 207 /*
236 * If the work isn't pending or running (in which case it would 208 * If the scan request wasn't notified as done, set it
237 * be waiting for the lock we hold) the driver didn't properly 209 * to aborted and leak it after a warning. The driver
238 * cancel the scan when the interface was removed. In this case 210 * should have notified us that it ended at the latest
239 * warn and leak the scan request object to not crash later. 211 * during rdev_stop_p2p_device().
240 */ 212 */
241 WARN_ON(!busy); 213 if (WARN_ON(!rdev->scan_req->notified))
242 214 rdev->scan_req->aborted = true;
243 rdev->scan_req->aborted = true; 215 ___cfg80211_scan_done(rdev, !rdev->scan_req->notified);
244 ___cfg80211_scan_done(rdev, !busy);
245 } 216 }
246} 217}
247 218
@@ -255,8 +226,6 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked)
255 226
256 rtnl_lock(); 227 rtnl_lock();
257 228
258 /* read-only iteration need not hold the devlist_mtx */
259
260 list_for_each_entry(wdev, &rdev->wdev_list, list) { 229 list_for_each_entry(wdev, &rdev->wdev_list, list) {
261 if (wdev->netdev) { 230 if (wdev->netdev) {
262 dev_close(wdev->netdev); 231 dev_close(wdev->netdev);
@@ -265,12 +234,7 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked)
265 /* otherwise, check iftype */ 234 /* otherwise, check iftype */
266 switch (wdev->iftype) { 235 switch (wdev->iftype) {
267 case NL80211_IFTYPE_P2P_DEVICE: 236 case NL80211_IFTYPE_P2P_DEVICE:
268 /* but this requires it */
269 mutex_lock(&rdev->devlist_mtx);
270 mutex_lock(&rdev->sched_scan_mtx);
271 cfg80211_stop_p2p_device(rdev, wdev); 237 cfg80211_stop_p2p_device(rdev, wdev);
272 mutex_unlock(&rdev->sched_scan_mtx);
273 mutex_unlock(&rdev->devlist_mtx);
274 break; 238 break;
275 default: 239 default:
276 break; 240 break;
@@ -298,10 +262,7 @@ static void cfg80211_event_work(struct work_struct *work)
298 event_work); 262 event_work);
299 263
300 rtnl_lock(); 264 rtnl_lock();
301 cfg80211_lock_rdev(rdev);
302
303 cfg80211_process_rdev_events(rdev); 265 cfg80211_process_rdev_events(rdev);
304 cfg80211_unlock_rdev(rdev);
305 rtnl_unlock(); 266 rtnl_unlock();
306} 267}
307 268
@@ -309,7 +270,7 @@ static void cfg80211_event_work(struct work_struct *work)
309 270
310struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) 271struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
311{ 272{
312 static int wiphy_counter; 273 static atomic_t wiphy_counter = ATOMIC_INIT(0);
313 274
314 struct cfg80211_registered_device *rdev; 275 struct cfg80211_registered_device *rdev;
315 int alloc_size; 276 int alloc_size;
@@ -331,26 +292,21 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
331 292
332 rdev->ops = ops; 293 rdev->ops = ops;
333 294
334 mutex_lock(&cfg80211_mutex); 295 rdev->wiphy_idx = atomic_inc_return(&wiphy_counter);
335
336 rdev->wiphy_idx = wiphy_counter++;
337 296
338 if (unlikely(rdev->wiphy_idx < 0)) { 297 if (unlikely(rdev->wiphy_idx < 0)) {
339 wiphy_counter--;
340 mutex_unlock(&cfg80211_mutex);
341 /* ugh, wrapped! */ 298 /* ugh, wrapped! */
299 atomic_dec(&wiphy_counter);
342 kfree(rdev); 300 kfree(rdev);
343 return NULL; 301 return NULL;
344 } 302 }
345 303
346 mutex_unlock(&cfg80211_mutex); 304 /* atomic_inc_return makes it start at 1, make it start at 0 */
305 rdev->wiphy_idx--;
347 306
348 /* give it a proper name */ 307 /* give it a proper name */
349 dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); 308 dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx);
350 309
351 mutex_init(&rdev->mtx);
352 mutex_init(&rdev->devlist_mtx);
353 mutex_init(&rdev->sched_scan_mtx);
354 INIT_LIST_HEAD(&rdev->wdev_list); 310 INIT_LIST_HEAD(&rdev->wdev_list);
355 INIT_LIST_HEAD(&rdev->beacon_registrations); 311 INIT_LIST_HEAD(&rdev->beacon_registrations);
356 spin_lock_init(&rdev->beacon_registrations_lock); 312 spin_lock_init(&rdev->beacon_registrations_lock);
@@ -496,11 +452,24 @@ int wiphy_register(struct wiphy *wiphy)
496 u16 ifmodes = wiphy->interface_modes; 452 u16 ifmodes = wiphy->interface_modes;
497 453
498#ifdef CONFIG_PM 454#ifdef CONFIG_PM
499 if (WARN_ON((wiphy->wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && 455 if (WARN_ON(wiphy->wowlan &&
500 !(wiphy->wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY))) 456 (wiphy->wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) &&
457 !(wiphy->wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY)))
458 return -EINVAL;
459 if (WARN_ON(wiphy->wowlan &&
460 !wiphy->wowlan->flags && !wiphy->wowlan->n_patterns &&
461 !wiphy->wowlan->tcp))
501 return -EINVAL; 462 return -EINVAL;
502#endif 463#endif
503 464
465 if (WARN_ON(wiphy->coalesce &&
466 (!wiphy->coalesce->n_rules ||
467 !wiphy->coalesce->n_patterns) &&
468 (!wiphy->coalesce->pattern_min_len ||
469 wiphy->coalesce->pattern_min_len >
470 wiphy->coalesce->pattern_max_len)))
471 return -EINVAL;
472
504 if (WARN_ON(wiphy->ap_sme_capa && 473 if (WARN_ON(wiphy->ap_sme_capa &&
505 !(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME))) 474 !(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME)))
506 return -EINVAL; 475 return -EINVAL;
@@ -587,25 +556,28 @@ int wiphy_register(struct wiphy *wiphy)
587 } 556 }
588 557
589#ifdef CONFIG_PM 558#ifdef CONFIG_PM
590 if (rdev->wiphy.wowlan.n_patterns) { 559 if (WARN_ON(rdev->wiphy.wowlan && rdev->wiphy.wowlan->n_patterns &&
591 if (WARN_ON(!rdev->wiphy.wowlan.pattern_min_len || 560 (!rdev->wiphy.wowlan->pattern_min_len ||
592 rdev->wiphy.wowlan.pattern_min_len > 561 rdev->wiphy.wowlan->pattern_min_len >
593 rdev->wiphy.wowlan.pattern_max_len)) 562 rdev->wiphy.wowlan->pattern_max_len)))
594 return -EINVAL; 563 return -EINVAL;
595 }
596#endif 564#endif
597 565
598 /* check and set up bitrates */ 566 /* check and set up bitrates */
599 ieee80211_set_bitrate_flags(wiphy); 567 ieee80211_set_bitrate_flags(wiphy);
600 568
601 mutex_lock(&cfg80211_mutex);
602 569
603 res = device_add(&rdev->wiphy.dev); 570 res = device_add(&rdev->wiphy.dev);
571 if (res)
572 return res;
573
574 res = rfkill_register(rdev->rfkill);
604 if (res) { 575 if (res) {
605 mutex_unlock(&cfg80211_mutex); 576 device_del(&rdev->wiphy.dev);
606 return res; 577 return res;
607 } 578 }
608 579
580 rtnl_lock();
609 /* set up regulatory info */ 581 /* set up regulatory info */
610 wiphy_regulatory_register(wiphy); 582 wiphy_regulatory_register(wiphy);
611 583
@@ -631,25 +603,7 @@ int wiphy_register(struct wiphy *wiphy)
631 } 603 }
632 604
633 cfg80211_debugfs_rdev_add(rdev); 605 cfg80211_debugfs_rdev_add(rdev);
634 mutex_unlock(&cfg80211_mutex);
635 606
636 /*
637 * due to a locking dependency this has to be outside of the
638 * cfg80211_mutex lock
639 */
640 res = rfkill_register(rdev->rfkill);
641 if (res) {
642 device_del(&rdev->wiphy.dev);
643
644 mutex_lock(&cfg80211_mutex);
645 debugfs_remove_recursive(rdev->wiphy.debugfsdir);
646 list_del_rcu(&rdev->list);
647 wiphy_regulatory_deregister(wiphy);
648 mutex_unlock(&cfg80211_mutex);
649 return res;
650 }
651
652 rtnl_lock();
653 rdev->wiphy.registered = true; 607 rdev->wiphy.registered = true;
654 rtnl_unlock(); 608 rtnl_unlock();
655 return 0; 609 return 0;
@@ -679,25 +633,19 @@ void wiphy_unregister(struct wiphy *wiphy)
679{ 633{
680 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 634 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
681 635
682 rtnl_lock();
683 rdev->wiphy.registered = false;
684 rtnl_unlock();
685
686 rfkill_unregister(rdev->rfkill);
687
688 /* protect the device list */
689 mutex_lock(&cfg80211_mutex);
690
691 wait_event(rdev->dev_wait, ({ 636 wait_event(rdev->dev_wait, ({
692 int __count; 637 int __count;
693 mutex_lock(&rdev->devlist_mtx); 638 rtnl_lock();
694 __count = rdev->opencount; 639 __count = rdev->opencount;
695 mutex_unlock(&rdev->devlist_mtx); 640 rtnl_unlock();
696 __count == 0; })); 641 __count == 0; }));
697 642
698 mutex_lock(&rdev->devlist_mtx); 643 rfkill_unregister(rdev->rfkill);
644
645 rtnl_lock();
646 rdev->wiphy.registered = false;
647
699 BUG_ON(!list_empty(&rdev->wdev_list)); 648 BUG_ON(!list_empty(&rdev->wdev_list));
700 mutex_unlock(&rdev->devlist_mtx);
701 649
702 /* 650 /*
703 * First remove the hardware from everywhere, this makes 651 * First remove the hardware from everywhere, this makes
@@ -708,20 +656,6 @@ void wiphy_unregister(struct wiphy *wiphy)
708 synchronize_rcu(); 656 synchronize_rcu();
709 657
710 /* 658 /*
711 * Try to grab rdev->mtx. If a command is still in progress,
712 * hopefully the driver will refuse it since it's tearing
713 * down the device already. We wait for this command to complete
714 * before unlinking the item from the list.
715 * Note: as codified by the BUG_ON above we cannot get here if
716 * a virtual interface is still present. Hence, we can only get
717 * to lock contention here if userspace issues a command that
718 * identified the hardware by wiphy index.
719 */
720 cfg80211_lock_rdev(rdev);
721 /* nothing */
722 cfg80211_unlock_rdev(rdev);
723
724 /*
725 * If this device got a regulatory hint, tell core it's 659
726 * free to listen now to a new shiny device regulatory hint 660 * free to listen now to a new shiny device regulatory hint
727 */ 661 */
@@ -730,16 +664,19 @@ void wiphy_unregister(struct wiphy *wiphy)
730 cfg80211_rdev_list_generation++; 664 cfg80211_rdev_list_generation++;
731 device_del(&rdev->wiphy.dev); 665 device_del(&rdev->wiphy.dev);
732 666
733 mutex_unlock(&cfg80211_mutex); 667 rtnl_unlock();
734 668
735 flush_work(&rdev->scan_done_wk); 669 flush_work(&rdev->scan_done_wk);
736 cancel_work_sync(&rdev->conn_work); 670 cancel_work_sync(&rdev->conn_work);
737 flush_work(&rdev->event_work); 671 flush_work(&rdev->event_work);
738 cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); 672 cancel_delayed_work_sync(&rdev->dfs_update_channels_wk);
739 673
740 if (rdev->wowlan && rdev->ops->set_wakeup) 674#ifdef CONFIG_PM
675 if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup)
741 rdev_set_wakeup(rdev, false); 676 rdev_set_wakeup(rdev, false);
677#endif
742 cfg80211_rdev_free_wowlan(rdev); 678 cfg80211_rdev_free_wowlan(rdev);
679 cfg80211_rdev_free_coalesce(rdev);
743} 680}
744EXPORT_SYMBOL(wiphy_unregister); 681EXPORT_SYMBOL(wiphy_unregister);
745 682
@@ -748,9 +685,6 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev)
748 struct cfg80211_internal_bss *scan, *tmp; 685 struct cfg80211_internal_bss *scan, *tmp;
749 struct cfg80211_beacon_registration *reg, *treg; 686 struct cfg80211_beacon_registration *reg, *treg;
750 rfkill_destroy(rdev->rfkill); 687 rfkill_destroy(rdev->rfkill);
751 mutex_destroy(&rdev->mtx);
752 mutex_destroy(&rdev->devlist_mtx);
753 mutex_destroy(&rdev->sched_scan_mtx);
754 list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) { 688 list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) {
755 list_del(&reg->list); 689 list_del(&reg->list);
756 kfree(reg); 690 kfree(reg);
@@ -775,36 +709,6 @@ void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked)
775} 709}
776EXPORT_SYMBOL(wiphy_rfkill_set_hw_state); 710EXPORT_SYMBOL(wiphy_rfkill_set_hw_state);
777 711
778static void wdev_cleanup_work(struct work_struct *work)
779{
780 struct wireless_dev *wdev;
781 struct cfg80211_registered_device *rdev;
782
783 wdev = container_of(work, struct wireless_dev, cleanup_work);
784 rdev = wiphy_to_dev(wdev->wiphy);
785
786 mutex_lock(&rdev->sched_scan_mtx);
787
788 if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) {
789 rdev->scan_req->aborted = true;
790 ___cfg80211_scan_done(rdev, true);
791 }
792
793 if (WARN_ON(rdev->sched_scan_req &&
794 rdev->sched_scan_req->dev == wdev->netdev)) {
795 __cfg80211_stop_sched_scan(rdev, false);
796 }
797
798 mutex_unlock(&rdev->sched_scan_mtx);
799
800 mutex_lock(&rdev->devlist_mtx);
801 rdev->opencount--;
802 mutex_unlock(&rdev->devlist_mtx);
803 wake_up(&rdev->dev_wait);
804
805 dev_put(wdev->netdev);
806}
807
808void cfg80211_unregister_wdev(struct wireless_dev *wdev) 712void cfg80211_unregister_wdev(struct wireless_dev *wdev)
809{ 713{
810 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); 714 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
@@ -814,8 +718,6 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev)
814 if (WARN_ON(wdev->netdev)) 718 if (WARN_ON(wdev->netdev))
815 return; 719 return;
816 720
817 mutex_lock(&rdev->devlist_mtx);
818 mutex_lock(&rdev->sched_scan_mtx);
819 list_del_rcu(&wdev->list); 721 list_del_rcu(&wdev->list);
820 rdev->devlist_generation++; 722 rdev->devlist_generation++;
821 723
@@ -827,8 +729,6 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev)
827 WARN_ON_ONCE(1); 729 WARN_ON_ONCE(1);
828 break; 730 break;
829 } 731 }
830 mutex_unlock(&rdev->sched_scan_mtx);
831 mutex_unlock(&rdev->devlist_mtx);
832} 732}
833EXPORT_SYMBOL(cfg80211_unregister_wdev); 733EXPORT_SYMBOL(cfg80211_unregister_wdev);
834 734
@@ -847,7 +747,7 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
847} 747}
848 748
849void cfg80211_leave(struct cfg80211_registered_device *rdev, 749void cfg80211_leave(struct cfg80211_registered_device *rdev,
850 struct wireless_dev *wdev) 750 struct wireless_dev *wdev)
851{ 751{
852 struct net_device *dev = wdev->netdev; 752 struct net_device *dev = wdev->netdev;
853 753
@@ -857,9 +757,7 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev,
857 break; 757 break;
858 case NL80211_IFTYPE_P2P_CLIENT: 758 case NL80211_IFTYPE_P2P_CLIENT:
859 case NL80211_IFTYPE_STATION: 759 case NL80211_IFTYPE_STATION:
860 mutex_lock(&rdev->sched_scan_mtx);
861 __cfg80211_stop_sched_scan(rdev, false); 760 __cfg80211_stop_sched_scan(rdev, false);
862 mutex_unlock(&rdev->sched_scan_mtx);
863 761
864 wdev_lock(wdev); 762 wdev_lock(wdev);
865#ifdef CONFIG_CFG80211_WEXT 763#ifdef CONFIG_CFG80211_WEXT
@@ -868,14 +766,15 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev,
868 wdev->wext.ie_len = 0; 766 wdev->wext.ie_len = 0;
869 wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; 767 wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC;
870#endif 768#endif
871 __cfg80211_disconnect(rdev, dev, 769 cfg80211_disconnect(rdev, dev,
872 WLAN_REASON_DEAUTH_LEAVING, true); 770 WLAN_REASON_DEAUTH_LEAVING, true);
873 wdev_unlock(wdev); 771 wdev_unlock(wdev);
874 break; 772 break;
875 case NL80211_IFTYPE_MESH_POINT: 773 case NL80211_IFTYPE_MESH_POINT:
876 cfg80211_leave_mesh(rdev, dev); 774 cfg80211_leave_mesh(rdev, dev);
877 break; 775 break;
878 case NL80211_IFTYPE_AP: 776 case NL80211_IFTYPE_AP:
777 case NL80211_IFTYPE_P2P_GO:
879 cfg80211_stop_ap(rdev, dev); 778 cfg80211_stop_ap(rdev, dev);
880 break; 779 break;
881 default: 780 default:
@@ -886,10 +785,9 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev,
886} 785}
887 786
888static int cfg80211_netdev_notifier_call(struct notifier_block *nb, 787static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
889 unsigned long state, 788 unsigned long state, void *ptr)
890 void *ndev)
891{ 789{
892 struct net_device *dev = ndev; 790 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
893 struct wireless_dev *wdev = dev->ieee80211_ptr; 791 struct wireless_dev *wdev = dev->ieee80211_ptr;
894 struct cfg80211_registered_device *rdev; 792 struct cfg80211_registered_device *rdev;
895 int ret; 793 int ret;
@@ -912,13 +810,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
912 * are added with nl80211. 810 * are added with nl80211.
913 */ 811 */
914 mutex_init(&wdev->mtx); 812 mutex_init(&wdev->mtx);
915 INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work);
916 INIT_LIST_HEAD(&wdev->event_list); 813 INIT_LIST_HEAD(&wdev->event_list);
917 spin_lock_init(&wdev->event_lock); 814 spin_lock_init(&wdev->event_lock);
918 INIT_LIST_HEAD(&wdev->mgmt_registrations); 815 INIT_LIST_HEAD(&wdev->mgmt_registrations);
919 spin_lock_init(&wdev->mgmt_registrations_lock); 816 spin_lock_init(&wdev->mgmt_registrations_lock);
920 817
921 mutex_lock(&rdev->devlist_mtx);
922 wdev->identifier = ++rdev->wdev_id; 818 wdev->identifier = ++rdev->wdev_id;
923 list_add_rcu(&wdev->list, &rdev->wdev_list); 819 list_add_rcu(&wdev->list, &rdev->wdev_list);
924 rdev->devlist_generation++; 820 rdev->devlist_generation++;
@@ -930,8 +826,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
930 pr_err("failed to add phy80211 symlink to netdev!\n"); 826 pr_err("failed to add phy80211 symlink to netdev!\n");
931 } 827 }
932 wdev->netdev = dev; 828 wdev->netdev = dev;
933 wdev->sme_state = CFG80211_SME_IDLE;
934 mutex_unlock(&rdev->devlist_mtx);
935#ifdef CONFIG_CFG80211_WEXT 829#ifdef CONFIG_CFG80211_WEXT
936 wdev->wext.default_key = -1; 830 wdev->wext.default_key = -1;
937 wdev->wext.default_mgmt_key = -1; 831 wdev->wext.default_mgmt_key = -1;
@@ -957,26 +851,22 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
957 break; 851 break;
958 case NETDEV_DOWN: 852 case NETDEV_DOWN:
959 cfg80211_update_iface_num(rdev, wdev->iftype, -1); 853 cfg80211_update_iface_num(rdev, wdev->iftype, -1);
960 dev_hold(dev); 854 if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
961 queue_work(cfg80211_wq, &wdev->cleanup_work); 855 if (WARN_ON(!rdev->scan_req->notified))
856 rdev->scan_req->aborted = true;
857 ___cfg80211_scan_done(rdev, true);
858 }
859
860 if (WARN_ON(rdev->sched_scan_req &&
861 rdev->sched_scan_req->dev == wdev->netdev)) {
862 __cfg80211_stop_sched_scan(rdev, false);
863 }
864
865 rdev->opencount--;
866 wake_up(&rdev->dev_wait);
962 break; 867 break;
963 case NETDEV_UP: 868 case NETDEV_UP:
964 /*
965 * If we have a really quick DOWN/UP succession we may
966 * have this work still pending ... cancel it and see
967 * if it was pending, in which case we need to account
968 * for some of the work it would have done.
969 */
970 if (cancel_work_sync(&wdev->cleanup_work)) {
971 mutex_lock(&rdev->devlist_mtx);
972 rdev->opencount--;
973 mutex_unlock(&rdev->devlist_mtx);
974 dev_put(dev);
975 }
976 cfg80211_update_iface_num(rdev, wdev->iftype, 1); 869 cfg80211_update_iface_num(rdev, wdev->iftype, 1);
977 cfg80211_lock_rdev(rdev);
978 mutex_lock(&rdev->devlist_mtx);
979 mutex_lock(&rdev->sched_scan_mtx);
980 wdev_lock(wdev); 870 wdev_lock(wdev);
981 switch (wdev->iftype) { 871 switch (wdev->iftype) {
982#ifdef CONFIG_CFG80211_WEXT 872#ifdef CONFIG_CFG80211_WEXT
@@ -1008,10 +898,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
1008 break; 898 break;
1009 } 899 }
1010 wdev_unlock(wdev); 900 wdev_unlock(wdev);
1011 mutex_unlock(&rdev->sched_scan_mtx);
1012 rdev->opencount++; 901 rdev->opencount++;
1013 mutex_unlock(&rdev->devlist_mtx);
1014 cfg80211_unlock_rdev(rdev);
1015 902
1016 /* 903 /*
1017 * Configure power management to the driver here so that its 904 * Configure power management to the driver here so that its
@@ -1028,12 +915,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
1028 break; 915 break;
1029 case NETDEV_UNREGISTER: 916 case NETDEV_UNREGISTER:
1030 /* 917 /*
1031 * NB: cannot take rdev->mtx here because this may be
1032 * called within code protected by it when interfaces
1033 * are removed with nl80211.
1034 */
1035 mutex_lock(&rdev->devlist_mtx);
1036 /*
1037 * It is possible to get NETDEV_UNREGISTER 918 * It is possible to get NETDEV_UNREGISTER
1038 * multiple times. To detect that, check 919 * multiple times. To detect that, check
1039 * that the interface is still on the list 920 * that the interface is still on the list
@@ -1049,7 +930,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
1049 kfree(wdev->wext.keys); 930 kfree(wdev->wext.keys);
1050#endif 931#endif
1051 } 932 }
1052 mutex_unlock(&rdev->devlist_mtx);
1053 /* 933 /*
1054 * synchronise (so that we won't find this netdev 934 * synchronise (so that we won't find this netdev
1055 * from other code any more) and then clear the list 935 * from other code any more) and then clear the list
@@ -1063,15 +943,19 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
1063 * freed. 943 * freed.
1064 */ 944 */
1065 cfg80211_process_wdev_events(wdev); 945 cfg80211_process_wdev_events(wdev);
946
947 if (WARN_ON(wdev->current_bss)) {
948 cfg80211_unhold_bss(wdev->current_bss);
949 cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
950 wdev->current_bss = NULL;
951 }
1066 break; 952 break;
1067 case NETDEV_PRE_UP: 953 case NETDEV_PRE_UP:
1068 if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) 954 if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype)))
1069 return notifier_from_errno(-EOPNOTSUPP); 955 return notifier_from_errno(-EOPNOTSUPP);
1070 if (rfkill_blocked(rdev->rfkill)) 956 if (rfkill_blocked(rdev->rfkill))
1071 return notifier_from_errno(-ERFKILL); 957 return notifier_from_errno(-ERFKILL);
1072 mutex_lock(&rdev->devlist_mtx);
1073 ret = cfg80211_can_add_interface(rdev, wdev->iftype); 958 ret = cfg80211_can_add_interface(rdev, wdev->iftype);
1074 mutex_unlock(&rdev->devlist_mtx);
1075 if (ret) 959 if (ret)
1076 return notifier_from_errno(ret); 960 return notifier_from_errno(ret);
1077 break; 961 break;
@@ -1089,12 +973,10 @@ static void __net_exit cfg80211_pernet_exit(struct net *net)
1089 struct cfg80211_registered_device *rdev; 973 struct cfg80211_registered_device *rdev;
1090 974
1091 rtnl_lock(); 975 rtnl_lock();
1092 mutex_lock(&cfg80211_mutex);
1093 list_for_each_entry(rdev, &cfg80211_rdev_list, list) { 976 list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
1094 if (net_eq(wiphy_net(&rdev->wiphy), net)) 977 if (net_eq(wiphy_net(&rdev->wiphy), net))
1095 WARN_ON(cfg80211_switch_netns(rdev, &init_net)); 978 WARN_ON(cfg80211_switch_netns(rdev, &init_net));
1096 } 979 }
1097 mutex_unlock(&cfg80211_mutex);
1098 rtnl_unlock(); 980 rtnl_unlock();
1099} 981}
1100 982
diff --git a/net/wireless/core.h b/net/wireless/core.h
index fd35dae547c4..9ad43c619c54 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -5,7 +5,6 @@
5 */ 5 */
6#ifndef __NET_WIRELESS_CORE_H 6#ifndef __NET_WIRELESS_CORE_H
7#define __NET_WIRELESS_CORE_H 7#define __NET_WIRELESS_CORE_H
8#include <linux/mutex.h>
9#include <linux/list.h> 8#include <linux/list.h>
10#include <linux/netdevice.h> 9#include <linux/netdevice.h>
11#include <linux/rbtree.h> 10#include <linux/rbtree.h>
@@ -23,11 +22,6 @@
23struct cfg80211_registered_device { 22struct cfg80211_registered_device {
24 const struct cfg80211_ops *ops; 23 const struct cfg80211_ops *ops;
25 struct list_head list; 24 struct list_head list;
26 /* we hold this mutex during any call so that
27 * we cannot do multiple calls at once, and also
28 * to keep the deregister call from proceeding while
29 * any call is in progress */
30 struct mutex mtx;
31 25
32 /* rfkill support */ 26 /* rfkill support */
33 struct rfkill_ops rfkill_ops; 27 struct rfkill_ops rfkill_ops;
@@ -49,9 +43,7 @@ struct cfg80211_registered_device {
49 /* wiphy index, internal only */ 43 /* wiphy index, internal only */
50 int wiphy_idx; 44 int wiphy_idx;
51 45
52 /* associated wireless interfaces */ 46 /* associated wireless interfaces, protected by rtnl or RCU */
53 struct mutex devlist_mtx;
54 /* protected by devlist_mtx or RCU */
55 struct list_head wdev_list; 47 struct list_head wdev_list;
56 int devlist_generation, wdev_id; 48 int devlist_generation, wdev_id;
57 int opencount; /* also protected by devlist_mtx */ 49 int opencount; /* also protected by devlist_mtx */
@@ -75,8 +67,6 @@ struct cfg80211_registered_device {
75 struct work_struct scan_done_wk; 67 struct work_struct scan_done_wk;
76 struct work_struct sched_scan_results_wk; 68 struct work_struct sched_scan_results_wk;
77 69
78 struct mutex sched_scan_mtx;
79
80#ifdef CONFIG_NL80211_TESTMODE 70#ifdef CONFIG_NL80211_TESTMODE
81 struct genl_info *testmode_info; 71 struct genl_info *testmode_info;
82#endif 72#endif
@@ -84,13 +74,13 @@ struct cfg80211_registered_device {
84 struct work_struct conn_work; 74 struct work_struct conn_work;
85 struct work_struct event_work; 75 struct work_struct event_work;
86 76
87 struct cfg80211_wowlan *wowlan;
88
89 struct delayed_work dfs_update_channels_wk; 77 struct delayed_work dfs_update_channels_wk;
90 78
91 /* netlink port which started critical protocol (0 means not started) */ 79 /* netlink port which started critical protocol (0 means not started) */
92 u32 crit_proto_nlportid; 80 u32 crit_proto_nlportid;
93 81
82 struct cfg80211_coalesce *coalesce;
83
94 /* must be last because of the way we do wiphy_priv(), 84 /* must be last because of the way we do wiphy_priv(),
95 * and it should at least be aligned to NETDEV_ALIGN */ 85 * and it should at least be aligned to NETDEV_ALIGN */
96 struct wiphy wiphy __aligned(NETDEV_ALIGN); 86 struct wiphy wiphy __aligned(NETDEV_ALIGN);
@@ -106,29 +96,26 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy)
106static inline void 96static inline void
107cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) 97cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev)
108{ 98{
99#ifdef CONFIG_PM
109 int i; 100 int i;
110 101
111 if (!rdev->wowlan) 102 if (!rdev->wiphy.wowlan_config)
112 return; 103 return;
113 for (i = 0; i < rdev->wowlan->n_patterns; i++) 104 for (i = 0; i < rdev->wiphy.wowlan_config->n_patterns; i++)
114 kfree(rdev->wowlan->patterns[i].mask); 105 kfree(rdev->wiphy.wowlan_config->patterns[i].mask);
115 kfree(rdev->wowlan->patterns); 106 kfree(rdev->wiphy.wowlan_config->patterns);
116 if (rdev->wowlan->tcp && rdev->wowlan->tcp->sock) 107 if (rdev->wiphy.wowlan_config->tcp &&
117 sock_release(rdev->wowlan->tcp->sock); 108 rdev->wiphy.wowlan_config->tcp->sock)
118 kfree(rdev->wowlan->tcp); 109 sock_release(rdev->wiphy.wowlan_config->tcp->sock);
119 kfree(rdev->wowlan); 110 kfree(rdev->wiphy.wowlan_config->tcp);
111 kfree(rdev->wiphy.wowlan_config);
112#endif
120} 113}
121 114
122extern struct workqueue_struct *cfg80211_wq; 115extern struct workqueue_struct *cfg80211_wq;
123extern struct mutex cfg80211_mutex;
124extern struct list_head cfg80211_rdev_list; 116extern struct list_head cfg80211_rdev_list;
125extern int cfg80211_rdev_list_generation; 117extern int cfg80211_rdev_list_generation;
126 118
127static inline void assert_cfg80211_lock(void)
128{
129 lockdep_assert_held(&cfg80211_mutex);
130}
131
132struct cfg80211_internal_bss { 119struct cfg80211_internal_bss {
133 struct list_head list; 120 struct list_head list;
134 struct list_head hidden_list; 121 struct list_head hidden_list;
@@ -161,27 +148,11 @@ static inline void cfg80211_unhold_bss(struct cfg80211_internal_bss *bss)
161struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx); 148struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx);
162int get_wiphy_idx(struct wiphy *wiphy); 149int get_wiphy_idx(struct wiphy *wiphy);
163 150
164/* requires cfg80211_rdev_mutex to be held! */
165struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); 151struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx);
166 152
167/* identical to cfg80211_get_dev_from_info but only operate on ifindex */
168extern struct cfg80211_registered_device *
169cfg80211_get_dev_from_ifindex(struct net *net, int ifindex);
170
171int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, 153int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
172 struct net *net); 154 struct net *net);
173 155
174static inline void cfg80211_lock_rdev(struct cfg80211_registered_device *rdev)
175{
176 mutex_lock(&rdev->mtx);
177}
178
179static inline void cfg80211_unlock_rdev(struct cfg80211_registered_device *rdev)
180{
181 BUG_ON(IS_ERR(rdev) || !rdev);
182 mutex_unlock(&rdev->mtx);
183}
184
185static inline void wdev_lock(struct wireless_dev *wdev) 156static inline void wdev_lock(struct wireless_dev *wdev)
186 __acquires(wdev) 157 __acquires(wdev)
187{ 158{
@@ -196,7 +167,7 @@ static inline void wdev_unlock(struct wireless_dev *wdev)
196 mutex_unlock(&wdev->mtx); 167 mutex_unlock(&wdev->mtx);
197} 168}
198 169
199#define ASSERT_RDEV_LOCK(rdev) lockdep_assert_held(&(rdev)->mtx) 170#define ASSERT_RDEV_LOCK(rdev) ASSERT_RTNL()
200#define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx) 171#define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx)
201 172
202static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev) 173static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev)
@@ -314,38 +285,21 @@ int cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
314 struct net_device *dev); 285 struct net_device *dev);
315 286
316/* MLME */ 287/* MLME */
317int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
318 struct net_device *dev,
319 struct ieee80211_channel *chan,
320 enum nl80211_auth_type auth_type,
321 const u8 *bssid,
322 const u8 *ssid, int ssid_len,
323 const u8 *ie, int ie_len,
324 const u8 *key, int key_len, int key_idx,
325 const u8 *sae_data, int sae_data_len);
326int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, 288int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
327 struct net_device *dev, struct ieee80211_channel *chan, 289 struct net_device *dev,
328 enum nl80211_auth_type auth_type, const u8 *bssid, 290 struct ieee80211_channel *chan,
291 enum nl80211_auth_type auth_type,
292 const u8 *bssid,
329 const u8 *ssid, int ssid_len, 293 const u8 *ssid, int ssid_len,
330 const u8 *ie, int ie_len, 294 const u8 *ie, int ie_len,
331 const u8 *key, int key_len, int key_idx, 295 const u8 *key, int key_len, int key_idx,
332 const u8 *sae_data, int sae_data_len); 296 const u8 *sae_data, int sae_data_len);
333int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
334 struct net_device *dev,
335 struct ieee80211_channel *chan,
336 const u8 *bssid,
337 const u8 *ssid, int ssid_len,
338 struct cfg80211_assoc_request *req);
339int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, 297int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
340 struct net_device *dev, 298 struct net_device *dev,
341 struct ieee80211_channel *chan, 299 struct ieee80211_channel *chan,
342 const u8 *bssid, 300 const u8 *bssid,
343 const u8 *ssid, int ssid_len, 301 const u8 *ssid, int ssid_len,
344 struct cfg80211_assoc_request *req); 302 struct cfg80211_assoc_request *req);
345int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
346 struct net_device *dev, const u8 *bssid,
347 const u8 *ie, int ie_len, u16 reason,
348 bool local_state_change);
349int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, 303int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
350 struct net_device *dev, const u8 *bssid, 304 struct net_device *dev, const u8 *bssid,
351 const u8 *ie, int ie_len, u16 reason, 305 const u8 *ie, int ie_len, u16 reason,
@@ -356,11 +310,6 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
356 bool local_state_change); 310 bool local_state_change);
357void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, 311void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
358 struct net_device *dev); 312 struct net_device *dev);
359void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
360 const u8 *req_ie, size_t req_ie_len,
361 const u8 *resp_ie, size_t resp_ie_len,
362 u16 status, bool wextev,
363 struct cfg80211_bss *bss);
364int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, 313int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
365 u16 frame_type, const u8 *match_data, 314 u16 frame_type, const u8 *match_data,
366 int match_len); 315 int match_len);
@@ -376,19 +325,19 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
376void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, 325void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
377 const struct ieee80211_vht_cap *vht_capa_mask); 326 const struct ieee80211_vht_cap *vht_capa_mask);
378 327
379/* SME */ 328/* SME events */
380int __cfg80211_connect(struct cfg80211_registered_device *rdev,
381 struct net_device *dev,
382 struct cfg80211_connect_params *connect,
383 struct cfg80211_cached_keys *connkeys,
384 const u8 *prev_bssid);
385int cfg80211_connect(struct cfg80211_registered_device *rdev, 329int cfg80211_connect(struct cfg80211_registered_device *rdev,
386 struct net_device *dev, 330 struct net_device *dev,
387 struct cfg80211_connect_params *connect, 331 struct cfg80211_connect_params *connect,
388 struct cfg80211_cached_keys *connkeys); 332 struct cfg80211_cached_keys *connkeys,
389int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, 333 const u8 *prev_bssid);
390 struct net_device *dev, u16 reason, 334void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
391 bool wextev); 335 const u8 *req_ie, size_t req_ie_len,
336 const u8 *resp_ie, size_t resp_ie_len,
337 u16 status, bool wextev,
338 struct cfg80211_bss *bss);
339void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
340 size_t ie_len, u16 reason, bool from_ap);
392int cfg80211_disconnect(struct cfg80211_registered_device *rdev, 341int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
393 struct net_device *dev, u16 reason, 342 struct net_device *dev, u16 reason,
394 bool wextev); 343 bool wextev);
@@ -399,21 +348,21 @@ void __cfg80211_roamed(struct wireless_dev *wdev,
399int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, 348int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
400 struct wireless_dev *wdev); 349 struct wireless_dev *wdev);
401 350
351/* SME implementation */
402void cfg80211_conn_work(struct work_struct *work); 352void cfg80211_conn_work(struct work_struct *work);
403void cfg80211_sme_failed_assoc(struct wireless_dev *wdev); 353void cfg80211_sme_scan_done(struct net_device *dev);
404bool cfg80211_sme_failed_reassoc(struct wireless_dev *wdev); 354bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status);
355void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len);
356void cfg80211_sme_disassoc(struct wireless_dev *wdev);
357void cfg80211_sme_deauth(struct wireless_dev *wdev);
358void cfg80211_sme_auth_timeout(struct wireless_dev *wdev);
359void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev);
405 360
406/* internal helpers */ 361/* internal helpers */
407bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher); 362bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher);
408int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, 363int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
409 struct key_params *params, int key_idx, 364 struct key_params *params, int key_idx,
410 bool pairwise, const u8 *mac_addr); 365 bool pairwise, const u8 *mac_addr);
411void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
412 size_t ie_len, u16 reason, bool from_ap);
413void cfg80211_sme_scan_done(struct net_device *dev);
414void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len);
415void cfg80211_sme_disassoc(struct net_device *dev,
416 struct cfg80211_internal_bss *bss);
417void __cfg80211_scan_done(struct work_struct *wk); 366void __cfg80211_scan_done(struct work_struct *wk);
418void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak); 367void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak);
419void __cfg80211_sched_scan_results(struct work_struct *wk); 368void __cfg80211_sched_scan_results(struct work_struct *wk);
diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c
index 920cabe0461b..90d050036624 100644
--- a/net/wireless/debugfs.c
+++ b/net/wireless/debugfs.c
@@ -74,7 +74,7 @@ static ssize_t ht40allow_map_read(struct file *file,
74 if (!buf) 74 if (!buf)
75 return -ENOMEM; 75 return -ENOMEM;
76 76
77 mutex_lock(&cfg80211_mutex); 77 rtnl_lock();
78 78
79 for (band = 0; band < IEEE80211_NUM_BANDS; band++) { 79 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
80 sband = wiphy->bands[band]; 80 sband = wiphy->bands[band];
@@ -85,7 +85,7 @@ static ssize_t ht40allow_map_read(struct file *file,
85 buf, buf_size, offset); 85 buf, buf_size, offset);
86 } 86 }
87 87
88 mutex_unlock(&cfg80211_mutex); 88 rtnl_unlock();
89 89
90 r = simple_read_from_buffer(user_buf, count, ppos, buf, offset); 90 r = simple_read_from_buffer(user_buf, count, ppos, buf, offset);
91 91
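
With cfg80211_mutex gone, even slow paths such as this debugfs read serialize against wiphy changes via the RTNL. A sketch of the resulting handler shape, not from the tree (example_map_read and the buffer contents are hypothetical):

#include <linux/debugfs.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <linux/fs.h>

static ssize_t example_map_read(struct file *file, char __user *user_buf,
				size_t count, loff_t *ppos)
{
	size_t buf_size = 4096, offset = 0;
	char *buf = kzalloc(buf_size, GFP_KERNEL);
	ssize_t r;

	if (!buf)
		return -ENOMEM;

	rtnl_lock();		/* protects the state being snapshotted */
	offset += scnprintf(buf + offset, buf_size - offset, "example\n");
	rtnl_unlock();

	/* copy the snapshot out after dropping the lock */
	r = simple_read_from_buffer(user_buf, count, ppos, buf, offset);
	kfree(buf);
	return r;
}
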
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index d80e47194d49..39bff7d36768 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -43,7 +43,6 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid)
43 cfg80211_hold_bss(bss_from_pub(bss)); 43 cfg80211_hold_bss(bss_from_pub(bss));
44 wdev->current_bss = bss_from_pub(bss); 44 wdev->current_bss = bss_from_pub(bss);
45 45
46 wdev->sme_state = CFG80211_SME_CONNECTED;
47 cfg80211_upload_connect_keys(wdev); 46 cfg80211_upload_connect_keys(wdev);
48 47
49 nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, 48 nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid,
@@ -64,8 +63,6 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp)
64 63
65 trace_cfg80211_ibss_joined(dev, bssid); 64 trace_cfg80211_ibss_joined(dev, bssid);
66 65
67 CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTING);
68
69 ev = kzalloc(sizeof(*ev), gfp); 66 ev = kzalloc(sizeof(*ev), gfp);
70 if (!ev) 67 if (!ev)
71 return; 68 return;
@@ -120,7 +117,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
120#ifdef CONFIG_CFG80211_WEXT 117#ifdef CONFIG_CFG80211_WEXT
121 wdev->wext.ibss.chandef = params->chandef; 118 wdev->wext.ibss.chandef = params->chandef;
122#endif 119#endif
123 wdev->sme_state = CFG80211_SME_CONNECTING;
124 120
125 err = cfg80211_can_use_chan(rdev, wdev, params->chandef.chan, 121 err = cfg80211_can_use_chan(rdev, wdev, params->chandef.chan,
126 params->channel_fixed 122 params->channel_fixed
@@ -134,7 +130,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
134 err = rdev_join_ibss(rdev, dev, params); 130 err = rdev_join_ibss(rdev, dev, params);
135 if (err) { 131 if (err) {
136 wdev->connect_keys = NULL; 132 wdev->connect_keys = NULL;
137 wdev->sme_state = CFG80211_SME_IDLE;
138 return err; 133 return err;
139 } 134 }
140 135
@@ -152,11 +147,11 @@ int cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
152 struct wireless_dev *wdev = dev->ieee80211_ptr; 147 struct wireless_dev *wdev = dev->ieee80211_ptr;
153 int err; 148 int err;
154 149
155 mutex_lock(&rdev->devlist_mtx); 150 ASSERT_RTNL();
151
156 wdev_lock(wdev); 152 wdev_lock(wdev);
157 err = __cfg80211_join_ibss(rdev, dev, params, connkeys); 153 err = __cfg80211_join_ibss(rdev, dev, params, connkeys);
158 wdev_unlock(wdev); 154 wdev_unlock(wdev);
159 mutex_unlock(&rdev->devlist_mtx);
160 155
161 return err; 156 return err;
162} 157}
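
This hunk shows the series' recurring wrapper shape: the exported entry point now asserts the RTNL (instead of taking rdev->devlist_mtx), then takes only the per-wdev mutex around the double-underscore worker. A reduced sketch with hypothetical names (wdev_lock()/wdev->mtx are cfg80211 internals, as above):

static int __example_join(struct wireless_dev *wdev)
{
	lockdep_assert_held(&wdev->mtx);	/* worker only asserts */
	/* ... actual work ... */
	return 0;
}

static int example_join(struct wireless_dev *wdev)
{
	int err;

	ASSERT_RTNL();		/* replaces mutex_lock(&rdev->devlist_mtx) */

	wdev_lock(wdev);
	err = __example_join(wdev);
	wdev_unlock(wdev);

	return err;
}
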
@@ -186,7 +181,6 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext)
186 } 181 }
187 182
188 wdev->current_bss = NULL; 183 wdev->current_bss = NULL;
189 wdev->sme_state = CFG80211_SME_IDLE;
190 wdev->ssid_len = 0; 184 wdev->ssid_len = 0;
191#ifdef CONFIG_CFG80211_WEXT 185#ifdef CONFIG_CFG80211_WEXT
192 if (!nowext) 186 if (!nowext)
@@ -359,11 +353,9 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
359 wdev->wext.ibss.channel_fixed = false; 353 wdev->wext.ibss.channel_fixed = false;
360 } 354 }
361 355
362 mutex_lock(&rdev->devlist_mtx);
363 wdev_lock(wdev); 356 wdev_lock(wdev);
364 err = cfg80211_ibss_wext_join(rdev, wdev); 357 err = cfg80211_ibss_wext_join(rdev, wdev);
365 wdev_unlock(wdev); 358 wdev_unlock(wdev);
366 mutex_unlock(&rdev->devlist_mtx);
367 359
368 return err; 360 return err;
369} 361}
@@ -429,11 +421,9 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev,
429 memcpy(wdev->wext.ibss.ssid, ssid, len); 421 memcpy(wdev->wext.ibss.ssid, ssid, len);
430 wdev->wext.ibss.ssid_len = len; 422 wdev->wext.ibss.ssid_len = len;
431 423
432 mutex_lock(&rdev->devlist_mtx);
433 wdev_lock(wdev); 424 wdev_lock(wdev);
434 err = cfg80211_ibss_wext_join(rdev, wdev); 425 err = cfg80211_ibss_wext_join(rdev, wdev);
435 wdev_unlock(wdev); 426 wdev_unlock(wdev);
436 mutex_unlock(&rdev->devlist_mtx);
437 427
438 return err; 428 return err;
439} 429}
@@ -512,11 +502,9 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev,
512 } else 502 } else
513 wdev->wext.ibss.bssid = NULL; 503 wdev->wext.ibss.bssid = NULL;
514 504
515 mutex_lock(&rdev->devlist_mtx);
516 wdev_lock(wdev); 505 wdev_lock(wdev);
517 err = cfg80211_ibss_wext_join(rdev, wdev); 506 err = cfg80211_ibss_wext_join(rdev, wdev);
518 wdev_unlock(wdev); 507 wdev_unlock(wdev);
519 mutex_unlock(&rdev->devlist_mtx);
520 508
521 return err; 509 return err;
522} 510}
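
Throughout ibss.c the explicit wdev->sme_state transitions (CONNECTING/CONNECTED/IDLE) simply disappear rather than being replaced: after this series, connection state is implied by whether a current BSS is held. A one-line sketch of the derived-state idiom, with a hypothetical helper name:

/* Sketch: state derived from the held BSS pointer instead of an
 * explicitly maintained enum (current_bss is a cfg80211 internal). */
static bool example_is_connected(struct wireless_dev *wdev)
{
	lockdep_assert_held(&wdev->mtx);
	return wdev->current_bss != NULL;
}

Deriving the state removes a whole class of bugs where the enum and the BSS pointer could disagree, which is what the dropped CFG80211_DEV_WARN_ON() above used to police.
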
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 0bb93f3061a4..0553fd4d85ae 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -18,6 +18,7 @@
18#define MESH_PATH_TO_ROOT_TIMEOUT 6000 18#define MESH_PATH_TO_ROOT_TIMEOUT 6000
19#define MESH_ROOT_INTERVAL 5000 19#define MESH_ROOT_INTERVAL 5000
20#define MESH_ROOT_CONFIRMATION_INTERVAL 2000 20#define MESH_ROOT_CONFIRMATION_INTERVAL 2000
21#define MESH_DEFAULT_PLINK_TIMEOUT 1800 /* timeout in seconds */
21 22
22/* 23/*
23 * Minimum interval between two consecutive PREQs originated by the same 24 * Minimum interval between two consecutive PREQs originated by the same
@@ -75,6 +76,7 @@ const struct mesh_config default_mesh_config = {
75 .dot11MeshHWMPconfirmationInterval = MESH_ROOT_CONFIRMATION_INTERVAL, 76 .dot11MeshHWMPconfirmationInterval = MESH_ROOT_CONFIRMATION_INTERVAL,
76 .power_mode = NL80211_MESH_POWER_ACTIVE, 77 .power_mode = NL80211_MESH_POWER_ACTIVE,
77 .dot11MeshAwakeWindowDuration = MESH_DEFAULT_AWAKE_WINDOW, 78 .dot11MeshAwakeWindowDuration = MESH_DEFAULT_AWAKE_WINDOW,
79 .plink_timeout = MESH_DEFAULT_PLINK_TIMEOUT,
78}; 80};
79 81
80const struct mesh_setup default_mesh_setup = { 82const struct mesh_setup default_mesh_setup = {
@@ -82,6 +84,7 @@ const struct mesh_setup default_mesh_setup = {
82 .sync_method = IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET, 84 .sync_method = IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET,
83 .path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP, 85 .path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP,
84 .path_metric = IEEE80211_PATH_METRIC_AIRTIME, 86 .path_metric = IEEE80211_PATH_METRIC_AIRTIME,
87 .auth_id = 0, /* open */
85 .ie = NULL, 88 .ie = NULL,
86 .ie_len = 0, 89 .ie_len = 0,
87 .is_secure = false, 90 .is_secure = false,
@@ -159,6 +162,19 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
159 setup->chandef.center_freq1 = setup->chandef.chan->center_freq; 162 setup->chandef.center_freq1 = setup->chandef.chan->center_freq;
160 } 163 }
161 164
 165 /*
 166 * Check if basic rates are available; otherwise use the mandatory
 167 * rates as basic rates.
 168 */
169 if (!setup->basic_rates) {
170 enum nl80211_bss_scan_width scan_width;
171 struct ieee80211_supported_band *sband =
172 rdev->wiphy.bands[setup->chandef.chan->band];
173 scan_width = cfg80211_chandef_to_scan_width(&setup->chandef);
174 setup->basic_rates = ieee80211_mandatory_rates(sband,
175 scan_width);
176 }
177
162 if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef)) 178 if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef))
163 return -EINVAL; 179 return -EINVAL;
164 180
@@ -185,11 +201,9 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
185 struct wireless_dev *wdev = dev->ieee80211_ptr; 201 struct wireless_dev *wdev = dev->ieee80211_ptr;
186 int err; 202 int err;
187 203
188 mutex_lock(&rdev->devlist_mtx);
189 wdev_lock(wdev); 204 wdev_lock(wdev);
190 err = __cfg80211_join_mesh(rdev, dev, setup, conf); 205 err = __cfg80211_join_mesh(rdev, dev, setup, conf);
191 wdev_unlock(wdev); 206 wdev_unlock(wdev);
192 mutex_unlock(&rdev->devlist_mtx);
193 207
194 return err; 208 return err;
195} 209}
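
The new mesh code adopts a zero-means-default convention: a caller that leaves setup->basic_rates unset gets the band's mandatory rate set, computed from the channel width. A reduced sketch of that fallback, with a hypothetical helper name (ieee80211_mandatory_rates() and cfg80211_chandef_to_scan_width() are the calls used in the hunk above):

static u32 pick_basic_rates(u32 requested,
			    struct ieee80211_supported_band *sband,
			    enum nl80211_bss_scan_width width)
{
	if (requested)		/* caller chose an explicit rate mask */
		return requested;

	/* fall back to the rates every peer must support on this band */
	return ieee80211_mandatory_rates(sband, width);
}

Note also the units: the new MESH_DEFAULT_PLINK_TIMEOUT of 1800 is in seconds (30 minutes), per the comment on the define.
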
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 0c7b7dd855f6..8d49c1ce3dea 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -18,37 +18,18 @@
18#include "rdev-ops.h" 18#include "rdev-ops.h"
19 19
20 20
21void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len) 21void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss,
22{
23 struct wireless_dev *wdev = dev->ieee80211_ptr;
24 struct wiphy *wiphy = wdev->wiphy;
25 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
26
27 trace_cfg80211_send_rx_auth(dev);
28 wdev_lock(wdev);
29
30 nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL);
31 cfg80211_sme_rx_auth(dev, buf, len);
32
33 wdev_unlock(wdev);
34}
35EXPORT_SYMBOL(cfg80211_send_rx_auth);
36
37void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss,
38 const u8 *buf, size_t len) 22 const u8 *buf, size_t len)
39{ 23{
40 u16 status_code;
41 struct wireless_dev *wdev = dev->ieee80211_ptr; 24 struct wireless_dev *wdev = dev->ieee80211_ptr;
42 struct wiphy *wiphy = wdev->wiphy; 25 struct wiphy *wiphy = wdev->wiphy;
43 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 26 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
44 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; 27 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
45 u8 *ie = mgmt->u.assoc_resp.variable; 28 u8 *ie = mgmt->u.assoc_resp.variable;
46 int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); 29 int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable);
30 u16 status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code);
47 31
48 trace_cfg80211_send_rx_assoc(dev, bss); 32 trace_cfg80211_send_rx_assoc(dev, bss);
49 wdev_lock(wdev);
50
51 status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code);
52 33
53 /* 34 /*
54 * This is a bit of a hack, we don't notify userspace of 35 * This is a bit of a hack, we don't notify userspace of
@@ -56,174 +37,135 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss,
56 * and got a reject -- we only try again with an assoc 37 * and got a reject -- we only try again with an assoc
57 * frame instead of reassoc. 38 * frame instead of reassoc.
58 */ 39 */
59 if (status_code != WLAN_STATUS_SUCCESS && wdev->conn && 40 if (cfg80211_sme_rx_assoc_resp(wdev, status_code)) {
60 cfg80211_sme_failed_reassoc(wdev)) { 41 cfg80211_unhold_bss(bss_from_pub(bss));
61 cfg80211_put_bss(wiphy, bss); 42 cfg80211_put_bss(wiphy, bss);
62 goto out; 43 return;
63 } 44 }
64 45
65 nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL); 46 nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL);
66 47 /* update current_bss etc., consumes the bss reference */
67 if (status_code != WLAN_STATUS_SUCCESS && wdev->conn) {
68 cfg80211_sme_failed_assoc(wdev);
69 /*
70 * do not call connect_result() now because the
71 * sme will schedule work that does it later.
72 */
73 cfg80211_put_bss(wiphy, bss);
74 goto out;
75 }
76
77 if (!wdev->conn && wdev->sme_state == CFG80211_SME_IDLE) {
78 /*
79 * This is for the userspace SME, the CONNECTING
80 * state will be changed to CONNECTED by
81 * __cfg80211_connect_result() below.
82 */
83 wdev->sme_state = CFG80211_SME_CONNECTING;
84 }
85
86 /* this consumes the bss reference */
87 __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs, 48 __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs,
88 status_code, 49 status_code,
89 status_code == WLAN_STATUS_SUCCESS, bss); 50 status_code == WLAN_STATUS_SUCCESS, bss);
90 out:
91 wdev_unlock(wdev);
92} 51}
93EXPORT_SYMBOL(cfg80211_send_rx_assoc); 52EXPORT_SYMBOL(cfg80211_rx_assoc_resp);
94 53
95void __cfg80211_send_deauth(struct net_device *dev, 54static void cfg80211_process_auth(struct wireless_dev *wdev,
96 const u8 *buf, size_t len) 55 const u8 *buf, size_t len)
97{ 56{
98 struct wireless_dev *wdev = dev->ieee80211_ptr; 57 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
99 struct wiphy *wiphy = wdev->wiphy;
100 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
101 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
102 const u8 *bssid = mgmt->bssid;
103 bool was_current = false;
104 58
105 trace___cfg80211_send_deauth(dev); 59 nl80211_send_rx_auth(rdev, wdev->netdev, buf, len, GFP_KERNEL);
106 ASSERT_WDEV_LOCK(wdev); 60 cfg80211_sme_rx_auth(wdev, buf, len);
107 61}
108 if (wdev->current_bss &&
109 ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) {
110 cfg80211_unhold_bss(wdev->current_bss);
111 cfg80211_put_bss(wiphy, &wdev->current_bss->pub);
112 wdev->current_bss = NULL;
113 was_current = true;
114 }
115 62
116 nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL); 63static void cfg80211_process_deauth(struct wireless_dev *wdev,
64 const u8 *buf, size_t len)
65{
66 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
67 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
68 const u8 *bssid = mgmt->bssid;
69 u16 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code);
70 bool from_ap = !ether_addr_equal(mgmt->sa, wdev->netdev->dev_addr);
117 71
118 if (wdev->sme_state == CFG80211_SME_CONNECTED && was_current) { 72 nl80211_send_deauth(rdev, wdev->netdev, buf, len, GFP_KERNEL);
119 u16 reason_code;
120 bool from_ap;
121 73
122 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); 74 if (!wdev->current_bss ||
75 !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))
76 return;
123 77
124 from_ap = !ether_addr_equal(mgmt->sa, dev->dev_addr); 78 __cfg80211_disconnected(wdev->netdev, NULL, 0, reason_code, from_ap);
125 __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap); 79 cfg80211_sme_deauth(wdev);
126 } else if (wdev->sme_state == CFG80211_SME_CONNECTING) {
127 __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, NULL, 0,
128 WLAN_STATUS_UNSPECIFIED_FAILURE,
129 false, NULL);
130 }
131} 80}
132EXPORT_SYMBOL(__cfg80211_send_deauth);
133 81
134void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len) 82static void cfg80211_process_disassoc(struct wireless_dev *wdev,
83 const u8 *buf, size_t len)
135{ 84{
136 struct wireless_dev *wdev = dev->ieee80211_ptr; 85 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
86 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
87 const u8 *bssid = mgmt->bssid;
88 u16 reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code);
89 bool from_ap = !ether_addr_equal(mgmt->sa, wdev->netdev->dev_addr);
137 90
138 wdev_lock(wdev); 91 nl80211_send_disassoc(rdev, wdev->netdev, buf, len, GFP_KERNEL);
139 __cfg80211_send_deauth(dev, buf, len); 92
140 wdev_unlock(wdev); 93 if (WARN_ON(!wdev->current_bss ||
94 !ether_addr_equal(wdev->current_bss->pub.bssid, bssid)))
95 return;
96
97 __cfg80211_disconnected(wdev->netdev, NULL, 0, reason_code, from_ap);
98 cfg80211_sme_disassoc(wdev);
141} 99}
142EXPORT_SYMBOL(cfg80211_send_deauth);
143 100
144void __cfg80211_send_disassoc(struct net_device *dev, 101void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len)
145 const u8 *buf, size_t len)
146{ 102{
147 struct wireless_dev *wdev = dev->ieee80211_ptr; 103 struct wireless_dev *wdev = dev->ieee80211_ptr;
148 struct wiphy *wiphy = wdev->wiphy; 104 struct ieee80211_mgmt *mgmt = (void *)buf;
149 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
150 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
151 const u8 *bssid = mgmt->bssid;
152 u16 reason_code;
153 bool from_ap;
154 105
155 trace___cfg80211_send_disassoc(dev);
156 ASSERT_WDEV_LOCK(wdev); 106 ASSERT_WDEV_LOCK(wdev);
157 107
158 nl80211_send_disassoc(rdev, dev, buf, len, GFP_KERNEL); 108 trace_cfg80211_rx_mlme_mgmt(dev, buf, len);
159 109
160 if (wdev->sme_state != CFG80211_SME_CONNECTED) 110 if (WARN_ON(len < 2))
161 return; 111 return;
162 112
163 if (wdev->current_bss && 113 if (ieee80211_is_auth(mgmt->frame_control))
164 ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) { 114 cfg80211_process_auth(wdev, buf, len);
165 cfg80211_sme_disassoc(dev, wdev->current_bss); 115 else if (ieee80211_is_deauth(mgmt->frame_control))
166 cfg80211_unhold_bss(wdev->current_bss); 116 cfg80211_process_deauth(wdev, buf, len);
167 cfg80211_put_bss(wiphy, &wdev->current_bss->pub); 117 else if (ieee80211_is_disassoc(mgmt->frame_control))
168 wdev->current_bss = NULL; 118 cfg80211_process_disassoc(wdev, buf, len);
169 } else
170 WARN_ON(1);
171
172
173 reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code);
174
175 from_ap = !ether_addr_equal(mgmt->sa, dev->dev_addr);
176 __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap);
177} 119}
178EXPORT_SYMBOL(__cfg80211_send_disassoc); 120EXPORT_SYMBOL(cfg80211_rx_mlme_mgmt);
179 121
180void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) 122void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr)
181{ 123{
182 struct wireless_dev *wdev = dev->ieee80211_ptr; 124 struct wireless_dev *wdev = dev->ieee80211_ptr;
125 struct wiphy *wiphy = wdev->wiphy;
126 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
127
128 trace_cfg80211_send_auth_timeout(dev, addr);
183 129
184 wdev_lock(wdev); 130 nl80211_send_auth_timeout(rdev, dev, addr, GFP_KERNEL);
185 __cfg80211_send_disassoc(dev, buf, len); 131 cfg80211_sme_auth_timeout(wdev);
186 wdev_unlock(wdev);
187} 132}
188EXPORT_SYMBOL(cfg80211_send_disassoc); 133EXPORT_SYMBOL(cfg80211_auth_timeout);
189 134
190void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) 135void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss)
191{ 136{
192 struct wireless_dev *wdev = dev->ieee80211_ptr; 137 struct wireless_dev *wdev = dev->ieee80211_ptr;
193 struct wiphy *wiphy = wdev->wiphy; 138 struct wiphy *wiphy = wdev->wiphy;
194 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 139 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
195 140
196 trace_cfg80211_send_auth_timeout(dev, addr); 141 trace_cfg80211_send_assoc_timeout(dev, bss->bssid);
197 wdev_lock(wdev);
198 142
199 nl80211_send_auth_timeout(rdev, dev, addr, GFP_KERNEL); 143 nl80211_send_assoc_timeout(rdev, dev, bss->bssid, GFP_KERNEL);
200 if (wdev->sme_state == CFG80211_SME_CONNECTING) 144 cfg80211_sme_assoc_timeout(wdev);
201 __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0,
202 WLAN_STATUS_UNSPECIFIED_FAILURE,
203 false, NULL);
204 145
205 wdev_unlock(wdev); 146 cfg80211_unhold_bss(bss_from_pub(bss));
147 cfg80211_put_bss(wiphy, bss);
206} 148}
207EXPORT_SYMBOL(cfg80211_send_auth_timeout); 149EXPORT_SYMBOL(cfg80211_assoc_timeout);
208 150
209void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr) 151void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len)
210{ 152{
211 struct wireless_dev *wdev = dev->ieee80211_ptr; 153 struct wireless_dev *wdev = dev->ieee80211_ptr;
212 struct wiphy *wiphy = wdev->wiphy; 154 struct ieee80211_mgmt *mgmt = (void *)buf;
213 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
214 155
215 trace_cfg80211_send_assoc_timeout(dev, addr); 156 ASSERT_WDEV_LOCK(wdev);
216 wdev_lock(wdev);
217 157
218 nl80211_send_assoc_timeout(rdev, dev, addr, GFP_KERNEL); 158 trace_cfg80211_tx_mlme_mgmt(dev, buf, len);
219 if (wdev->sme_state == CFG80211_SME_CONNECTING)
220 __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0,
221 WLAN_STATUS_UNSPECIFIED_FAILURE,
222 false, NULL);
223 159
224 wdev_unlock(wdev); 160 if (WARN_ON(len < 2))
161 return;
162
163 if (ieee80211_is_deauth(mgmt->frame_control))
164 cfg80211_process_deauth(wdev, buf, len);
165 else
166 cfg80211_process_disassoc(wdev, buf, len);
225} 167}
226EXPORT_SYMBOL(cfg80211_send_assoc_timeout); 168EXPORT_SYMBOL(cfg80211_tx_mlme_mgmt);
227 169
228void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, 170void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr,
229 enum nl80211_key_type key_type, int key_id, 171 enum nl80211_key_type key_type, int key_id,
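
The big mlme.c hunk collapses the separate cfg80211_send_rx_auth/deauth/disassoc exports into one entry point, cfg80211_rx_mlme_mgmt(), which classifies the frame by its frame_control field and fans out. A self-contained sketch of that dispatch, not part of the patch (the handle_* handlers are hypothetical stubs; ieee80211_is_*() are the real helpers from linux/ieee80211.h):

#include <linux/ieee80211.h>

static void handle_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) { }
static void handle_deauth(struct wireless_dev *wdev, const u8 *buf, size_t len) { }
static void handle_disassoc(struct wireless_dev *wdev, const u8 *buf, size_t len) { }

static void example_rx_mlme(struct wireless_dev *wdev,
			    const u8 *buf, size_t len)
{
	const struct ieee80211_mgmt *mgmt = (const void *)buf;

	if (WARN_ON(len < 2))		/* need at least frame_control */
		return;

	if (ieee80211_is_auth(mgmt->frame_control))
		handle_auth(wdev, buf, len);
	else if (ieee80211_is_deauth(mgmt->frame_control))
		handle_deauth(wdev, buf, len);
	else if (ieee80211_is_disassoc(mgmt->frame_control))
		handle_disassoc(wdev, buf, len);
}

One entry point also means drivers no longer need to know which locked/unlocked variant to call; the wdev lock is simply asserted once at the top, as the new code does.
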
@@ -253,18 +195,27 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr,
253EXPORT_SYMBOL(cfg80211_michael_mic_failure); 195EXPORT_SYMBOL(cfg80211_michael_mic_failure);
254 196
255/* some MLME handling for userspace SME */ 197/* some MLME handling for userspace SME */
256int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, 198int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
257 struct net_device *dev, 199 struct net_device *dev,
258 struct ieee80211_channel *chan, 200 struct ieee80211_channel *chan,
259 enum nl80211_auth_type auth_type, 201 enum nl80211_auth_type auth_type,
260 const u8 *bssid, 202 const u8 *bssid,
261 const u8 *ssid, int ssid_len, 203 const u8 *ssid, int ssid_len,
262 const u8 *ie, int ie_len, 204 const u8 *ie, int ie_len,
263 const u8 *key, int key_len, int key_idx, 205 const u8 *key, int key_len, int key_idx,
264 const u8 *sae_data, int sae_data_len) 206 const u8 *sae_data, int sae_data_len)
265{ 207{
266 struct wireless_dev *wdev = dev->ieee80211_ptr; 208 struct wireless_dev *wdev = dev->ieee80211_ptr;
267 struct cfg80211_auth_request req; 209 struct cfg80211_auth_request req = {
210 .ie = ie,
211 .ie_len = ie_len,
212 .sae_data = sae_data,
213 .sae_data_len = sae_data_len,
214 .auth_type = auth_type,
215 .key = key,
216 .key_len = key_len,
217 .key_idx = key_idx,
218 };
268 int err; 219 int err;
269 220
270 ASSERT_WDEV_LOCK(wdev); 221 ASSERT_WDEV_LOCK(wdev);
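
A small C idiom worth noting in this hunk: the memset-plus-field-assignments sequence (removed further down) becomes a designated initializer, which zeroes all unnamed members implicitly. A minimal, runnable sketch with an illustrative struct (not the real cfg80211_auth_request):

#include <linux/types.h>

struct example_req {
	const u8 *ie;
	int ie_len;
	int auth_type;
	void *bss;	/* left NULL by the initializer, filled in later */
};

static int example_build_req(const u8 *ie, int ie_len, int auth_type)
{
	struct example_req req = {
		.ie = ie,
		.ie_len = ie_len,
		.auth_type = auth_type,
		/* .bss is zero-initialized automatically; no memset needed */
	};

	return req.ie_len;	/* ... use req ... */
}
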
@@ -277,18 +228,8 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
277 ether_addr_equal(bssid, wdev->current_bss->pub.bssid)) 228 ether_addr_equal(bssid, wdev->current_bss->pub.bssid))
278 return -EALREADY; 229 return -EALREADY;
279 230
280 memset(&req, 0, sizeof(req));
281
282 req.ie = ie;
283 req.ie_len = ie_len;
284 req.sae_data = sae_data;
285 req.sae_data_len = sae_data_len;
286 req.auth_type = auth_type;
287 req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, 231 req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
288 WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); 232 WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
289 req.key = key;
290 req.key_len = key_len;
291 req.key_idx = key_idx;
292 if (!req.bss) 233 if (!req.bss)
293 return -ENOENT; 234 return -ENOENT;
294 235
@@ -304,28 +245,6 @@ out:
304 return err; 245 return err;
305} 246}
306 247
307int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
308 struct net_device *dev, struct ieee80211_channel *chan,
309 enum nl80211_auth_type auth_type, const u8 *bssid,
310 const u8 *ssid, int ssid_len,
311 const u8 *ie, int ie_len,
312 const u8 *key, int key_len, int key_idx,
313 const u8 *sae_data, int sae_data_len)
314{
315 int err;
316
317 mutex_lock(&rdev->devlist_mtx);
318 wdev_lock(dev->ieee80211_ptr);
319 err = __cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid,
320 ssid, ssid_len, ie, ie_len,
321 key, key_len, key_idx,
322 sae_data, sae_data_len);
323 wdev_unlock(dev->ieee80211_ptr);
324 mutex_unlock(&rdev->devlist_mtx);
325
326 return err;
327}
328
329/* Do a logical ht_capa &= ht_capa_mask. */ 248/* Do a logical ht_capa &= ht_capa_mask. */
330void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, 249void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
331 const struct ieee80211_ht_cap *ht_capa_mask) 250 const struct ieee80211_ht_cap *ht_capa_mask)
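
The assoc path above also tightens the BSS reference discipline. A sketch of the rule it now follows (mirroring cfg80211_mlme_assoc(); cfg80211_hold_bss() and bss_from_pub() are cfg80211-internal):

/* cfg80211_get_bss() returns a referenced entry or NULL; every exit
 * path must either hand the reference on or drop it. */
req->bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
			    WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
if (!req->bss)
	return -ENOENT;			/* nothing to release yet */

err = rdev_assoc(rdev, dev, req);
if (err) {
	cfg80211_put_bss(&rdev->wiphy, req->bss);  /* error: drop our ref */
	return err;
}
/* success: pin the entry until the assoc response resolves it */
cfg80211_hold_bss(bss_from_pub(req->bss));

On success the reference travels with the pending association and is released by cfg80211_rx_assoc_resp() or cfg80211_assoc_timeout(), both visible earlier in this file.
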
@@ -360,30 +279,21 @@ void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
360 p1[i] &= p2[i]; 279 p1[i] &= p2[i];
361} 280}
362 281
363int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, 282int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
364 struct net_device *dev, 283 struct net_device *dev,
365 struct ieee80211_channel *chan, 284 struct ieee80211_channel *chan,
366 const u8 *bssid, 285 const u8 *bssid,
367 const u8 *ssid, int ssid_len, 286 const u8 *ssid, int ssid_len,
368 struct cfg80211_assoc_request *req) 287 struct cfg80211_assoc_request *req)
369{ 288{
370 struct wireless_dev *wdev = dev->ieee80211_ptr; 289 struct wireless_dev *wdev = dev->ieee80211_ptr;
371 int err; 290 int err;
372 bool was_connected = false;
373 291
374 ASSERT_WDEV_LOCK(wdev); 292 ASSERT_WDEV_LOCK(wdev);
375 293
376 if (wdev->current_bss && req->prev_bssid && 294 if (wdev->current_bss &&
377 ether_addr_equal(wdev->current_bss->pub.bssid, req->prev_bssid)) { 295 (!req->prev_bssid || !ether_addr_equal(wdev->current_bss->pub.bssid,
378 /* 296 req->prev_bssid)))
379 * Trying to reassociate: Allow this to proceed and let the old
380 * association to be dropped when the new one is completed.
381 */
382 if (wdev->sme_state == CFG80211_SME_CONNECTED) {
383 was_connected = true;
384 wdev->sme_state = CFG80211_SME_CONNECTING;
385 }
386 } else if (wdev->current_bss)
387 return -EALREADY; 297 return -EALREADY;
388 298
389 cfg80211_oper_and_ht_capa(&req->ht_capa_mask, 299 cfg80211_oper_and_ht_capa(&req->ht_capa_mask,
@@ -393,52 +303,28 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
393 303
394 req->bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, 304 req->bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
395 WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); 305 WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
396 if (!req->bss) { 306 if (!req->bss)
397 if (was_connected)
398 wdev->sme_state = CFG80211_SME_CONNECTED;
399 return -ENOENT; 307 return -ENOENT;
400 }
401 308
402 err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED); 309 err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED);
403 if (err) 310 if (err)
404 goto out; 311 goto out;
405 312
406 err = rdev_assoc(rdev, dev, req); 313 err = rdev_assoc(rdev, dev, req);
314 if (!err)
315 cfg80211_hold_bss(bss_from_pub(req->bss));
407 316
408out: 317out:
409 if (err) { 318 if (err)
410 if (was_connected)
411 wdev->sme_state = CFG80211_SME_CONNECTED;
412 cfg80211_put_bss(&rdev->wiphy, req->bss); 319 cfg80211_put_bss(&rdev->wiphy, req->bss);
413 }
414 320
415 return err; 321 return err;
416} 322}
417 323
418int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, 324int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
419 struct net_device *dev, 325 struct net_device *dev, const u8 *bssid,
420 struct ieee80211_channel *chan, 326 const u8 *ie, int ie_len, u16 reason,
421 const u8 *bssid, 327 bool local_state_change)
422 const u8 *ssid, int ssid_len,
423 struct cfg80211_assoc_request *req)
424{
425 struct wireless_dev *wdev = dev->ieee80211_ptr;
426 int err;
427
428 mutex_lock(&rdev->devlist_mtx);
429 wdev_lock(wdev);
430 err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid,
431 ssid, ssid_len, req);
432 wdev_unlock(wdev);
433 mutex_unlock(&rdev->devlist_mtx);
434
435 return err;
436}
437
438int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
439 struct net_device *dev, const u8 *bssid,
440 const u8 *ie, int ie_len, u16 reason,
441 bool local_state_change)
442{ 328{
443 struct wireless_dev *wdev = dev->ieee80211_ptr; 329 struct wireless_dev *wdev = dev->ieee80211_ptr;
444 struct cfg80211_deauth_request req = { 330 struct cfg80211_deauth_request req = {
@@ -451,79 +337,51 @@ int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
451 337
452 ASSERT_WDEV_LOCK(wdev); 338 ASSERT_WDEV_LOCK(wdev);
453 339
454 if (local_state_change && (!wdev->current_bss || 340 if (local_state_change &&
455 !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) 341 (!wdev->current_bss ||
342 !ether_addr_equal(wdev->current_bss->pub.bssid, bssid)))
456 return 0; 343 return 0;
457 344
458 return rdev_deauth(rdev, dev, &req); 345 return rdev_deauth(rdev, dev, &req);
459} 346}
460 347
461int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, 348int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
462 struct net_device *dev, const u8 *bssid, 349 struct net_device *dev, const u8 *bssid,
463 const u8 *ie, int ie_len, u16 reason, 350 const u8 *ie, int ie_len, u16 reason,
464 bool local_state_change) 351 bool local_state_change)
465{ 352{
466 struct wireless_dev *wdev = dev->ieee80211_ptr; 353 struct wireless_dev *wdev = dev->ieee80211_ptr;
354 struct cfg80211_disassoc_request req = {
355 .reason_code = reason,
356 .local_state_change = local_state_change,
357 .ie = ie,
358 .ie_len = ie_len,
359 };
467 int err; 360 int err;
468 361
469 wdev_lock(wdev);
470 err = __cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason,
471 local_state_change);
472 wdev_unlock(wdev);
473
474 return err;
475}
476
477static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
478 struct net_device *dev, const u8 *bssid,
479 const u8 *ie, int ie_len, u16 reason,
480 bool local_state_change)
481{
482 struct wireless_dev *wdev = dev->ieee80211_ptr;
483 struct cfg80211_disassoc_request req;
484
485 ASSERT_WDEV_LOCK(wdev); 362 ASSERT_WDEV_LOCK(wdev);
486 363
487 if (wdev->sme_state != CFG80211_SME_CONNECTED) 364 if (!wdev->current_bss)
488 return -ENOTCONN;
489
490 if (WARN(!wdev->current_bss, "sme_state=%d\n", wdev->sme_state))
491 return -ENOTCONN; 365 return -ENOTCONN;
492 366
493 memset(&req, 0, sizeof(req));
494 req.reason_code = reason;
495 req.local_state_change = local_state_change;
496 req.ie = ie;
497 req.ie_len = ie_len;
498 if (ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) 367 if (ether_addr_equal(wdev->current_bss->pub.bssid, bssid))
499 req.bss = &wdev->current_bss->pub; 368 req.bss = &wdev->current_bss->pub;
500 else 369 else
501 return -ENOTCONN; 370 return -ENOTCONN;
502 371
503 return rdev_disassoc(rdev, dev, &req); 372 err = rdev_disassoc(rdev, dev, &req);
504} 373 if (err)
505 374 return err;
506int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
507 struct net_device *dev, const u8 *bssid,
508 const u8 *ie, int ie_len, u16 reason,
509 bool local_state_change)
510{
511 struct wireless_dev *wdev = dev->ieee80211_ptr;
512 int err;
513
514 wdev_lock(wdev);
515 err = __cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason,
516 local_state_change);
517 wdev_unlock(wdev);
518 375
519 return err; 376 /* driver should have reported the disassoc */
377 WARN_ON(wdev->current_bss);
378 return 0;
520} 379}
521 380
522void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, 381void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
523 struct net_device *dev) 382 struct net_device *dev)
524{ 383{
525 struct wireless_dev *wdev = dev->ieee80211_ptr; 384 struct wireless_dev *wdev = dev->ieee80211_ptr;
526 struct cfg80211_deauth_request req;
527 u8 bssid[ETH_ALEN]; 385 u8 bssid[ETH_ALEN];
528 386
529 ASSERT_WDEV_LOCK(wdev); 387 ASSERT_WDEV_LOCK(wdev);
@@ -531,23 +389,12 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
531 if (!rdev->ops->deauth) 389 if (!rdev->ops->deauth)
532 return; 390 return;
533 391
534 memset(&req, 0, sizeof(req));
535 req.reason_code = WLAN_REASON_DEAUTH_LEAVING;
536 req.ie = NULL;
537 req.ie_len = 0;
538
539 if (!wdev->current_bss) 392 if (!wdev->current_bss)
540 return; 393 return;
541 394
542 memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN); 395 memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN);
543 req.bssid = bssid; 396 cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0,
544 rdev_deauth(rdev, dev, &req); 397 WLAN_REASON_DEAUTH_LEAVING, false);
545
546 if (wdev->current_bss) {
547 cfg80211_unhold_bss(wdev->current_bss);
548 cfg80211_put_bss(&rdev->wiphy, &wdev->current_bss->pub);
549 wdev->current_bss = NULL;
550 }
551} 398}
552 399
553struct cfg80211_mgmt_registration { 400struct cfg80211_mgmt_registration {
@@ -774,7 +621,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
774} 621}
775 622
776bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, 623bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
777 const u8 *buf, size_t len, gfp_t gfp) 624 const u8 *buf, size_t len, u32 flags, gfp_t gfp)
778{ 625{
779 struct wiphy *wiphy = wdev->wiphy; 626 struct wiphy *wiphy = wdev->wiphy;
780 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 627 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
@@ -817,7 +664,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
817 /* Indicate the received Action frame to user space */ 664 /* Indicate the received Action frame to user space */
818 if (nl80211_send_mgmt(rdev, wdev, reg->nlportid, 665 if (nl80211_send_mgmt(rdev, wdev, reg->nlportid,
819 freq, sig_mbm, 666 freq, sig_mbm,
820 buf, len, gfp)) 667 buf, len, flags, gfp))
821 continue; 668 continue;
822 669
823 result = true; 670 result = true;
@@ -848,7 +695,7 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work)
848 dfs_update_channels_wk); 695 dfs_update_channels_wk);
849 wiphy = &rdev->wiphy; 696 wiphy = &rdev->wiphy;
850 697
851 mutex_lock(&cfg80211_mutex); 698 rtnl_lock();
852 for (bandid = 0; bandid < IEEE80211_NUM_BANDS; bandid++) { 699 for (bandid = 0; bandid < IEEE80211_NUM_BANDS; bandid++) {
853 sband = wiphy->bands[bandid]; 700 sband = wiphy->bands[bandid];
854 if (!sband) 701 if (!sband)
@@ -881,7 +728,7 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work)
881 check_again = true; 728 check_again = true;
882 } 729 }
883 } 730 }
884 mutex_unlock(&cfg80211_mutex); 731 rtnl_unlock();
885 732
886 /* reschedule if there are other channels waiting to be cleared again */ 733 /* reschedule if there are other channels waiting to be cleared again */
887 if (check_again) 734 if (check_again)
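
The DFS update work is another cfg80211_mutex-to-RTNL conversion, this time inside a deferred work handler. A generic sketch of that shape, with hypothetical names (struct example, example_update_work):

#include <linux/workqueue.h>
#include <linux/rtnetlink.h>

struct example {
	struct delayed_work wk;
	bool check_again;
};

static void example_update_work(struct work_struct *work)
{
	struct delayed_work *dwk = to_delayed_work(work);
	struct example *ex = container_of(dwk, struct example, wk);

	rtnl_lock();
	/* ... walk shared state, clear expired flags, set check_again ... */
	rtnl_unlock();

	/* reschedule outside the lock if more work remains */
	if (ex->check_again)
		schedule_delayed_work(&ex->wk, HZ);
}
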
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d5aed3bb3945..af8d84a4a5b2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -37,10 +37,10 @@ static void nl80211_post_doit(struct genl_ops *ops, struct sk_buff *skb,
37 37
38/* the netlink family */ 38/* the netlink family */
39static struct genl_family nl80211_fam = { 39static struct genl_family nl80211_fam = {
40 .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ 40 .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */
41 .name = "nl80211", /* have users key off the name instead */ 41 .name = NL80211_GENL_NAME, /* have users key off the name instead */
42 .hdrsize = 0, /* no private header */ 42 .hdrsize = 0, /* no private header */
43 .version = 1, /* no particular meaning now */ 43 .version = 1, /* no particular meaning now */
44 .maxattr = NL80211_ATTR_MAX, 44 .maxattr = NL80211_ATTR_MAX,
45 .netnsok = true, 45 .netnsok = true,
46 .pre_doit = nl80211_pre_doit, 46 .pre_doit = nl80211_pre_doit,
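
Because the family id stays GENL_ID_GENERATE (dynamically assigned), userspace must resolve nl80211 by name, which is why the name now comes from the shared NL80211_GENL_NAME define instead of a bare string literal that could drift. A userspace sketch of that resolution using libnl-genl, assuming standard libnl setup around it:

#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>

/* Returns the dynamically assigned nl80211 family id, or a negative
 * libnl error code. */
static int resolve_nl80211(struct nl_sock *sk)
{
	return genl_ctrl_resolve(sk, "nl80211");
}
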
@@ -59,7 +59,7 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs)
59 int wiphy_idx = -1; 59 int wiphy_idx = -1;
60 int ifidx = -1; 60 int ifidx = -1;
61 61
62 assert_cfg80211_lock(); 62 ASSERT_RTNL();
63 63
64 if (!have_ifidx && !have_wdev_id) 64 if (!have_ifidx && !have_wdev_id)
65 return ERR_PTR(-EINVAL); 65 return ERR_PTR(-EINVAL);
@@ -80,7 +80,6 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs)
80 if (have_wdev_id && rdev->wiphy_idx != wiphy_idx) 80 if (have_wdev_id && rdev->wiphy_idx != wiphy_idx)
81 continue; 81 continue;
82 82
83 mutex_lock(&rdev->devlist_mtx);
84 list_for_each_entry(wdev, &rdev->wdev_list, list) { 83 list_for_each_entry(wdev, &rdev->wdev_list, list) {
85 if (have_ifidx && wdev->netdev && 84 if (have_ifidx && wdev->netdev &&
86 wdev->netdev->ifindex == ifidx) { 85 wdev->netdev->ifindex == ifidx) {
@@ -92,7 +91,6 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs)
92 break; 91 break;
93 } 92 }
94 } 93 }
95 mutex_unlock(&rdev->devlist_mtx);
96 94
97 if (result) 95 if (result)
98 break; 96 break;
@@ -109,7 +107,7 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs)
109 struct cfg80211_registered_device *rdev = NULL, *tmp; 107 struct cfg80211_registered_device *rdev = NULL, *tmp;
110 struct net_device *netdev; 108 struct net_device *netdev;
111 109
112 assert_cfg80211_lock(); 110 ASSERT_RTNL();
113 111
114 if (!attrs[NL80211_ATTR_WIPHY] && 112 if (!attrs[NL80211_ATTR_WIPHY] &&
115 !attrs[NL80211_ATTR_IFINDEX] && 113 !attrs[NL80211_ATTR_IFINDEX] &&
@@ -128,14 +126,12 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs)
128 tmp = cfg80211_rdev_by_wiphy_idx(wdev_id >> 32); 126 tmp = cfg80211_rdev_by_wiphy_idx(wdev_id >> 32);
129 if (tmp) { 127 if (tmp) {
130 /* make sure wdev exists */ 128 /* make sure wdev exists */
131 mutex_lock(&tmp->devlist_mtx);
132 list_for_each_entry(wdev, &tmp->wdev_list, list) { 129 list_for_each_entry(wdev, &tmp->wdev_list, list) {
133 if (wdev->identifier != (u32)wdev_id) 130 if (wdev->identifier != (u32)wdev_id)
134 continue; 131 continue;
135 found = true; 132 found = true;
136 break; 133 break;
137 } 134 }
138 mutex_unlock(&tmp->devlist_mtx);
139 135
140 if (!found) 136 if (!found)
141 tmp = NULL; 137 tmp = NULL;
@@ -182,19 +178,6 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs)
182/* 178/*
183 * This function returns a pointer to the driver 179 * This function returns a pointer to the driver
184 * that the genl_info item that is passed refers to. 180 * that the genl_info item that is passed refers to.
185 * If successful, it returns non-NULL and also locks
186 * the driver's mutex!
187 *
188 * This means that you need to call cfg80211_unlock_rdev()
189 * before being allowed to acquire &cfg80211_mutex!
190 *
191 * This is necessary because we need to lock the global
192 * mutex to get an item off the list safely, and then
193 * we lock the rdev mutex so it doesn't go away under us.
194 *
195 * We don't want to keep cfg80211_mutex locked
196 * for all the time in order to allow requests on
197 * other interfaces to go through at the same time.
198 * 181 *
199 * The result of this can be a PTR_ERR and hence must 182 * The result of this can be a PTR_ERR and hence must
200 * be checked with IS_ERR() for errors. 183 * be checked with IS_ERR() for errors.
@@ -202,20 +185,7 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs)
202static struct cfg80211_registered_device * 185static struct cfg80211_registered_device *
203cfg80211_get_dev_from_info(struct net *netns, struct genl_info *info) 186cfg80211_get_dev_from_info(struct net *netns, struct genl_info *info)
204{ 187{
205 struct cfg80211_registered_device *rdev; 188 return __cfg80211_rdev_from_attrs(netns, info->attrs);
206
207 mutex_lock(&cfg80211_mutex);
208 rdev = __cfg80211_rdev_from_attrs(netns, info->attrs);
209
210 /* if it is not an error we grab the lock on
211 * it to assure it won't be going away while
212 * we operate on it */
213 if (!IS_ERR(rdev))
214 mutex_lock(&rdev->mtx);
215
216 mutex_unlock(&cfg80211_mutex);
217
218 return rdev;
219} 189}
220 190
221/* policy for the attributes */ 191/* policy for the attributes */
@@ -378,6 +348,12 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
378 [NL80211_ATTR_MDID] = { .type = NLA_U16 }, 348 [NL80211_ATTR_MDID] = { .type = NLA_U16 },
379 [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, 349 [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY,
380 .len = IEEE80211_MAX_DATA_LEN }, 350 .len = IEEE80211_MAX_DATA_LEN },
351 [NL80211_ATTR_PEER_AID] = { .type = NLA_U16 },
352 [NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 },
353 [NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG },
354 [NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED },
355 [NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_U16 },
356 [NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_U16 },
381}; 357};
382 358
383/* policy for the key attributes */ 359/* policy for the key attributes */
@@ -432,6 +408,14 @@ nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = {
432 [NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 }, 408 [NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 },
433}; 409};
434 410
411/* policy for coalesce rule attributes */
412static const struct nla_policy
413nl80211_coalesce_policy[NUM_NL80211_ATTR_COALESCE_RULE] = {
414 [NL80211_ATTR_COALESCE_RULE_DELAY] = { .type = NLA_U32 },
415 [NL80211_ATTR_COALESCE_RULE_CONDITION] = { .type = NLA_U32 },
416 [NL80211_ATTR_COALESCE_RULE_PKT_PATTERN] = { .type = NLA_NESTED },
417};
418
435/* policy for GTK rekey offload attributes */ 419/* policy for GTK rekey offload attributes */
436static const struct nla_policy 420static const struct nla_policy
437nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = { 421nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = {
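
A policy table such as the new nl80211_coalesce_policy above is consumed by validating a nested attribute against it. A sketch of the typical consumer, not part of the patch (parse_coalesce_rule is hypothetical; the attribute constants and nla_parse_nested() are real):

static int parse_coalesce_rule(struct nlattr *rule_attr)
{
	struct nlattr *tb[NUM_NL80211_ATTR_COALESCE_RULE];
	u32 delay = 0;
	int err;

	/* validates types/lengths against the policy while splitting
	 * the nested attribute into tb[] */
	err = nla_parse_nested(tb, NL80211_ATTR_COALESCE_RULE_MAX,
			       rule_attr, nl80211_coalesce_policy);
	if (err)
		return err;

	if (tb[NL80211_ATTR_COALESCE_RULE_DELAY])
		delay = nla_get_u32(tb[NL80211_ATTR_COALESCE_RULE_DELAY]);

	/* ... apply delay, condition and packet patterns ... */
	return delay ? 0 : 0;
}
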
@@ -455,7 +439,6 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
455 int err; 439 int err;
456 440
457 rtnl_lock(); 441 rtnl_lock();
458 mutex_lock(&cfg80211_mutex);
459 442
460 if (!cb->args[0]) { 443 if (!cb->args[0]) {
461 err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, 444 err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
@@ -471,10 +454,12 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
471 goto out_unlock; 454 goto out_unlock;
472 } 455 }
473 *rdev = wiphy_to_dev((*wdev)->wiphy); 456 *rdev = wiphy_to_dev((*wdev)->wiphy);
474 cb->args[0] = (*rdev)->wiphy_idx; 457 /* 0 is the first index - add 1 to parse only once */
458 cb->args[0] = (*rdev)->wiphy_idx + 1;
475 cb->args[1] = (*wdev)->identifier; 459 cb->args[1] = (*wdev)->identifier;
476 } else { 460 } else {
477 struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0]); 461 /* subtract the 1 again here */
462 struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
478 struct wireless_dev *tmp; 463 struct wireless_dev *tmp;
479 464
480 if (!wiphy) { 465 if (!wiphy) {
@@ -484,14 +469,12 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
484 *rdev = wiphy_to_dev(wiphy); 469 *rdev = wiphy_to_dev(wiphy);
485 *wdev = NULL; 470 *wdev = NULL;
486 471
487 mutex_lock(&(*rdev)->devlist_mtx);
488 list_for_each_entry(tmp, &(*rdev)->wdev_list, list) { 472 list_for_each_entry(tmp, &(*rdev)->wdev_list, list) {
489 if (tmp->identifier == cb->args[1]) { 473 if (tmp->identifier == cb->args[1]) {
490 *wdev = tmp; 474 *wdev = tmp;
491 break; 475 break;
492 } 476 }
493 } 477 }
494 mutex_unlock(&(*rdev)->devlist_mtx);
495 478
496 if (!*wdev) { 479 if (!*wdev) {
497 err = -ENODEV; 480 err = -ENODEV;
@@ -499,19 +482,14 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
499 } 482 }
500 } 483 }
501 484
502 cfg80211_lock_rdev(*rdev);
503
504 mutex_unlock(&cfg80211_mutex);
505 return 0; 485 return 0;
506 out_unlock: 486 out_unlock:
507 mutex_unlock(&cfg80211_mutex);
508 rtnl_unlock(); 487 rtnl_unlock();
509 return err; 488 return err;
510} 489}
511 490
512static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev) 491static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev)
513{ 492{
514 cfg80211_unlock_rdev(rdev);
515 rtnl_unlock(); 493 rtnl_unlock();
516} 494}
517 495
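
The "+1" comments in this hunk document a classic netlink dump pitfall: cb->args[] starts zeroed, and zero has to mean "first invocation", so a legitimate index 0 must be stored offset by one. A sketch of the trick in isolation, with hypothetical names:

static long lookup_start_index(void)
{
	return 0;	/* index 0 is perfectly valid as a position */
}

static int example_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	long idx;

	if (!cb->args[0]) {
		idx = lookup_start_index();	/* first invocation */
		cb->args[0] = idx + 1;		/* never store 0 as a cursor */
	} else {
		idx = cb->args[0] - 1;		/* undo the offset on resume */
	}

	/* ... emit entries from idx on, re-encoding position as idx + 1 ... */
	return skb->len;
}
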
@@ -837,12 +815,9 @@ static int nl80211_key_allowed(struct wireless_dev *wdev)
837 case NL80211_IFTYPE_MESH_POINT: 815 case NL80211_IFTYPE_MESH_POINT:
838 break; 816 break;
839 case NL80211_IFTYPE_ADHOC: 817 case NL80211_IFTYPE_ADHOC:
840 if (!wdev->current_bss)
841 return -ENOLINK;
842 break;
843 case NL80211_IFTYPE_STATION: 818 case NL80211_IFTYPE_STATION:
844 case NL80211_IFTYPE_P2P_CLIENT: 819 case NL80211_IFTYPE_P2P_CLIENT:
845 if (wdev->sme_state != CFG80211_SME_CONNECTED) 820 if (!wdev->current_bss)
846 return -ENOLINK; 821 return -ENOLINK;
847 break; 822 break;
848 default: 823 default:
@@ -945,7 +920,7 @@ nla_put_failure:
945static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, 920static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev,
946 struct sk_buff *msg) 921 struct sk_buff *msg)
947{ 922{
948 const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan.tcp; 923 const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan->tcp;
949 struct nlattr *nl_tcp; 924 struct nlattr *nl_tcp;
950 925
951 if (!tcp) 926 if (!tcp)
@@ -988,37 +963,37 @@ static int nl80211_send_wowlan(struct sk_buff *msg,
988{ 963{
989 struct nlattr *nl_wowlan; 964 struct nlattr *nl_wowlan;
990 965
991 if (!dev->wiphy.wowlan.flags && !dev->wiphy.wowlan.n_patterns) 966 if (!dev->wiphy.wowlan)
992 return 0; 967 return 0;
993 968
994 nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); 969 nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED);
995 if (!nl_wowlan) 970 if (!nl_wowlan)
996 return -ENOBUFS; 971 return -ENOBUFS;
997 972
998 if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) && 973 if (((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_ANY) &&
999 nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || 974 nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) ||
1000 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) && 975 ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_DISCONNECT) &&
1001 nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || 976 nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) ||
1002 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) && 977 ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT) &&
1003 nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || 978 nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) ||
1004 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && 979 ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) &&
1005 nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || 980 nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) ||
1006 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && 981 ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) &&
1007 nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || 982 nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) ||
1008 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && 983 ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) &&
1009 nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || 984 nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) ||
1010 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && 985 ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) &&
1011 nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || 986 nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) ||
1012 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) && 987 ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE) &&
1013 nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) 988 nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)))
1014 return -ENOBUFS; 989 return -ENOBUFS;
1015 990
1016 if (dev->wiphy.wowlan.n_patterns) { 991 if (dev->wiphy.wowlan->n_patterns) {
1017 struct nl80211_wowlan_pattern_support pat = { 992 struct nl80211_pattern_support pat = {
1018 .max_patterns = dev->wiphy.wowlan.n_patterns, 993 .max_patterns = dev->wiphy.wowlan->n_patterns,
1019 .min_pattern_len = dev->wiphy.wowlan.pattern_min_len, 994 .min_pattern_len = dev->wiphy.wowlan->pattern_min_len,
1020 .max_pattern_len = dev->wiphy.wowlan.pattern_max_len, 995 .max_pattern_len = dev->wiphy.wowlan->pattern_max_len,
1021 .max_pkt_offset = dev->wiphy.wowlan.max_pkt_offset, 996 .max_pkt_offset = dev->wiphy.wowlan->max_pkt_offset,
1022 }; 997 };
1023 998
1024 if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, 999 if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN,
@@ -1035,6 +1010,27 @@ static int nl80211_send_wowlan(struct sk_buff *msg,
1035} 1010}
1036#endif 1011#endif
1037 1012
1013static int nl80211_send_coalesce(struct sk_buff *msg,
1014 struct cfg80211_registered_device *dev)
1015{
1016 struct nl80211_coalesce_rule_support rule;
1017
1018 if (!dev->wiphy.coalesce)
1019 return 0;
1020
1021 rule.max_rules = dev->wiphy.coalesce->n_rules;
1022 rule.max_delay = dev->wiphy.coalesce->max_delay;
1023 rule.pat.max_patterns = dev->wiphy.coalesce->n_patterns;
1024 rule.pat.min_pattern_len = dev->wiphy.coalesce->pattern_min_len;
1025 rule.pat.max_pattern_len = dev->wiphy.coalesce->pattern_max_len;
1026 rule.pat.max_pkt_offset = dev->wiphy.coalesce->max_pkt_offset;
1027
1028 if (nla_put(msg, NL80211_ATTR_COALESCE_RULE, sizeof(rule), &rule))
1029 return -ENOBUFS;
1030
1031 return 0;
1032}
1033
1038static int nl80211_send_band_rateinfo(struct sk_buff *msg, 1034static int nl80211_send_band_rateinfo(struct sk_buff *msg,
1039 struct ieee80211_supported_band *sband) 1035 struct ieee80211_supported_band *sband)
1040{ 1036{
@@ -1151,10 +1147,16 @@ nl80211_send_mgmt_stypes(struct sk_buff *msg,
1151 return 0; 1147 return 0;
1152} 1148}
1153 1149
1150struct nl80211_dump_wiphy_state {
1151 s64 filter_wiphy;
1152 long start;
1153 long split_start, band_start, chan_start;
1154 bool split;
1155};
1156
1154static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, 1157static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1155 struct sk_buff *msg, u32 portid, u32 seq, 1158 struct sk_buff *msg, u32 portid, u32 seq,
1156 int flags, bool split, long *split_start, 1159 int flags, struct nl80211_dump_wiphy_state *state)
1157 long *band_start, long *chan_start)
1158{ 1160{
1159 void *hdr; 1161 void *hdr;
1160 struct nlattr *nl_bands, *nl_band; 1162 struct nlattr *nl_bands, *nl_band;
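
The new struct nl80211_dump_wiphy_state replaces three long* cursor parameters with one state object threaded through the split dump. A reduced sketch of the resumable switch it drives (send_one and the chunk bodies are hypothetical):

struct dump_state {
	long start;		/* which device to resume at */
	long split_start;	/* which section of that device */
	bool split;		/* split dumps vs. old one-shot behaviour */
};

static int send_one(struct sk_buff *msg, struct dump_state *state)
{
	switch (state->split_start) {
	case 0:
		/* ... emit first chunk ... */
		state->split_start++;
		if (state->split)
			break;		/* resume here in the next message */
		/* fall through when not splitting */
	case 1:
		/* ... emit second chunk ... */
		state->split_start = 0;	/* done; next call starts over */
		break;
	}
	return 0;
}

Bundling the cursors also lets nl80211_send_wiphy() WARN on a missing state instead of faking one with local variables, as the removed "allow always using the variables" block did.
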
@@ -1165,19 +1167,14 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1165 int i; 1167 int i;
1166 const struct ieee80211_txrx_stypes *mgmt_stypes = 1168 const struct ieee80211_txrx_stypes *mgmt_stypes =
1167 dev->wiphy.mgmt_stypes; 1169 dev->wiphy.mgmt_stypes;
1168 long start = 0, start_chan = 0, start_band = 0;
1169 u32 features; 1170 u32 features;
1170 1171
1171 hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); 1172 hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY);
1172 if (!hdr) 1173 if (!hdr)
1173 return -ENOBUFS; 1174 return -ENOBUFS;
1174 1175
1175 /* allow always using the variables */ 1176 if (WARN_ON(!state))
1176 if (!split) { 1177 return -EINVAL;
1177 split_start = &start;
1178 band_start = &start_band;
1179 chan_start = &start_chan;
1180 }
1181 1178
1182 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) || 1179 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) ||
1183 nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, 1180 nla_put_string(msg, NL80211_ATTR_WIPHY_NAME,
@@ -1186,7 +1183,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1186 cfg80211_rdev_list_generation)) 1183 cfg80211_rdev_list_generation))
1187 goto nla_put_failure; 1184 goto nla_put_failure;
1188 1185
1189 switch (*split_start) { 1186 switch (state->split_start) {
1190 case 0: 1187 case 0:
1191 if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, 1188 if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT,
1192 dev->wiphy.retry_short) || 1189 dev->wiphy.retry_short) ||
@@ -1228,9 +1225,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1228 if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && 1225 if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) &&
1229 nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) 1226 nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP))
1230 goto nla_put_failure; 1227 goto nla_put_failure;
1228 if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) &&
1229 nla_put_flag(msg, WIPHY_FLAG_SUPPORTS_5_10_MHZ))
1230 goto nla_put_failure;
1231 1231
1232 (*split_start)++; 1232 state->split_start++;
1233 if (split) 1233 if (state->split)
1234 break; 1234 break;
1235 case 1: 1235 case 1:
1236 if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, 1236 if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES,
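
One detail in the hunk above looks worth flagging: the new 5/10 MHz check passes WIPHY_FLAG_SUPPORTS_5_10_MHZ, a wiphy flag bit, as the second argument to nla_put_flag(), while every neighbouring call in this file passes an NL80211_* attribute id there; this appears to be an oversight in the patch as posted. For reference, nla_put_flag() takes an attribute type and encodes "true" by the mere presence of a payload-less attribute. A sketch with a hypothetical attribute id standing in for a real NL80211_ATTR_* value:

enum { EXAMPLE_ATTR_5_10_MHZ = 1 };	/* hypothetical attribute id */

static int put_width_support(struct sk_buff *msg, struct wiphy *wiphy)
{
	/* the attribute id, not the capability flag, goes to netlink */
	if ((wiphy->flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) &&
	    nla_put_flag(msg, EXAMPLE_ATTR_5_10_MHZ))
		return -ENOBUFS;
	return 0;
}
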
@@ -1274,22 +1274,23 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1274 } 1274 }
1275 } 1275 }
1276 1276
1277 (*split_start)++; 1277 state->split_start++;
1278 if (split) 1278 if (state->split)
1279 break; 1279 break;
1280 case 2: 1280 case 2:
1281 if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, 1281 if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES,
1282 dev->wiphy.interface_modes)) 1282 dev->wiphy.interface_modes))
1283 goto nla_put_failure; 1283 goto nla_put_failure;
1284 (*split_start)++; 1284 state->split_start++;
1285 if (split) 1285 if (state->split)
1286 break; 1286 break;
1287 case 3: 1287 case 3:
1288 nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); 1288 nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS);
1289 if (!nl_bands) 1289 if (!nl_bands)
1290 goto nla_put_failure; 1290 goto nla_put_failure;
1291 1291
1292 for (band = *band_start; band < IEEE80211_NUM_BANDS; band++) { 1292 for (band = state->band_start;
1293 band < IEEE80211_NUM_BANDS; band++) {
1293 struct ieee80211_supported_band *sband; 1294 struct ieee80211_supported_band *sband;
1294 1295
1295 sband = dev->wiphy.bands[band]; 1296 sband = dev->wiphy.bands[band];
@@ -1301,12 +1302,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1301 if (!nl_band) 1302 if (!nl_band)
1302 goto nla_put_failure; 1303 goto nla_put_failure;
1303 1304
1304 switch (*chan_start) { 1305 switch (state->chan_start) {
1305 case 0: 1306 case 0:
1306 if (nl80211_send_band_rateinfo(msg, sband)) 1307 if (nl80211_send_band_rateinfo(msg, sband))
1307 goto nla_put_failure; 1308 goto nla_put_failure;
1308 (*chan_start)++; 1309 state->chan_start++;
1309 if (split) 1310 if (state->split)
1310 break; 1311 break;
1311 default: 1312 default:
1312 /* add frequencies */ 1313 /* add frequencies */
@@ -1315,7 +1316,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1315 if (!nl_freqs) 1316 if (!nl_freqs)
1316 goto nla_put_failure; 1317 goto nla_put_failure;
1317 1318
1318 for (i = *chan_start - 1; 1319 for (i = state->chan_start - 1;
1319 i < sband->n_channels; 1320 i < sband->n_channels;
1320 i++) { 1321 i++) {
1321 nl_freq = nla_nest_start(msg, i); 1322 nl_freq = nla_nest_start(msg, i);
@@ -1324,26 +1325,27 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1324 1325
1325 chan = &sband->channels[i]; 1326 chan = &sband->channels[i];
1326 1327
1327 if (nl80211_msg_put_channel(msg, chan, 1328 if (nl80211_msg_put_channel(
1328 split)) 1329 msg, chan,
1330 state->split))
1329 goto nla_put_failure; 1331 goto nla_put_failure;
1330 1332
1331 nla_nest_end(msg, nl_freq); 1333 nla_nest_end(msg, nl_freq);
1332 if (split) 1334 if (state->split)
1333 break; 1335 break;
1334 } 1336 }
1335 if (i < sband->n_channels) 1337 if (i < sband->n_channels)
1336 *chan_start = i + 2; 1338 state->chan_start = i + 2;
1337 else 1339 else
1338 *chan_start = 0; 1340 state->chan_start = 0;
1339 nla_nest_end(msg, nl_freqs); 1341 nla_nest_end(msg, nl_freqs);
1340 } 1342 }
1341 1343
1342 nla_nest_end(msg, nl_band); 1344 nla_nest_end(msg, nl_band);
1343 1345
1344 if (split) { 1346 if (state->split) {
1345 /* start again here */ 1347 /* start again here */
1346 if (*chan_start) 1348 if (state->chan_start)
1347 band--; 1349 band--;
1348 break; 1350 break;
1349 } 1351 }
@@ -1351,14 +1353,14 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1351 nla_nest_end(msg, nl_bands); 1353 nla_nest_end(msg, nl_bands);
1352 1354
1353 if (band < IEEE80211_NUM_BANDS) 1355 if (band < IEEE80211_NUM_BANDS)
1354 *band_start = band + 1; 1356 state->band_start = band + 1;
1355 else 1357 else
1356 *band_start = 0; 1358 state->band_start = 0;
1357 1359
1358 /* if bands & channels are done, continue outside */ 1360 /* if bands & channels are done, continue outside */
1359 if (*band_start == 0 && *chan_start == 0) 1361 if (state->band_start == 0 && state->chan_start == 0)
1360 (*split_start)++; 1362 state->split_start++;
1361 if (split) 1363 if (state->split)
1362 break; 1364 break;
1363 case 4: 1365 case 4:
1364 nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS); 1366 nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS);
@@ -1424,9 +1426,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1424 } 1426 }
1425 CMD(start_p2p_device, START_P2P_DEVICE); 1427 CMD(start_p2p_device, START_P2P_DEVICE);
1426 CMD(set_mcast_rate, SET_MCAST_RATE); 1428 CMD(set_mcast_rate, SET_MCAST_RATE);
1427 if (split) { 1429 if (state->split) {
1428 CMD(crit_proto_start, CRIT_PROTOCOL_START); 1430 CMD(crit_proto_start, CRIT_PROTOCOL_START);
1429 CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); 1431 CMD(crit_proto_stop, CRIT_PROTOCOL_STOP);
1432 if (dev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)
1433 CMD(channel_switch, CHANNEL_SWITCH);
1430 } 1434 }
1431 1435
1432#ifdef CONFIG_NL80211_TESTMODE 1436#ifdef CONFIG_NL80211_TESTMODE
@@ -1448,8 +1452,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1448 } 1452 }
1449 1453
1450 nla_nest_end(msg, nl_cmds); 1454 nla_nest_end(msg, nl_cmds);
1451 (*split_start)++; 1455 state->split_start++;
1452 if (split) 1456 if (state->split)
1453 break; 1457 break;
1454 case 5: 1458 case 5:
1455 if (dev->ops->remain_on_channel && 1459 if (dev->ops->remain_on_channel &&
@@ -1465,29 +1469,30 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1465 1469
1466 if (nl80211_send_mgmt_stypes(msg, mgmt_stypes)) 1470 if (nl80211_send_mgmt_stypes(msg, mgmt_stypes))
1467 goto nla_put_failure; 1471 goto nla_put_failure;
1468 (*split_start)++; 1472 state->split_start++;
1469 if (split) 1473 if (state->split)
1470 break; 1474 break;
1471 case 6: 1475 case 6:
1472#ifdef CONFIG_PM 1476#ifdef CONFIG_PM
1473 if (nl80211_send_wowlan(msg, dev, split)) 1477 if (nl80211_send_wowlan(msg, dev, state->split))
1474 goto nla_put_failure; 1478 goto nla_put_failure;
1475 (*split_start)++; 1479 state->split_start++;
1476 if (split) 1480 if (state->split)
1477 break; 1481 break;
1478#else 1482#else
1479 (*split_start)++; 1483 state->split_start++;
1480#endif 1484#endif
1481 case 7: 1485 case 7:
1482 if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, 1486 if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES,
1483 dev->wiphy.software_iftypes)) 1487 dev->wiphy.software_iftypes))
1484 goto nla_put_failure; 1488 goto nla_put_failure;
1485 1489
1486 if (nl80211_put_iface_combinations(&dev->wiphy, msg, split)) 1490 if (nl80211_put_iface_combinations(&dev->wiphy, msg,
1491 state->split))
1487 goto nla_put_failure; 1492 goto nla_put_failure;
1488 1493
1489 (*split_start)++; 1494 state->split_start++;
1490 if (split) 1495 if (state->split)
1491 break; 1496 break;
1492 case 8: 1497 case 8:
1493 if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && 1498 if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) &&
@@ -1501,7 +1506,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1501 * dump is split, otherwise it makes it too big. Therefore 1506 * dump is split, otherwise it makes it too big. Therefore
1502 * only advertise it in that case. 1507 * only advertise it in that case.
1503 */ 1508 */
1504 if (split) 1509 if (state->split)
1505 features |= NL80211_FEATURE_ADVERTISE_CHAN_LIMITS; 1510 features |= NL80211_FEATURE_ADVERTISE_CHAN_LIMITS;
1506 if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features)) 1511 if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features))
1507 goto nla_put_failure; 1512 goto nla_put_failure;
@@ -1528,7 +1533,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1528 * case we'll continue with more data in the next round, 1533 * case we'll continue with more data in the next round,
1529 * but break unconditionally so unsplit data stops here. 1534 * but break unconditionally so unsplit data stops here.
1530 */ 1535 */
1531 (*split_start)++; 1536 state->split_start++;
1532 break; 1537 break;
1533 case 9: 1538 case 9:
1534 if (dev->wiphy.extended_capabilities && 1539 if (dev->wiphy.extended_capabilities &&
@@ -1546,8 +1551,14 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1546 dev->wiphy.vht_capa_mod_mask)) 1551 dev->wiphy.vht_capa_mod_mask))
1547 goto nla_put_failure; 1552 goto nla_put_failure;
1548 1553
1554 state->split_start++;
1555 break;
1556 case 10:
1557 if (nl80211_send_coalesce(msg, dev))
1558 goto nla_put_failure;
1559
1549 /* done */ 1560 /* done */
1550 *split_start = 0; 1561 state->split_start = 0;
1551 break; 1562 break;
1552 } 1563 }
1553 return genlmsg_end(msg, hdr); 1564 return genlmsg_end(msg, hdr);
@@ -1557,59 +1568,78 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1557 return -EMSGSIZE; 1568 return -EMSGSIZE;
1558} 1569}
1559 1570
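The switch just above is a resumable generator: each case emits one chunk of wiphy data, advances state->split_start, and breaks only when the dump is split, so an unsplit request falls through every case and emits everything in one message. A minimal user-space sketch of that pattern - all names here are invented, not nl80211's:

#include <stdio.h>
#include <stdbool.h>

struct dump_state {
        int split_start;        /* resume cursor, persists across calls */
        bool split;             /* one chunk per message? */
};

/* Returns the cursor; 0 means the dump is complete. */
static int emit_chunks(struct dump_state *st)
{
        switch (st->split_start) {
        case 0:
                printf("chunk 0: basic wiphy attributes\n");
                st->split_start++;
                if (st->split)
                        break;
                /* fall through */
        case 1:
                printf("chunk 1: bands and channels\n");
                st->split_start++;
                if (st->split)
                        break;
                /* fall through */
        case 2:
                printf("chunk 2: supported commands\n");
                st->split_start = 0;    /* done */
                break;
        }
        return st->split_start;
}

int main(void)
{
        struct dump_state st = { .split_start = 0, .split = true };

        do {
                /* each iteration models one netlink reply message */
        } while (emit_chunks(&st));
        return 0;
}

The do/while here plays the role of the "while (state->split_start > 0)" loop in nl80211_dump_wiphy() further down.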
1571static int nl80211_dump_wiphy_parse(struct sk_buff *skb,
1572 struct netlink_callback *cb,
1573 struct nl80211_dump_wiphy_state *state)
1574{
1575 struct nlattr **tb = nl80211_fam.attrbuf;
1576 int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
1577 tb, nl80211_fam.maxattr, nl80211_policy);
1578 /* ignore parse errors for backward compatibility */
1579 if (ret)
1580 return 0;
1581
1582 state->split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP];
1583 if (tb[NL80211_ATTR_WIPHY])
1584 state->filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]);
1585 if (tb[NL80211_ATTR_WDEV])
1586 state->filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32;
1587 if (tb[NL80211_ATTR_IFINDEX]) {
1588 struct net_device *netdev;
1589 struct cfg80211_registered_device *rdev;
1590 int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]);
1591
1592 netdev = dev_get_by_index(sock_net(skb->sk), ifidx);
1593 if (!netdev)
1594 return -ENODEV;
1595 if (netdev->ieee80211_ptr) {
1596 rdev = wiphy_to_dev(
1597 netdev->ieee80211_ptr->wiphy);
1598 state->filter_wiphy = rdev->wiphy_idx;
1599 }
1600 dev_put(netdev);
1601 }
1602
1603 return 0;
1604}
1605
1560static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) 1606static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
1561{ 1607{
1562 int idx = 0, ret; 1608 int idx = 0, ret;
1563 int start = cb->args[0]; 1609 struct nl80211_dump_wiphy_state *state = (void *)cb->args[0];
1564 struct cfg80211_registered_device *dev; 1610 struct cfg80211_registered_device *dev;
1565 s64 filter_wiphy = -1;
1566 bool split = false;
1567 struct nlattr **tb = nl80211_fam.attrbuf;
1568 int res;
1569 1611
1570 mutex_lock(&cfg80211_mutex); 1612 rtnl_lock();
1571 res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, 1613 if (!state) {
1572 tb, nl80211_fam.maxattr, nl80211_policy); 1614 state = kzalloc(sizeof(*state), GFP_KERNEL);
1573 if (res == 0) { 1615 if (!state) {
1574 split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP]; 1616 rtnl_unlock();
1575 if (tb[NL80211_ATTR_WIPHY]) 1617 return -ENOMEM;
1576 filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]); 1618 }
1577 if (tb[NL80211_ATTR_WDEV]) 1619 state->filter_wiphy = -1;
1578 filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32; 1620 ret = nl80211_dump_wiphy_parse(skb, cb, state);
1579 if (tb[NL80211_ATTR_IFINDEX]) { 1621 if (ret) {
1580 struct net_device *netdev; 1622 kfree(state);
1581 int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); 1623 rtnl_unlock();
1582 1624 return ret;
1583 netdev = dev_get_by_index(sock_net(skb->sk), ifidx);
1584 if (!netdev) {
1585 mutex_unlock(&cfg80211_mutex);
1586 return -ENODEV;
1587 }
1588 if (netdev->ieee80211_ptr) {
1589 dev = wiphy_to_dev(
1590 netdev->ieee80211_ptr->wiphy);
1591 filter_wiphy = dev->wiphy_idx;
1592 }
1593 dev_put(netdev);
1594 } 1625 }
1626 cb->args[0] = (long)state;
1595 } 1627 }
1596 1628
1597 list_for_each_entry(dev, &cfg80211_rdev_list, list) { 1629 list_for_each_entry(dev, &cfg80211_rdev_list, list) {
1598 if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) 1630 if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk)))
1599 continue; 1631 continue;
1600 if (++idx <= start) 1632 if (++idx <= state->start)
1601 continue; 1633 continue;
1602 if (filter_wiphy != -1 && dev->wiphy_idx != filter_wiphy) 1634 if (state->filter_wiphy != -1 &&
1635 state->filter_wiphy != dev->wiphy_idx)
1603 continue; 1636 continue;
1604 /* attempt to fit multiple wiphy data chunks into the skb */ 1637 /* attempt to fit multiple wiphy data chunks into the skb */
1605 do { 1638 do {
1606 ret = nl80211_send_wiphy(dev, skb, 1639 ret = nl80211_send_wiphy(dev, skb,
1607 NETLINK_CB(cb->skb).portid, 1640 NETLINK_CB(cb->skb).portid,
1608 cb->nlh->nlmsg_seq, 1641 cb->nlh->nlmsg_seq,
1609 NLM_F_MULTI, 1642 NLM_F_MULTI, state);
1610 split, &cb->args[1],
1611 &cb->args[2],
1612 &cb->args[3]);
1613 if (ret < 0) { 1643 if (ret < 0) {
1614 /* 1644 /*
1615 * If sending the wiphy data didn't fit (ENOBUFS 1645 * If sending the wiphy data didn't fit (ENOBUFS
@@ -1628,33 +1658,40 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
1628 !skb->len && 1658 !skb->len &&
1629 cb->min_dump_alloc < 4096) { 1659 cb->min_dump_alloc < 4096) {
1630 cb->min_dump_alloc = 4096; 1660 cb->min_dump_alloc = 4096;
1631 mutex_unlock(&cfg80211_mutex); 1661 rtnl_unlock();
1632 return 1; 1662 return 1;
1633 } 1663 }
1634 idx--; 1664 idx--;
1635 break; 1665 break;
1636 } 1666 }
1637 } while (cb->args[1] > 0); 1667 } while (state->split_start > 0);
1638 break; 1668 break;
1639 } 1669 }
1640 mutex_unlock(&cfg80211_mutex); 1670 rtnl_unlock();
1641 1671
1642 cb->args[0] = idx; 1672 state->start = idx;
1643 1673
1644 return skb->len; 1674 return skb->len;
1645} 1675}
1646 1676
1677static int nl80211_dump_wiphy_done(struct netlink_callback *cb)
1678{
1679 kfree((void *)cb->args[0]);
1680 return 0;
1681}
1682
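The dump state no longer squeezes into the scalar cb->args slots: nl80211_dump_wiphy() allocates a struct on the first invocation, parks the pointer in cb->args[0], and the new nl80211_dump_wiphy_done() callback frees it when the dump ends. A stand-alone sketch of that lifetime - the callback struct and field names are invented:

#include <stdlib.h>
#include <string.h>

struct callback {
        long args[6];           /* models netlink_callback scratch space */
};

struct dump_state {
        long start;
        long split_start, band_start, chan_start;
        long filter_wiphy;
};

static int dump(struct callback *cb)
{
        struct dump_state *state = (void *)cb->args[0];

        if (!state) {                           /* first invocation */
                state = calloc(1, sizeof(*state));
                if (!state)
                        return -1;              /* -ENOMEM in the kernel */
                state->filter_wiphy = -1;       /* sentinel: no filter */
                cb->args[0] = (long)state;
        }
        /* ... emit data, updating state->start and friends ... */
        return 0;
}

static int dump_done(struct callback *cb)
{
        free((void *)cb->args[0]);
        cb->args[0] = 0;
        return 0;
}

int main(void)
{
        struct callback cb;

        memset(&cb, 0, sizeof(cb));
        if (dump(&cb) == 0)
                dump_done(&cb);
        return 0;
}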
1647static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) 1683static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info)
1648{ 1684{
1649 struct sk_buff *msg; 1685 struct sk_buff *msg;
1650 struct cfg80211_registered_device *dev = info->user_ptr[0]; 1686 struct cfg80211_registered_device *dev = info->user_ptr[0];
1687 struct nl80211_dump_wiphy_state state = {};
1651 1688
1652 msg = nlmsg_new(4096, GFP_KERNEL); 1689 msg = nlmsg_new(4096, GFP_KERNEL);
1653 if (!msg) 1690 if (!msg)
1654 return -ENOMEM; 1691 return -ENOMEM;
1655 1692
1656 if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0, 1693 if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0,
1657 false, NULL, NULL, NULL) < 0) { 1694 &state) < 0) {
1658 nlmsg_free(msg); 1695 nlmsg_free(msg);
1659 return -ENOBUFS; 1696 return -ENOBUFS;
1660 } 1697 }
@@ -1771,6 +1808,11 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
1771 IEEE80211_CHAN_DISABLED)) 1808 IEEE80211_CHAN_DISABLED))
1772 return -EINVAL; 1809 return -EINVAL;
1773 1810
1811 if ((chandef->width == NL80211_CHAN_WIDTH_5 ||
1812 chandef->width == NL80211_CHAN_WIDTH_10) &&
1813 !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ))
1814 return -EINVAL;
1815
1774 return 0; 1816 return 0;
1775} 1817}
1776 1818
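The added hunk gates the new narrow widths on a capability flag: a chandef asking for 5 or 10 MHz is rejected unless the wiphy advertises WIPHY_FLAG_SUPPORTS_5_10_MHZ. Distilled into a stand-alone check (enum and flag values invented here):

#include <errno.h>

enum chan_width { WIDTH_20_NOHT, WIDTH_20, WIDTH_40, WIDTH_10, WIDTH_5 };

#define SUPPORTS_5_10_MHZ 0x1   /* stand-in for the wiphy flag */

static int check_width(enum chan_width width, unsigned int wiphy_flags)
{
        /* narrow widths are only valid if the hardware opted in */
        if ((width == WIDTH_5 || width == WIDTH_10) &&
            !(wiphy_flags & SUPPORTS_5_10_MHZ))
                return -EINVAL;
        return 0;
}

int main(void)
{
        return check_width(WIDTH_5, 0) == -EINVAL ? 0 : 1;
}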
@@ -1792,7 +1834,6 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
1792 if (result) 1834 if (result)
1793 return result; 1835 return result;
1794 1836
1795 mutex_lock(&rdev->devlist_mtx);
1796 switch (iftype) { 1837 switch (iftype) {
1797 case NL80211_IFTYPE_AP: 1838 case NL80211_IFTYPE_AP:
1798 case NL80211_IFTYPE_P2P_GO: 1839 case NL80211_IFTYPE_P2P_GO:
@@ -1816,7 +1857,6 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
1816 default: 1857 default:
1817 result = -EINVAL; 1858 result = -EINVAL;
1818 } 1859 }
1819 mutex_unlock(&rdev->devlist_mtx);
1820 1860
1821 return result; 1861 return result;
1822} 1862}
@@ -1865,6 +1905,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
1865 u32 frag_threshold = 0, rts_threshold = 0; 1905 u32 frag_threshold = 0, rts_threshold = 0;
1866 u8 coverage_class = 0; 1906 u8 coverage_class = 0;
1867 1907
1908 ASSERT_RTNL();
1909
1868 /* 1910 /*
1869 * Try to find the wiphy and netdev. Normally this 1911 * Try to find the wiphy and netdev. Normally this
1870 * function shouldn't need the netdev, but this is 1912 * function shouldn't need the netdev, but this is
@@ -1874,31 +1916,25 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
1874 * also passed a netdev to set_wiphy, so that it is 1916 * also passed a netdev to set_wiphy, so that it is
1875 * possible to let that go to the right netdev! 1917 * possible to let that go to the right netdev!
1876 */ 1918 */
1877 mutex_lock(&cfg80211_mutex);
1878 1919
1879 if (info->attrs[NL80211_ATTR_IFINDEX]) { 1920 if (info->attrs[NL80211_ATTR_IFINDEX]) {
1880 int ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); 1921 int ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]);
1881 1922
1882 netdev = dev_get_by_index(genl_info_net(info), ifindex); 1923 netdev = dev_get_by_index(genl_info_net(info), ifindex);
1883 if (netdev && netdev->ieee80211_ptr) { 1924 if (netdev && netdev->ieee80211_ptr)
1884 rdev = wiphy_to_dev(netdev->ieee80211_ptr->wiphy); 1925 rdev = wiphy_to_dev(netdev->ieee80211_ptr->wiphy);
1885 mutex_lock(&rdev->mtx); 1926 else
1886 } else
1887 netdev = NULL; 1927 netdev = NULL;
1888 } 1928 }
1889 1929
1890 if (!netdev) { 1930 if (!netdev) {
1891 rdev = __cfg80211_rdev_from_attrs(genl_info_net(info), 1931 rdev = __cfg80211_rdev_from_attrs(genl_info_net(info),
1892 info->attrs); 1932 info->attrs);
1893 if (IS_ERR(rdev)) { 1933 if (IS_ERR(rdev))
1894 mutex_unlock(&cfg80211_mutex);
1895 return PTR_ERR(rdev); 1934 return PTR_ERR(rdev);
1896 }
1897 wdev = NULL; 1935 wdev = NULL;
1898 netdev = NULL; 1936 netdev = NULL;
1899 result = 0; 1937 result = 0;
1900
1901 mutex_lock(&rdev->mtx);
1902 } else 1938 } else
1903 wdev = netdev->ieee80211_ptr; 1939 wdev = netdev->ieee80211_ptr;
1904 1940
@@ -1911,8 +1947,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
1911 result = cfg80211_dev_rename( 1947 result = cfg80211_dev_rename(
1912 rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); 1948 rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME]));
1913 1949
1914 mutex_unlock(&cfg80211_mutex);
1915
1916 if (result) 1950 if (result)
1917 goto bad_res; 1951 goto bad_res;
1918 1952
@@ -2119,7 +2153,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
2119 } 2153 }
2120 2154
2121 bad_res: 2155 bad_res:
2122 mutex_unlock(&rdev->mtx);
2123 if (netdev) 2156 if (netdev)
2124 dev_put(netdev); 2157 dev_put(netdev);
2125 return result; 2158 return result;
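The dropped mutex_lock/mutex_unlock pairs here and in the neighbouring hunks are one logical change: cfg80211_mutex and rdev->mtx disappear because every handler now runs with the RTNL held, and the ASSERT_RTNL() added above documents that precondition. A toy analogue of trading private locks for one caller-held lock plus an assertion - pthreads stand in for the RTNL and every name is invented:

#include <assert.h>
#include <pthread.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
static int big_lock_held;       /* toy stand-in for rtnl_is_locked() */

/* Used to take two private mutexes; now it just asserts the caller's. */
static void set_wiphy_params(void)
{
        assert(big_lock_held);  /* mirrors ASSERT_RTNL() */
        /* ... modify device state, no further locking needed ... */
}

int main(void)
{
        pthread_mutex_lock(&big_lock);  /* rtnl_lock() in the handlers */
        big_lock_held = 1;
        set_wiphy_params();
        big_lock_held = 0;
        pthread_mutex_unlock(&big_lock);
        return 0;
}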
@@ -2217,7 +2250,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
2217 struct cfg80211_registered_device *rdev; 2250 struct cfg80211_registered_device *rdev;
2218 struct wireless_dev *wdev; 2251 struct wireless_dev *wdev;
2219 2252
2220 mutex_lock(&cfg80211_mutex); 2253 rtnl_lock();
2221 list_for_each_entry(rdev, &cfg80211_rdev_list, list) { 2254 list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
2222 if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) 2255 if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk)))
2223 continue; 2256 continue;
@@ -2227,7 +2260,6 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
2227 } 2260 }
2228 if_idx = 0; 2261 if_idx = 0;
2229 2262
2230 mutex_lock(&rdev->devlist_mtx);
2231 list_for_each_entry(wdev, &rdev->wdev_list, list) { 2263 list_for_each_entry(wdev, &rdev->wdev_list, list) {
2232 if (if_idx < if_start) { 2264 if (if_idx < if_start) {
2233 if_idx++; 2265 if_idx++;
@@ -2236,17 +2268,15 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
2236 if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid, 2268 if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid,
2237 cb->nlh->nlmsg_seq, NLM_F_MULTI, 2269 cb->nlh->nlmsg_seq, NLM_F_MULTI,
2238 rdev, wdev) < 0) { 2270 rdev, wdev) < 0) {
2239 mutex_unlock(&rdev->devlist_mtx);
2240 goto out; 2271 goto out;
2241 } 2272 }
2242 if_idx++; 2273 if_idx++;
2243 } 2274 }
2244 mutex_unlock(&rdev->devlist_mtx);
2245 2275
2246 wp_idx++; 2276 wp_idx++;
2247 } 2277 }
2248 out: 2278 out:
2249 mutex_unlock(&cfg80211_mutex); 2279 rtnl_unlock();
2250 2280
2251 cb->args[0] = wp_idx; 2281 cb->args[0] = wp_idx;
2252 cb->args[1] = if_idx; 2282 cb->args[1] = if_idx;
@@ -2279,6 +2309,7 @@ static const struct nla_policy mntr_flags_policy[NL80211_MNTR_FLAG_MAX + 1] = {
2279 [NL80211_MNTR_FLAG_CONTROL] = { .type = NLA_FLAG }, 2309 [NL80211_MNTR_FLAG_CONTROL] = { .type = NLA_FLAG },
2280 [NL80211_MNTR_FLAG_OTHER_BSS] = { .type = NLA_FLAG }, 2310 [NL80211_MNTR_FLAG_OTHER_BSS] = { .type = NLA_FLAG },
2281 [NL80211_MNTR_FLAG_COOK_FRAMES] = { .type = NLA_FLAG }, 2311 [NL80211_MNTR_FLAG_COOK_FRAMES] = { .type = NLA_FLAG },
2312 [NL80211_MNTR_FLAG_ACTIVE] = { .type = NLA_FLAG },
2282}; 2313};
2283 2314
2284static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) 2315static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags)
@@ -2390,6 +2421,10 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
2390 change = true; 2421 change = true;
2391 } 2422 }
2392 2423
2424 if (flags && (*flags & NL80211_MNTR_FLAG_ACTIVE) &&
2425 !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
2426 return -EOPNOTSUPP;
2427
2393 if (change) 2428 if (change)
2394 err = cfg80211_change_iface(rdev, dev, ntype, flags, &params); 2429 err = cfg80211_change_iface(rdev, dev, ntype, flags, &params);
2395 else 2430 else
@@ -2447,6 +2482,11 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
2447 err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? 2482 err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ?
2448 info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, 2483 info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL,
2449 &flags); 2484 &flags);
2485
2486 if (!err && (flags & NL80211_MNTR_FLAG_ACTIVE) &&
2487 !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
2488 return -EOPNOTSUPP;
2489
2450 wdev = rdev_add_virtual_intf(rdev, 2490 wdev = rdev_add_virtual_intf(rdev,
2451 nla_data(info->attrs[NL80211_ATTR_IFNAME]), 2491 nla_data(info->attrs[NL80211_ATTR_IFNAME]),
2452 type, err ? NULL : &flags, &params); 2492 type, err ? NULL : &flags, &params);
@@ -2479,11 +2519,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
2479 INIT_LIST_HEAD(&wdev->mgmt_registrations); 2519 INIT_LIST_HEAD(&wdev->mgmt_registrations);
2480 spin_lock_init(&wdev->mgmt_registrations_lock); 2520 spin_lock_init(&wdev->mgmt_registrations_lock);
2481 2521
2482 mutex_lock(&rdev->devlist_mtx);
2483 wdev->identifier = ++rdev->wdev_id; 2522 wdev->identifier = ++rdev->wdev_id;
2484 list_add_rcu(&wdev->list, &rdev->wdev_list); 2523 list_add_rcu(&wdev->list, &rdev->wdev_list);
2485 rdev->devlist_generation++; 2524 rdev->devlist_generation++;
2486 mutex_unlock(&rdev->devlist_mtx);
2487 break; 2525 break;
2488 default: 2526 default:
2489 break; 2527 break;
@@ -2626,8 +2664,8 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
2626 2664
2627 hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, 2665 hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
2628 NL80211_CMD_NEW_KEY); 2666 NL80211_CMD_NEW_KEY);
2629 if (IS_ERR(hdr)) 2667 if (!hdr)
2630 return PTR_ERR(hdr); 2668 return -ENOBUFS;
2631 2669
2632 cookie.msg = msg; 2670 cookie.msg = msg;
2633 cookie.idx = key_idx; 2671 cookie.idx = key_idx;
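The IS_ERR() change above is a real fix, not churn: nl80211hdr_put() reports failure by returning NULL, not an ERR_PTR, so the old IS_ERR() test could never fire and a failed header allocation went unnoticed. A small illustration of the two conventions, with IS_ERR() reimplemented the way the kernel defines it:

#include <stddef.h>

#define MAX_ERRNO 4095
#define IS_ERR(p) ((unsigned long)(p) >= (unsigned long)-MAX_ERRNO)

static void *returns_null_on_failure(void)
{
        return NULL;            /* like nl80211hdr_put() */
}

int main(void)
{
        void *hdr = returns_null_on_failure();

        if (IS_ERR(hdr))        /* never true for NULL: the old bug */
                return 1;
        if (!hdr)               /* the correct check for this function */
                return 0;
        return 2;
}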
@@ -2926,61 +2964,58 @@ static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info)
2926 return err; 2964 return err;
2927} 2965}
2928 2966
2929static int nl80211_parse_beacon(struct genl_info *info, 2967static int nl80211_parse_beacon(struct nlattr *attrs[],
2930 struct cfg80211_beacon_data *bcn) 2968 struct cfg80211_beacon_data *bcn)
2931{ 2969{
2932 bool haveinfo = false; 2970 bool haveinfo = false;
2933 2971
2934 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_BEACON_TAIL]) || 2972 if (!is_valid_ie_attr(attrs[NL80211_ATTR_BEACON_TAIL]) ||
2935 !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]) || 2973 !is_valid_ie_attr(attrs[NL80211_ATTR_IE]) ||
2936 !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE_PROBE_RESP]) || 2974 !is_valid_ie_attr(attrs[NL80211_ATTR_IE_PROBE_RESP]) ||
2937 !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE_ASSOC_RESP])) 2975 !is_valid_ie_attr(attrs[NL80211_ATTR_IE_ASSOC_RESP]))
2938 return -EINVAL; 2976 return -EINVAL;
2939 2977
2940 memset(bcn, 0, sizeof(*bcn)); 2978 memset(bcn, 0, sizeof(*bcn));
2941 2979
2942 if (info->attrs[NL80211_ATTR_BEACON_HEAD]) { 2980 if (attrs[NL80211_ATTR_BEACON_HEAD]) {
2943 bcn->head = nla_data(info->attrs[NL80211_ATTR_BEACON_HEAD]); 2981 bcn->head = nla_data(attrs[NL80211_ATTR_BEACON_HEAD]);
2944 bcn->head_len = nla_len(info->attrs[NL80211_ATTR_BEACON_HEAD]); 2982 bcn->head_len = nla_len(attrs[NL80211_ATTR_BEACON_HEAD]);
2945 if (!bcn->head_len) 2983 if (!bcn->head_len)
2946 return -EINVAL; 2984 return -EINVAL;
2947 haveinfo = true; 2985 haveinfo = true;
2948 } 2986 }
2949 2987
2950 if (info->attrs[NL80211_ATTR_BEACON_TAIL]) { 2988 if (attrs[NL80211_ATTR_BEACON_TAIL]) {
2951 bcn->tail = nla_data(info->attrs[NL80211_ATTR_BEACON_TAIL]); 2989 bcn->tail = nla_data(attrs[NL80211_ATTR_BEACON_TAIL]);
2952 bcn->tail_len = 2990 bcn->tail_len = nla_len(attrs[NL80211_ATTR_BEACON_TAIL]);
2953 nla_len(info->attrs[NL80211_ATTR_BEACON_TAIL]);
2954 haveinfo = true; 2991 haveinfo = true;
2955 } 2992 }
2956 2993
2957 if (!haveinfo) 2994 if (!haveinfo)
2958 return -EINVAL; 2995 return -EINVAL;
2959 2996
2960 if (info->attrs[NL80211_ATTR_IE]) { 2997 if (attrs[NL80211_ATTR_IE]) {
2961 bcn->beacon_ies = nla_data(info->attrs[NL80211_ATTR_IE]); 2998 bcn->beacon_ies = nla_data(attrs[NL80211_ATTR_IE]);
2962 bcn->beacon_ies_len = nla_len(info->attrs[NL80211_ATTR_IE]); 2999 bcn->beacon_ies_len = nla_len(attrs[NL80211_ATTR_IE]);
2963 } 3000 }
2964 3001
2965 if (info->attrs[NL80211_ATTR_IE_PROBE_RESP]) { 3002 if (attrs[NL80211_ATTR_IE_PROBE_RESP]) {
2966 bcn->proberesp_ies = 3003 bcn->proberesp_ies =
2967 nla_data(info->attrs[NL80211_ATTR_IE_PROBE_RESP]); 3004 nla_data(attrs[NL80211_ATTR_IE_PROBE_RESP]);
2968 bcn->proberesp_ies_len = 3005 bcn->proberesp_ies_len =
2969 nla_len(info->attrs[NL80211_ATTR_IE_PROBE_RESP]); 3006 nla_len(attrs[NL80211_ATTR_IE_PROBE_RESP]);
2970 } 3007 }
2971 3008
2972 if (info->attrs[NL80211_ATTR_IE_ASSOC_RESP]) { 3009 if (attrs[NL80211_ATTR_IE_ASSOC_RESP]) {
2973 bcn->assocresp_ies = 3010 bcn->assocresp_ies =
2974 nla_data(info->attrs[NL80211_ATTR_IE_ASSOC_RESP]); 3011 nla_data(attrs[NL80211_ATTR_IE_ASSOC_RESP]);
2975 bcn->assocresp_ies_len = 3012 bcn->assocresp_ies_len =
2976 nla_len(info->attrs[NL80211_ATTR_IE_ASSOC_RESP]); 3013 nla_len(attrs[NL80211_ATTR_IE_ASSOC_RESP]);
2977 } 3014 }
2978 3015
2979 if (info->attrs[NL80211_ATTR_PROBE_RESP]) { 3016 if (attrs[NL80211_ATTR_PROBE_RESP]) {
2980 bcn->probe_resp = 3017 bcn->probe_resp = nla_data(attrs[NL80211_ATTR_PROBE_RESP]);
2981 nla_data(info->attrs[NL80211_ATTR_PROBE_RESP]); 3018 bcn->probe_resp_len = nla_len(attrs[NL80211_ATTR_PROBE_RESP]);
2982 bcn->probe_resp_len =
2983 nla_len(info->attrs[NL80211_ATTR_PROBE_RESP]);
2984 } 3019 }
2985 3020
2986 return 0; 3021 return 0;
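Switching the parameter from genl_info to a plain attribute table is what lets the channel-switch handler further down reuse this parser: the CSA IEs arrive as a nested attribute set, which is parsed into its own table and fed through the same function. A compact model of that generalization (types and attribute ids are stand-ins):

#include <errno.h>
#include <stddef.h>

enum { ATTR_BEACON_HEAD, ATTR_BEACON_TAIL, NUM_ATTRS };

struct attr {
        const void *data;
        size_t len;
};

struct beacon_data {
        const void *head, *tail;
        size_t head_len, tail_len;
};

/* Works on any table: a top-level request or a nested attribute set. */
static int parse_beacon(const struct attr *attrs[NUM_ATTRS],
                        struct beacon_data *bcn)
{
        if (attrs[ATTR_BEACON_HEAD]) {
                bcn->head = attrs[ATTR_BEACON_HEAD]->data;
                bcn->head_len = attrs[ATTR_BEACON_HEAD]->len;
        }
        if (attrs[ATTR_BEACON_TAIL]) {
                bcn->tail = attrs[ATTR_BEACON_TAIL]->data;
                bcn->tail_len = attrs[ATTR_BEACON_TAIL]->len;
        }
        return (bcn->head || bcn->tail) ? 0 : -EINVAL;
}

int main(void)
{
        struct attr head = { "beacon", 6 };
        const struct attr *table[NUM_ATTRS] = { &head, NULL };
        struct beacon_data bcn = { 0 };

        return parse_beacon(table, &bcn);
}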
@@ -2992,8 +3027,6 @@ static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev,
2992 struct wireless_dev *wdev; 3027 struct wireless_dev *wdev;
2993 bool ret = false; 3028 bool ret = false;
2994 3029
2995 mutex_lock(&rdev->devlist_mtx);
2996
2997 list_for_each_entry(wdev, &rdev->wdev_list, list) { 3030 list_for_each_entry(wdev, &rdev->wdev_list, list) {
2998 if (wdev->iftype != NL80211_IFTYPE_AP && 3031 if (wdev->iftype != NL80211_IFTYPE_AP &&
2999 wdev->iftype != NL80211_IFTYPE_P2P_GO) 3032 wdev->iftype != NL80211_IFTYPE_P2P_GO)
@@ -3007,8 +3040,6 @@ static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev,
3007 break; 3040 break;
3008 } 3041 }
3009 3042
3010 mutex_unlock(&rdev->devlist_mtx);
3011
3012 return ret; 3043 return ret;
3013} 3044}
3014 3045
@@ -3063,7 +3094,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
3063 !info->attrs[NL80211_ATTR_BEACON_HEAD]) 3094 !info->attrs[NL80211_ATTR_BEACON_HEAD])
3064 return -EINVAL; 3095 return -EINVAL;
3065 3096
3066 err = nl80211_parse_beacon(info, &params.beacon); 3097 err = nl80211_parse_beacon(info->attrs, &params.beacon);
3067 if (err) 3098 if (err)
3068 return err; 3099 return err;
3069 3100
@@ -3170,13 +3201,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
3170 params.radar_required = true; 3201 params.radar_required = true;
3171 } 3202 }
3172 3203
3173 mutex_lock(&rdev->devlist_mtx);
3174 err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, 3204 err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
3175 params.chandef.chan, 3205 params.chandef.chan,
3176 CHAN_MODE_SHARED, 3206 CHAN_MODE_SHARED,
3177 radar_detect_width); 3207 radar_detect_width);
3178 mutex_unlock(&rdev->devlist_mtx);
3179
3180 if (err) 3208 if (err)
3181 return err; 3209 return err;
3182 3210
@@ -3218,7 +3246,7 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info)
3218 if (!wdev->beacon_interval) 3246 if (!wdev->beacon_interval)
3219 return -EINVAL; 3247 return -EINVAL;
3220 3248
3221 err = nl80211_parse_beacon(info, &params); 3249 err = nl80211_parse_beacon(info->attrs, &params);
3222 if (err) 3250 if (err)
3223 return err; 3251 return err;
3224 3252
@@ -3376,6 +3404,32 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
3376 return true; 3404 return true;
3377} 3405}
3378 3406
3407static bool nl80211_put_signal(struct sk_buff *msg, u8 mask, s8 *signal,
3408 int id)
3409{
3410 void *attr;
3411 int i = 0;
3412
3413 if (!mask)
3414 return true;
3415
3416 attr = nla_nest_start(msg, id);
3417 if (!attr)
3418 return false;
3419
3420 for (i = 0; i < IEEE80211_MAX_CHAINS; i++) {
3421 if (!(mask & BIT(i)))
3422 continue;
3423
3424 if (nla_put_u8(msg, i, signal[i]))
3425 return false;
3426 }
3427
3428 nla_nest_end(msg, attr);
3429
3430 return true;
3431}
3432
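nl80211_put_signal() walks a per-chain bitmask and emits one nested entry per set bit, keyed by chain index; an empty mask means "nothing to report" rather than an error. A user-space model with printf standing in for nla_nest_start()/nla_put_u8() (MAX_CHAINS is invented here):

#include <stdio.h>
#include <stdbool.h>

#define MAX_CHAINS 4

static bool put_signal(unsigned int mask, const signed char *signal)
{
        int i;

        if (!mask)              /* no chain data: success, emit nothing */
                return true;

        for (i = 0; i < MAX_CHAINS; i++) {
                if (!(mask & (1U << i)))
                        continue;
                printf("chain %d: %d dBm\n", i, signal[i]);
        }
        return true;
}

int main(void)
{
        signed char sig[MAX_CHAINS] = { -50, -55, -60, -65 };

        return put_signal(0x5, sig) ? 0 : 1;    /* chains 0 and 2 */
}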
3379static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, 3433static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq,
3380 int flags, 3434 int flags,
3381 struct cfg80211_registered_device *rdev, 3435 struct cfg80211_registered_device *rdev,
@@ -3447,6 +3501,18 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq,
3447 default: 3501 default:
3448 break; 3502 break;
3449 } 3503 }
3504 if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL) {
3505 if (!nl80211_put_signal(msg, sinfo->chains,
3506 sinfo->chain_signal,
3507 NL80211_STA_INFO_CHAIN_SIGNAL))
3508 goto nla_put_failure;
3509 }
3510 if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL_AVG) {
3511 if (!nl80211_put_signal(msg, sinfo->chains,
3512 sinfo->chain_signal_avg,
3513 NL80211_STA_INFO_CHAIN_SIGNAL_AVG))
3514 goto nla_put_failure;
3515 }
3450 if (sinfo->filled & STATION_INFO_TX_BITRATE) { 3516 if (sinfo->filled & STATION_INFO_TX_BITRATE) {
3451 if (!nl80211_put_sta_rate(msg, &sinfo->txrate, 3517 if (!nl80211_put_sta_rate(msg, &sinfo->txrate,
3452 NL80211_STA_INFO_TX_BITRATE)) 3518 NL80211_STA_INFO_TX_BITRATE))
@@ -3834,6 +3900,8 @@ static int nl80211_set_station_tdls(struct genl_info *info,
3834 struct station_parameters *params) 3900 struct station_parameters *params)
3835{ 3901{
3836 /* Dummy STA entry gets updated once the peer capabilities are known */ 3902 /* Dummy STA entry gets updated once the peer capabilities are known */
3903 if (info->attrs[NL80211_ATTR_PEER_AID])
3904 params->aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]);
3837 if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) 3905 if (info->attrs[NL80211_ATTR_HT_CAPABILITY])
3838 params->ht_capa = 3906 params->ht_capa =
3839 nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); 3907 nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
@@ -3974,7 +4042,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
3974 if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) 4042 if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES])
3975 return -EINVAL; 4043 return -EINVAL;
3976 4044
3977 if (!info->attrs[NL80211_ATTR_STA_AID]) 4045 if (!info->attrs[NL80211_ATTR_STA_AID] &&
4046 !info->attrs[NL80211_ATTR_PEER_AID])
3978 return -EINVAL; 4047 return -EINVAL;
3979 4048
3980 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 4049 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
@@ -3985,7 +4054,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
3985 params.listen_interval = 4054 params.listen_interval =
3986 nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); 4055 nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
3987 4056
3988 params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); 4057 if (info->attrs[NL80211_ATTR_PEER_AID])
4058 params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]);
4059 else
4060 params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]);
3989 if (!params.aid || params.aid > IEEE80211_MAX_AID) 4061 if (!params.aid || params.aid > IEEE80211_MAX_AID)
3990 return -EINVAL; 4062 return -EINVAL;
3991 4063
@@ -4037,7 +4109,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
4037 params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; 4109 params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD;
4038 4110
4039 /* TDLS peers cannot be added */ 4111 /* TDLS peers cannot be added */
4040 if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) 4112 if ((params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) ||
4113 info->attrs[NL80211_ATTR_PEER_AID])
4041 return -EINVAL; 4114 return -EINVAL;
4042 /* but don't bother the driver with it */ 4115 /* but don't bother the driver with it */
4043 params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); 4116 params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
@@ -4063,7 +4136,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
4063 if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) 4136 if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED))
4064 return -EINVAL; 4137 return -EINVAL;
4065 /* TDLS peers cannot be added */ 4138 /* TDLS peers cannot be added */
4066 if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) 4139 if ((params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) ||
4140 info->attrs[NL80211_ATTR_PEER_AID])
4067 return -EINVAL; 4141 return -EINVAL;
4068 break; 4142 break;
4069 case NL80211_IFTYPE_STATION: 4143 case NL80211_IFTYPE_STATION:
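The new-station path now accepts either the TDLS peer AID or the regular station AID, preferring the peer attribute when both are present, then bounds-checks the result; TDLS peers themselves still cannot be added this way. The selection logic, distilled (the attribute plumbing is invented; 2007 is 802.11's maximum AID):

#include <errno.h>

#define MAX_AID 2007

static int pick_aid(const unsigned short *peer_aid,
                    const unsigned short *sta_aid, unsigned short *out)
{
        if (peer_aid)                   /* NL80211_ATTR_PEER_AID wins */
                *out = *peer_aid;
        else if (sta_aid)
                *out = *sta_aid;
        else
                return -EINVAL;

        if (!*out || *out > MAX_AID)
                return -EINVAL;
        return 0;
}

int main(void)
{
        unsigned short peer = 5, out = 0;

        return pick_aid(&peer, NULL, &out);
}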
@@ -4585,7 +4659,9 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
4585 nla_put_u32(msg, NL80211_MESHCONF_POWER_MODE, 4659 nla_put_u32(msg, NL80211_MESHCONF_POWER_MODE,
4586 cur_params.power_mode) || 4660 cur_params.power_mode) ||
4587 nla_put_u16(msg, NL80211_MESHCONF_AWAKE_WINDOW, 4661 nla_put_u16(msg, NL80211_MESHCONF_AWAKE_WINDOW,
4588 cur_params.dot11MeshAwakeWindowDuration)) 4662 cur_params.dot11MeshAwakeWindowDuration) ||
4663 nla_put_u32(msg, NL80211_MESHCONF_PLINK_TIMEOUT,
4664 cur_params.plink_timeout))
4589 goto nla_put_failure; 4665 goto nla_put_failure;
4590 nla_nest_end(msg, pinfoattr); 4666 nla_nest_end(msg, pinfoattr);
4591 genlmsg_end(msg, hdr); 4667 genlmsg_end(msg, hdr);
@@ -4626,6 +4702,7 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A
4626 [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 }, 4702 [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 },
4627 [NL80211_MESHCONF_POWER_MODE] = { .type = NLA_U32 }, 4703 [NL80211_MESHCONF_POWER_MODE] = { .type = NLA_U32 },
4628 [NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 }, 4704 [NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 },
4705 [NL80211_MESHCONF_PLINK_TIMEOUT] = { .type = NLA_U32 },
4629}; 4706};
4630 4707
4631static const struct nla_policy 4708static const struct nla_policy
@@ -4634,6 +4711,7 @@ static const struct nla_policy
4634 [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, 4711 [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 },
4635 [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, 4712 [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 },
4636 [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, 4713 [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG },
4714 [NL80211_MESH_SETUP_AUTH_PROTOCOL] = { .type = NLA_U8 },
4637 [NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG }, 4715 [NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG },
4638 [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, 4716 [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY,
4639 .len = IEEE80211_MAX_DATA_LEN }, 4717 .len = IEEE80211_MAX_DATA_LEN },
@@ -4736,9 +4814,9 @@ do { \
4736 FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, 0, 1, 4814 FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, 0, 1,
4737 mask, NL80211_MESHCONF_FORWARDING, 4815 mask, NL80211_MESHCONF_FORWARDING,
4738 nla_get_u8); 4816 nla_get_u8);
4739 FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, 1, 255, 4817 FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0,
4740 mask, NL80211_MESHCONF_RSSI_THRESHOLD, 4818 mask, NL80211_MESHCONF_RSSI_THRESHOLD,
4741 nla_get_u32); 4819 nla_get_s32);
4742 FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16, 4820 FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16,
4743 mask, NL80211_MESHCONF_HT_OPMODE, 4821 mask, NL80211_MESHCONF_HT_OPMODE,
4744 nla_get_u16); 4822 nla_get_u16);
@@ -4762,6 +4840,9 @@ do { \
4762 FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, 4840 FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration,
4763 0, 65535, mask, 4841 0, 65535, mask,
4764 NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); 4842 NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16);
4843 FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 1, 0xffffffff,
4844 mask, NL80211_MESHCONF_PLINK_TIMEOUT,
4845 nla_get_u32);
4765 if (mask_out) 4846 if (mask_out)
4766 *mask_out = mask; 4847 *mask_out = mask;
4767 4848
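The rssi_threshold change above is easy to read past: the old bounds (1..255, fetched as u32) could never match a threshold expressed in negative dBm, so the attribute was effectively unusable; the fix flips the range to -255..0 and fetches the value as s32. A minimal signed range-checked setter in the same spirit (names invented):

#include <errno.h>

/* RSSI thresholds are negative dBm values: -255..0, never 1..255. */
static int set_rssi_threshold(int *cfg, int requested_dbm)
{
        if (requested_dbm < -255 || requested_dbm > 0)
                return -EINVAL;
        *cfg = requested_dbm;
        return 0;
}

int main(void)
{
        int threshold = 0;

        return set_rssi_threshold(&threshold, -75);     /* typical value */
}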
@@ -4819,6 +4900,13 @@ static int nl80211_parse_mesh_setup(struct genl_info *info,
4819 if (setup->is_secure) 4900 if (setup->is_secure)
4820 setup->user_mpm = true; 4901 setup->user_mpm = true;
4821 4902
4903 if (tb[NL80211_MESH_SETUP_AUTH_PROTOCOL]) {
4904 if (!setup->user_mpm)
4905 return -EINVAL;
4906 setup->auth_id =
4907 nla_get_u8(tb[NL80211_MESH_SETUP_AUTH_PROTOCOL]);
4908 }
4909
4822 return 0; 4910 return 0;
4823} 4911}
4824 4912
@@ -4861,18 +4949,13 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
4861 void *hdr = NULL; 4949 void *hdr = NULL;
4862 struct nlattr *nl_reg_rules; 4950 struct nlattr *nl_reg_rules;
4863 unsigned int i; 4951 unsigned int i;
4864 int err = -EINVAL;
4865
4866 mutex_lock(&cfg80211_mutex);
4867 4952
4868 if (!cfg80211_regdomain) 4953 if (!cfg80211_regdomain)
4869 goto out; 4954 return -EINVAL;
4870 4955
4871 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 4956 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
4872 if (!msg) { 4957 if (!msg)
4873 err = -ENOBUFS; 4958 return -ENOBUFS;
4874 goto out;
4875 }
4876 4959
4877 hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, 4960 hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
4878 NL80211_CMD_GET_REG); 4961 NL80211_CMD_GET_REG);
@@ -4931,8 +5014,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
4931 nla_nest_end(msg, nl_reg_rules); 5014 nla_nest_end(msg, nl_reg_rules);
4932 5015
4933 genlmsg_end(msg, hdr); 5016 genlmsg_end(msg, hdr);
4934 err = genlmsg_reply(msg, info); 5017 return genlmsg_reply(msg, info);
4935 goto out;
4936 5018
4937nla_put_failure_rcu: 5019nla_put_failure_rcu:
4938 rcu_read_unlock(); 5020 rcu_read_unlock();
@@ -4940,10 +5022,7 @@ nla_put_failure:
4940 genlmsg_cancel(msg, hdr); 5022 genlmsg_cancel(msg, hdr);
4941put_failure: 5023put_failure:
4942 nlmsg_free(msg); 5024 nlmsg_free(msg);
4943 err = -EMSGSIZE; 5025 return -EMSGSIZE;
4944out:
4945 mutex_unlock(&cfg80211_mutex);
4946 return err;
4947} 5026}
4948 5027
4949static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) 5028static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
@@ -5009,12 +5088,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
5009 } 5088 }
5010 } 5089 }
5011 5090
5012 mutex_lock(&cfg80211_mutex);
5013
5014 r = set_regdom(rd); 5091 r = set_regdom(rd);
5015 /* set_regdom took ownership */ 5092 /* set_regdom took ownership */
5016 rd = NULL; 5093 rd = NULL;
5017 mutex_unlock(&cfg80211_mutex);
5018 5094
5019 bad_reg: 5095 bad_reg:
5020 kfree(rd); 5096 kfree(rd);
@@ -5064,7 +5140,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
5064 if (!rdev->ops->scan) 5140 if (!rdev->ops->scan)
5065 return -EOPNOTSUPP; 5141 return -EOPNOTSUPP;
5066 5142
5067 mutex_lock(&rdev->sched_scan_mtx);
5068 if (rdev->scan_req) { 5143 if (rdev->scan_req) {
5069 err = -EBUSY; 5144 err = -EBUSY;
5070 goto unlock; 5145 goto unlock;
@@ -5250,7 +5325,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
5250 } 5325 }
5251 5326
5252 unlock: 5327 unlock:
5253 mutex_unlock(&rdev->sched_scan_mtx);
5254 return err; 5328 return err;
5255} 5329}
5256 5330
@@ -5322,8 +5396,6 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
5322 if (ie_len > wiphy->max_sched_scan_ie_len) 5396 if (ie_len > wiphy->max_sched_scan_ie_len)
5323 return -EINVAL; 5397 return -EINVAL;
5324 5398
5325 mutex_lock(&rdev->sched_scan_mtx);
5326
5327 if (rdev->sched_scan_req) { 5399 if (rdev->sched_scan_req) {
5328 err = -EINPROGRESS; 5400 err = -EINPROGRESS;
5329 goto out; 5401 goto out;
@@ -5491,7 +5563,6 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
5491out_free: 5563out_free:
5492 kfree(request); 5564 kfree(request);
5493out: 5565out:
5494 mutex_unlock(&rdev->sched_scan_mtx);
5495 return err; 5566 return err;
5496} 5567}
5497 5568
@@ -5499,17 +5570,12 @@ static int nl80211_stop_sched_scan(struct sk_buff *skb,
5499 struct genl_info *info) 5570 struct genl_info *info)
5500{ 5571{
5501 struct cfg80211_registered_device *rdev = info->user_ptr[0]; 5572 struct cfg80211_registered_device *rdev = info->user_ptr[0];
5502 int err;
5503 5573
5504 if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || 5574 if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) ||
5505 !rdev->ops->sched_scan_stop) 5575 !rdev->ops->sched_scan_stop)
5506 return -EOPNOTSUPP; 5576 return -EOPNOTSUPP;
5507 5577
5508 mutex_lock(&rdev->sched_scan_mtx); 5578 return __cfg80211_stop_sched_scan(rdev, false);
5509 err = __cfg80211_stop_sched_scan(rdev, false);
5510 mutex_unlock(&rdev->sched_scan_mtx);
5511
5512 return err;
5513} 5579}
5514 5580
5515static int nl80211_start_radar_detection(struct sk_buff *skb, 5581static int nl80211_start_radar_detection(struct sk_buff *skb,
@@ -5541,12 +5607,11 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
5541 if (!rdev->ops->start_radar_detection) 5607 if (!rdev->ops->start_radar_detection)
5542 return -EOPNOTSUPP; 5608 return -EOPNOTSUPP;
5543 5609
5544 mutex_lock(&rdev->devlist_mtx);
5545 err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, 5610 err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
5546 chandef.chan, CHAN_MODE_SHARED, 5611 chandef.chan, CHAN_MODE_SHARED,
5547 BIT(chandef.width)); 5612 BIT(chandef.width));
5548 if (err) 5613 if (err)
5549 goto err_locked; 5614 return err;
5550 5615
5551 err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef); 5616 err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef);
5552 if (!err) { 5617 if (!err) {
@@ -5554,12 +5619,114 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
5554 wdev->cac_started = true; 5619 wdev->cac_started = true;
5555 wdev->cac_start_time = jiffies; 5620 wdev->cac_start_time = jiffies;
5556 } 5621 }
5557err_locked:
5558 mutex_unlock(&rdev->devlist_mtx);
5559
5560 return err; 5622 return err;
5561} 5623}
5562 5624
5625static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
5626{
5627 struct cfg80211_registered_device *rdev = info->user_ptr[0];
5628 struct net_device *dev = info->user_ptr[1];
5629 struct wireless_dev *wdev = dev->ieee80211_ptr;
5630 struct cfg80211_csa_settings params;
 5631 /* csa_attrs is declared static to avoid wasting stack space; this
 5632 * function is called under the RTNL lock, so concurrent use is not a problem.
 5633 */
5634 static struct nlattr *csa_attrs[NL80211_ATTR_MAX+1];
5635 u8 radar_detect_width = 0;
5636 int err;
5637
5638 if (!rdev->ops->channel_switch ||
5639 !(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH))
5640 return -EOPNOTSUPP;
5641
5642 /* may add IBSS support later */
5643 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
5644 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
5645 return -EOPNOTSUPP;
5646
5647 memset(&params, 0, sizeof(params));
5648
5649 if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] ||
5650 !info->attrs[NL80211_ATTR_CH_SWITCH_COUNT])
5651 return -EINVAL;
5652
 5653 /* only important for AP; IBSS and mesh create IEs internally */
5654 if (!info->attrs[NL80211_ATTR_CSA_IES])
5655 return -EINVAL;
5656
5657 /* useless if AP is not running */
5658 if (!wdev->beacon_interval)
5659 return -EINVAL;
5660
5661 params.count = nla_get_u32(info->attrs[NL80211_ATTR_CH_SWITCH_COUNT]);
5662
5663 err = nl80211_parse_beacon(info->attrs, &params.beacon_after);
5664 if (err)
5665 return err;
5666
5667 err = nla_parse_nested(csa_attrs, NL80211_ATTR_MAX,
5668 info->attrs[NL80211_ATTR_CSA_IES],
5669 nl80211_policy);
5670 if (err)
5671 return err;
5672
5673 err = nl80211_parse_beacon(csa_attrs, &params.beacon_csa);
5674 if (err)
5675 return err;
5676
5677 if (!csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON])
5678 return -EINVAL;
5679
5680 params.counter_offset_beacon =
5681 nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
5682 if (params.counter_offset_beacon >= params.beacon_csa.tail_len)
5683 return -EINVAL;
5684
5685 /* sanity check - counters should be the same */
5686 if (params.beacon_csa.tail[params.counter_offset_beacon] !=
5687 params.count)
5688 return -EINVAL;
5689
5690 if (csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]) {
5691 params.counter_offset_presp =
5692 nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
5693 if (params.counter_offset_presp >=
5694 params.beacon_csa.probe_resp_len)
5695 return -EINVAL;
5696
5697 if (params.beacon_csa.probe_resp[params.counter_offset_presp] !=
5698 params.count)
5699 return -EINVAL;
5700 }
5701
5702 err = nl80211_parse_chandef(rdev, info, &params.chandef);
5703 if (err)
5704 return err;
5705
5706 if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef))
5707 return -EINVAL;
5708
5709 err = cfg80211_chandef_dfs_required(wdev->wiphy, &params.chandef);
5710 if (err < 0) {
5711 return err;
5712 } else if (err) {
5713 radar_detect_width = BIT(params.chandef.width);
5714 params.radar_required = true;
5715 }
5716
5717 err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
5718 params.chandef.chan,
5719 CHAN_MODE_SHARED,
5720 radar_detect_width);
5721 if (err)
5722 return err;
5723
5724 if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX])
5725 params.block_tx = true;
5726
5727 return rdev_channel_switch(rdev, dev, &params);
5728}
5729
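nl80211_channel_switch() takes the counter offsets on trust from userspace, so before anything else it checks that each offset falls inside the corresponding beacon template and that the byte at that offset already carries the announced countdown value. The core of those checks, distilled into a stand-alone helper (names and example bytes invented):

#include <errno.h>
#include <stddef.h>

static int check_csa_counter(const unsigned char *tmpl, size_t tmpl_len,
                             size_t offset, unsigned char count)
{
        if (offset >= tmpl_len)
                return -EINVAL; /* offset points outside the template */
        if (tmpl[offset] != count)
                return -EINVAL; /* template disagrees with the count */
        return 0;
}

int main(void)
{
        unsigned char beacon_tail[] = { 0x25, 0x03, 0x05, 0x01, 0x24 };

        /* the countdown byte sits at offset 2 and must equal count 5 */
        return check_csa_counter(beacon_tail, sizeof(beacon_tail), 2, 5);
}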
5563static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, 5730static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
5564 u32 seq, int flags, 5731 u32 seq, int flags,
5565 struct cfg80211_registered_device *rdev, 5732 struct cfg80211_registered_device *rdev,
@@ -5621,6 +5788,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
5621 goto nla_put_failure; 5788 goto nla_put_failure;
5622 if (nla_put_u16(msg, NL80211_BSS_CAPABILITY, res->capability) || 5789 if (nla_put_u16(msg, NL80211_BSS_CAPABILITY, res->capability) ||
5623 nla_put_u32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq) || 5790 nla_put_u32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq) ||
5791 nla_put_u32(msg, NL80211_BSS_CHAN_WIDTH, res->scan_width) ||
5624 nla_put_u32(msg, NL80211_BSS_SEEN_MS_AGO, 5792 nla_put_u32(msg, NL80211_BSS_SEEN_MS_AGO,
5625 jiffies_to_msecs(jiffies - intbss->ts))) 5793 jiffies_to_msecs(jiffies - intbss->ts)))
5626 goto nla_put_failure; 5794 goto nla_put_failure;
@@ -5939,10 +6107,13 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
5939 if (local_state_change) 6107 if (local_state_change)
5940 return 0; 6108 return 0;
5941 6109
5942 return cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, 6110 wdev_lock(dev->ieee80211_ptr);
5943 ssid, ssid_len, ie, ie_len, 6111 err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid,
5944 key.p.key, key.p.key_len, key.idx, 6112 ssid, ssid_len, ie, ie_len,
5945 sae_data, sae_data_len); 6113 key.p.key, key.p.key_len, key.idx,
6114 sae_data, sae_data_len);
6115 wdev_unlock(dev->ieee80211_ptr);
6116 return err;
5946} 6117}
5947 6118
5948static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, 6119static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
@@ -6109,9 +6280,12 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
6109 } 6280 }
6110 6281
6111 err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); 6282 err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
6112 if (!err) 6283 if (!err) {
6284 wdev_lock(dev->ieee80211_ptr);
6113 err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, 6285 err = cfg80211_mlme_assoc(rdev, dev, chan, bssid,
6114 ssid, ssid_len, &req); 6286 ssid, ssid_len, &req);
6287 wdev_unlock(dev->ieee80211_ptr);
6288 }
6115 6289
6116 return err; 6290 return err;
6117} 6291}
@@ -6121,7 +6295,7 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
6121 struct cfg80211_registered_device *rdev = info->user_ptr[0]; 6295 struct cfg80211_registered_device *rdev = info->user_ptr[0];
6122 struct net_device *dev = info->user_ptr[1]; 6296 struct net_device *dev = info->user_ptr[1];
6123 const u8 *ie = NULL, *bssid; 6297 const u8 *ie = NULL, *bssid;
6124 int ie_len = 0; 6298 int ie_len = 0, err;
6125 u16 reason_code; 6299 u16 reason_code;
6126 bool local_state_change; 6300 bool local_state_change;
6127 6301
@@ -6156,8 +6330,11 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
6156 6330
6157 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; 6331 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
6158 6332
6159 return cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code, 6333 wdev_lock(dev->ieee80211_ptr);
6160 local_state_change); 6334 err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code,
6335 local_state_change);
6336 wdev_unlock(dev->ieee80211_ptr);
6337 return err;
6161} 6338}
6162 6339
6163static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) 6340static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
@@ -6165,7 +6342,7 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
6165 struct cfg80211_registered_device *rdev = info->user_ptr[0]; 6342 struct cfg80211_registered_device *rdev = info->user_ptr[0];
6166 struct net_device *dev = info->user_ptr[1]; 6343 struct net_device *dev = info->user_ptr[1];
6167 const u8 *ie = NULL, *bssid; 6344 const u8 *ie = NULL, *bssid;
6168 int ie_len = 0; 6345 int ie_len = 0, err;
6169 u16 reason_code; 6346 u16 reason_code;
6170 bool local_state_change; 6347 bool local_state_change;
6171 6348
@@ -6200,8 +6377,11 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
6200 6377
6201 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; 6378 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
6202 6379
6203 return cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code, 6380 wdev_lock(dev->ieee80211_ptr);
6204 local_state_change); 6381 err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code,
6382 local_state_change);
6383 wdev_unlock(dev->ieee80211_ptr);
6384 return err;
6205} 6385}
6206 6386
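The authenticate, deauthenticate, and disassociate handlers all gained the same shape: take the per-wdev lock, run the MLME operation, drop the lock, and return the captured error instead of tail-calling. A generic sketch of that wrapping, with pthreads modeling wdev_lock()/wdev_unlock() and all names invented:

#include <pthread.h>

struct wdev {
        pthread_mutex_t mtx;    /* models the per-wdev mutex */
};

static int mlme_op(struct wdev *w)
{
        /* ... auth/deauth/disassoc work, lock held throughout ... */
        return 0;
}

static int mlme_call_locked(struct wdev *w, int (*op)(struct wdev *))
{
        int err;

        pthread_mutex_lock(&w->mtx);    /* wdev_lock() */
        err = op(w);
        pthread_mutex_unlock(&w->mtx);  /* wdev_unlock() */
        return err;
}

int main(void)
{
        struct wdev w = { PTHREAD_MUTEX_INITIALIZER };

        return mlme_call_locked(&w, mlme_op);
}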
6207static bool 6387static bool
@@ -6288,11 +6468,18 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
6288 if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef)) 6468 if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef))
6289 return -EINVAL; 6469 return -EINVAL;
6290 6470
6291 if (ibss.chandef.width > NL80211_CHAN_WIDTH_40) 6471 switch (ibss.chandef.width) {
6292 return -EINVAL; 6472 case NL80211_CHAN_WIDTH_5:
6293 if (ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT && 6473 case NL80211_CHAN_WIDTH_10:
6294 !(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS)) 6474 case NL80211_CHAN_WIDTH_20_NOHT:
6475 break;
6476 case NL80211_CHAN_WIDTH_20:
6477 case NL80211_CHAN_WIDTH_40:
6478 if (rdev->wiphy.features & NL80211_FEATURE_HT_IBSS)
6479 break;
6480 default:
6295 return -EINVAL; 6481 return -EINVAL;
6482 }
6296 6483
6297 ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED]; 6484 ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED];
6298 ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; 6485 ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY];
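The rewritten width check reads as a whitelist with a conditional extension: 5, 10, and 20-NoHT MHz IBSS are always acceptable, the HT widths are acceptable only when the driver advertises NL80211_FEATURE_HT_IBSS, and anything wider falls to the default case and is rejected. The same shape as a stand-alone function (enum values invented):

#include <errno.h>
#include <stdbool.h>

enum chan_width { W5, W10, W20_NOHT, W20, W40, W80 };

static int check_ibss_width(enum chan_width w, bool ht_ibss)
{
        switch (w) {
        case W5:
        case W10:
        case W20_NOHT:
                return 0;               /* always allowed */
        case W20:
        case W40:
                if (ht_ibss)
                        return 0;       /* allowed with the HT feature */
                /* fall through */
        default:
                return -EINVAL;
        }
}

int main(void)
{
        return check_ibss_width(W40, true);
}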
@@ -6311,6 +6498,19 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
6311 return err; 6498 return err;
6312 } 6499 }
6313 6500
6501 if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
6502 memcpy(&ibss.ht_capa_mask,
6503 nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]),
6504 sizeof(ibss.ht_capa_mask));
6505
6506 if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
6507 if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
6508 return -EINVAL;
6509 memcpy(&ibss.ht_capa,
6510 nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]),
6511 sizeof(ibss.ht_capa));
6512 }
6513
6314 if (info->attrs[NL80211_ATTR_MCAST_RATE] && 6514 if (info->attrs[NL80211_ATTR_MCAST_RATE] &&
6315 !nl80211_parse_mcast_rate(rdev, ibss.mcast_rate, 6515 !nl80211_parse_mcast_rate(rdev, ibss.mcast_rate,
6316 nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]))) 6516 nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE])))
@@ -6393,19 +6593,30 @@ static struct genl_multicast_group nl80211_testmode_mcgrp = {
 static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev =
+		__cfg80211_wdev_from_attrs(genl_info_net(info), info->attrs);
 	int err;
 
+	if (!rdev->ops->testmode_cmd)
+		return -EOPNOTSUPP;
+
+	if (IS_ERR(wdev)) {
+		err = PTR_ERR(wdev);
+		if (err != -EINVAL)
+			return err;
+		wdev = NULL;
+	} else if (wdev->wiphy != &rdev->wiphy) {
+		return -EINVAL;
+	}
+
 	if (!info->attrs[NL80211_ATTR_TESTDATA])
 		return -EINVAL;
 
-	err = -EOPNOTSUPP;
-	if (rdev->ops->testmode_cmd) {
-		rdev->testmode_info = info;
-		err = rdev_testmode_cmd(rdev,
+	rdev->testmode_info = info;
+	err = rdev_testmode_cmd(rdev, wdev,
 			nla_data(info->attrs[NL80211_ATTR_TESTDATA]),
 			nla_len(info->attrs[NL80211_ATTR_TESTDATA]));
 	rdev->testmode_info = NULL;
-	}
 
 	return err;
 }
@@ -6419,6 +6630,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
 	void *data = NULL;
 	int data_len = 0;
 
+	rtnl_lock();
+
 	if (cb->args[0]) {
 		/*
 		 * 0 is a valid index, but not valid for args[0],
@@ -6430,18 +6643,16 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
 				  nl80211_fam.attrbuf, nl80211_fam.maxattr,
 				  nl80211_policy);
 		if (err)
-			return err;
+			goto out_err;
 
-		mutex_lock(&cfg80211_mutex);
 		rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk),
 						  nl80211_fam.attrbuf);
 		if (IS_ERR(rdev)) {
-			mutex_unlock(&cfg80211_mutex);
-			return PTR_ERR(rdev);
+			err = PTR_ERR(rdev);
+			goto out_err;
 		}
 		phy_idx = rdev->wiphy_idx;
 		rdev = NULL;
-		mutex_unlock(&cfg80211_mutex);
 
 		if (nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA])
 			cb->args[1] =
@@ -6453,14 +6664,11 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
 		data_len = nla_len((void *)cb->args[1]);
 	}
 
-	mutex_lock(&cfg80211_mutex);
 	rdev = cfg80211_rdev_by_wiphy_idx(phy_idx);
 	if (!rdev) {
-		mutex_unlock(&cfg80211_mutex);
-		return -ENOENT;
+		err = -ENOENT;
+		goto out_err;
 	}
-	cfg80211_lock_rdev(rdev);
-	mutex_unlock(&cfg80211_mutex);
 
 	if (!rdev->ops->testmode_dump) {
 		err = -EOPNOTSUPP;
@@ -6473,6 +6681,9 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
 					   NL80211_CMD_TESTMODE);
 		struct nlattr *tmdata;
 
+		if (!hdr)
+			break;
+
 		if (nla_put_u32(skb, NL80211_ATTR_WIPHY, phy_idx)) {
 			genlmsg_cancel(skb, hdr);
 			break;
@@ -6501,7 +6712,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
 	/* see above */
 	cb->args[0] = phy_idx + 1;
  out_err:
-	cfg80211_unlock_rdev(rdev);
+	rtnl_unlock();
 	return err;
 }
 
@@ -6581,12 +6792,14 @@ EXPORT_SYMBOL(cfg80211_testmode_alloc_event_skb);
 
 void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp)
 {
+	struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0];
 	void *hdr = ((void **)skb->cb)[1];
 	struct nlattr *data = ((void **)skb->cb)[2];
 
 	nla_nest_end(skb, data);
 	genlmsg_end(skb, hdr);
-	genlmsg_multicast(skb, 0, nl80211_testmode_mcgrp.id, gfp);
+	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), skb, 0,
+				nl80211_testmode_mcgrp.id, gfp);
 }
 EXPORT_SYMBOL(cfg80211_testmode_event);
 #endif
@@ -6709,7 +6922,9 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 		       sizeof(connect.vht_capa));
 	}
 
-	err = cfg80211_connect(rdev, dev, &connect, connkeys);
+	wdev_lock(dev->ieee80211_ptr);
+	err = cfg80211_connect(rdev, dev, &connect, connkeys, NULL);
+	wdev_unlock(dev->ieee80211_ptr);
 	if (err)
 		kfree(connkeys);
 	return err;
@@ -6720,6 +6935,7 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct net_device *dev = info->user_ptr[1];
 	u16 reason;
+	int ret;
 
 	if (!info->attrs[NL80211_ATTR_REASON_CODE])
 		reason = WLAN_REASON_DEAUTH_LEAVING;
@@ -6733,7 +6949,10 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
 	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
 		return -EOPNOTSUPP;
 
-	return cfg80211_disconnect(rdev, dev, reason, true);
+	wdev_lock(dev->ieee80211_ptr);
+	ret = cfg80211_disconnect(rdev, dev, reason, true);
+	wdev_unlock(dev->ieee80211_ptr);
+	return ret;
 }
 
 static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info)
@@ -6909,9 +7128,8 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
 
 	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
 			     NL80211_CMD_REMAIN_ON_CHANNEL);
-
-	if (IS_ERR(hdr)) {
-		err = PTR_ERR(hdr);
+	if (!hdr) {
+		err = -ENOBUFS;
 		goto free_msg;
 	}
 
@@ -7152,6 +7370,9 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 		return -EOPNOTSUPP;
 
 	switch (wdev->iftype) {
+	case NL80211_IFTYPE_P2P_DEVICE:
+		if (!info->attrs[NL80211_ATTR_WIPHY_FREQ])
+			return -EINVAL;
 	case NL80211_IFTYPE_STATION:
 	case NL80211_IFTYPE_ADHOC:
 	case NL80211_IFTYPE_P2P_CLIENT:
@@ -7159,7 +7380,6 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_MESH_POINT:
 	case NL80211_IFTYPE_P2P_GO:
-	case NL80211_IFTYPE_P2P_DEVICE:
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -7187,9 +7407,18 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 
 	no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]);
 
-	err = nl80211_parse_chandef(rdev, info, &chandef);
-	if (err)
-		return err;
+	/* get the channel if any has been specified, otherwise pass NULL to
+	 * the driver. The latter will use the current one
+	 */
+	chandef.chan = NULL;
+	if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
+		err = nl80211_parse_chandef(rdev, info, &chandef);
+		if (err)
+			return err;
+	}
+
+	if (!chandef.chan && offchan)
+		return -EINVAL;
 
 	if (!dont_wait_for_ack) {
 		msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -7198,9 +7427,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 
 		hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
 				     NL80211_CMD_FRAME);
-
-		if (IS_ERR(hdr)) {
-			err = PTR_ERR(hdr);
+		if (!hdr) {
+			err = -ENOBUFS;
 			goto free_msg;
 		}
 	}
@@ -7350,14 +7578,12 @@ static int nl80211_set_cqm_txe(struct genl_info *info,
 			       u32 rate, u32 pkts, u32 intvl)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
-	struct wireless_dev *wdev;
 	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
 
 	if (rate > 100 || intvl > NL80211_CQM_TXE_MAX_INTVL)
 		return -EINVAL;
 
-	wdev = dev->ieee80211_ptr;
-
 	if (!rdev->ops->set_cqm_txe_config)
 		return -EOPNOTSUPP;
 
@@ -7372,13 +7598,15 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
 				s32 threshold, u32 hysteresis)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
-	struct wireless_dev *wdev;
 	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
 
 	if (threshold > 0)
 		return -EINVAL;
 
-	wdev = dev->ieee80211_ptr;
+	/* disabling - hysteresis should also be zero then */
+	if (threshold == 0)
+		hysteresis = 0;
 
 	if (!rdev->ops->set_cqm_rssi_config)
 		return -EOPNOTSUPP;
@@ -7397,36 +7625,33 @@ static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info)
 	int err;
 
 	cqm = info->attrs[NL80211_ATTR_CQM];
-	if (!cqm) {
-		err = -EINVAL;
-		goto out;
-	}
+	if (!cqm)
+		return -EINVAL;
 
 	err = nla_parse_nested(attrs, NL80211_ATTR_CQM_MAX, cqm,
 			       nl80211_attr_cqm_policy);
 	if (err)
-		goto out;
+		return err;
 
 	if (attrs[NL80211_ATTR_CQM_RSSI_THOLD] &&
 	    attrs[NL80211_ATTR_CQM_RSSI_HYST]) {
-		s32 threshold;
-		u32 hysteresis;
-		threshold = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
-		hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]);
-		err = nl80211_set_cqm_rssi(info, threshold, hysteresis);
-	} else if (attrs[NL80211_ATTR_CQM_TXE_RATE] &&
-		   attrs[NL80211_ATTR_CQM_TXE_PKTS] &&
-		   attrs[NL80211_ATTR_CQM_TXE_INTVL]) {
-		u32 rate, pkts, intvl;
-		rate = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_RATE]);
-		pkts = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_PKTS]);
-		intvl = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_INTVL]);
-		err = nl80211_set_cqm_txe(info, rate, pkts, intvl);
-	} else
-		err = -EINVAL;
+		s32 threshold = nla_get_s32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
+		u32 hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]);
 
-out:
-	return err;
+		return nl80211_set_cqm_rssi(info, threshold, hysteresis);
+	}
+
+	if (attrs[NL80211_ATTR_CQM_TXE_RATE] &&
+	    attrs[NL80211_ATTR_CQM_TXE_PKTS] &&
+	    attrs[NL80211_ATTR_CQM_TXE_INTVL]) {
+		u32 rate = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_RATE]);
+		u32 pkts = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_PKTS]);
+		u32 intvl = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_INTVL]);
+
+		return nl80211_set_cqm_txe(info, rate, pkts, intvl);
+	}
+
+	return -EINVAL;
 }
 
 static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
@@ -7494,6 +7719,23 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 		setup.chandef.chan = NULL;
 	}
 
+	if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) {
+		u8 *rates = nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
+		int n_rates =
+			nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
+		struct ieee80211_supported_band *sband;
+
+		if (!setup.chandef.chan)
+			return -EINVAL;
+
+		sband = rdev->wiphy.bands[setup.chandef.chan->band];
+
+		err = ieee80211_get_ratemask(sband, rates, n_rates,
+					     &setup.basic_rates);
+		if (err)
+			return err;
+	}
+
 	return cfg80211_join_mesh(rdev, dev, &setup, &cfg);
 }
 
@@ -7509,28 +7751,28 @@ static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info)
 static int nl80211_send_wowlan_patterns(struct sk_buff *msg,
 					struct cfg80211_registered_device *rdev)
 {
+	struct cfg80211_wowlan *wowlan = rdev->wiphy.wowlan_config;
 	struct nlattr *nl_pats, *nl_pat;
 	int i, pat_len;
 
-	if (!rdev->wowlan->n_patterns)
+	if (!wowlan->n_patterns)
 		return 0;
 
 	nl_pats = nla_nest_start(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN);
 	if (!nl_pats)
 		return -ENOBUFS;
 
-	for (i = 0; i < rdev->wowlan->n_patterns; i++) {
+	for (i = 0; i < wowlan->n_patterns; i++) {
 		nl_pat = nla_nest_start(msg, i + 1);
 		if (!nl_pat)
 			return -ENOBUFS;
-		pat_len = rdev->wowlan->patterns[i].pattern_len;
-		if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK,
-			    DIV_ROUND_UP(pat_len, 8),
-			    rdev->wowlan->patterns[i].mask) ||
-		    nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN,
-			    pat_len, rdev->wowlan->patterns[i].pattern) ||
-		    nla_put_u32(msg, NL80211_WOWLAN_PKTPAT_OFFSET,
-				rdev->wowlan->patterns[i].pkt_offset))
+		pat_len = wowlan->patterns[i].pattern_len;
+		if (nla_put(msg, NL80211_PKTPAT_MASK, DIV_ROUND_UP(pat_len, 8),
+			    wowlan->patterns[i].mask) ||
+		    nla_put(msg, NL80211_PKTPAT_PATTERN, pat_len,
+			    wowlan->patterns[i].pattern) ||
+		    nla_put_u32(msg, NL80211_PKTPAT_OFFSET,
+				wowlan->patterns[i].pkt_offset))
			return -ENOBUFS;
 		nla_nest_end(msg, nl_pat);
 	}
@@ -7589,16 +7831,15 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info)
 	void *hdr;
 	u32 size = NLMSG_DEFAULT_SIZE;
 
-	if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns &&
-	    !rdev->wiphy.wowlan.tcp)
+	if (!rdev->wiphy.wowlan)
 		return -EOPNOTSUPP;
 
-	if (rdev->wowlan && rdev->wowlan->tcp) {
+	if (rdev->wiphy.wowlan_config && rdev->wiphy.wowlan_config->tcp) {
 		/* adjust size to have room for all the data */
-		size += rdev->wowlan->tcp->tokens_size +
-			rdev->wowlan->tcp->payload_len +
-			rdev->wowlan->tcp->wake_len +
-			rdev->wowlan->tcp->wake_len / 8;
+		size += rdev->wiphy.wowlan_config->tcp->tokens_size +
+			rdev->wiphy.wowlan_config->tcp->payload_len +
+			rdev->wiphy.wowlan_config->tcp->wake_len +
+			rdev->wiphy.wowlan_config->tcp->wake_len / 8;
 	}
 
 	msg = nlmsg_new(size, GFP_KERNEL);
@@ -7610,33 +7851,34 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info)
 	if (!hdr)
 		goto nla_put_failure;
 
-	if (rdev->wowlan) {
+	if (rdev->wiphy.wowlan_config) {
 		struct nlattr *nl_wowlan;
 
 		nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS);
 		if (!nl_wowlan)
 			goto nla_put_failure;
 
-		if ((rdev->wowlan->any &&
+		if ((rdev->wiphy.wowlan_config->any &&
 		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) ||
-		    (rdev->wowlan->disconnect &&
+		    (rdev->wiphy.wowlan_config->disconnect &&
 		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) ||
-		    (rdev->wowlan->magic_pkt &&
+		    (rdev->wiphy.wowlan_config->magic_pkt &&
 		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) ||
-		    (rdev->wowlan->gtk_rekey_failure &&
+		    (rdev->wiphy.wowlan_config->gtk_rekey_failure &&
 		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) ||
-		    (rdev->wowlan->eap_identity_req &&
+		    (rdev->wiphy.wowlan_config->eap_identity_req &&
 		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) ||
-		    (rdev->wowlan->four_way_handshake &&
+		    (rdev->wiphy.wowlan_config->four_way_handshake &&
 		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) ||
-		    (rdev->wowlan->rfkill_release &&
+		    (rdev->wiphy.wowlan_config->rfkill_release &&
 		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)))
 			goto nla_put_failure;
 
 		if (nl80211_send_wowlan_patterns(msg, rdev))
 			goto nla_put_failure;
 
-		if (nl80211_send_wowlan_tcp(msg, rdev->wowlan->tcp))
+		if (nl80211_send_wowlan_tcp(msg,
+					    rdev->wiphy.wowlan_config->tcp))
 			goto nla_put_failure;
 
 		nla_nest_end(msg, nl_wowlan);
@@ -7662,7 +7904,7 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,
 	u32 data_size, wake_size, tokens_size = 0, wake_mask_size;
 	int err, port;
 
-	if (!rdev->wiphy.wowlan.tcp)
+	if (!rdev->wiphy.wowlan->tcp)
 		return -EINVAL;
 
 	err = nla_parse(tb, MAX_NL80211_WOWLAN_TCP,
@@ -7682,16 +7924,16 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,
 		return -EINVAL;
 
 	data_size = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]);
-	if (data_size > rdev->wiphy.wowlan.tcp->data_payload_max)
+	if (data_size > rdev->wiphy.wowlan->tcp->data_payload_max)
 		return -EINVAL;
 
 	if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) >
-			rdev->wiphy.wowlan.tcp->data_interval_max ||
+			rdev->wiphy.wowlan->tcp->data_interval_max ||
 	    nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) == 0)
 		return -EINVAL;
 
 	wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]);
-	if (wake_size > rdev->wiphy.wowlan.tcp->wake_payload_max)
+	if (wake_size > rdev->wiphy.wowlan->tcp->wake_payload_max)
 		return -EINVAL;
 
 	wake_mask_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_MASK]);
@@ -7706,13 +7948,13 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,
 
 		if (!tok->len || tokens_size % tok->len)
 			return -EINVAL;
-		if (!rdev->wiphy.wowlan.tcp->tok)
+		if (!rdev->wiphy.wowlan->tcp->tok)
 			return -EINVAL;
-		if (tok->len > rdev->wiphy.wowlan.tcp->tok->max_len)
+		if (tok->len > rdev->wiphy.wowlan->tcp->tok->max_len)
 			return -EINVAL;
-		if (tok->len < rdev->wiphy.wowlan.tcp->tok->min_len)
+		if (tok->len < rdev->wiphy.wowlan->tcp->tok->min_len)
 			return -EINVAL;
-		if (tokens_size > rdev->wiphy.wowlan.tcp->tok->bufsize)
+		if (tokens_size > rdev->wiphy.wowlan->tcp->tok->bufsize)
 			return -EINVAL;
 		if (tok->offset + tok->len > data_size)
 			return -EINVAL;
@@ -7720,7 +7962,7 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,
 
 	if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]) {
 		seq = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]);
-		if (!rdev->wiphy.wowlan.tcp->seq)
+		if (!rdev->wiphy.wowlan->tcp->seq)
 			return -EINVAL;
 		if (seq->len == 0 || seq->len > 4)
 			return -EINVAL;
@@ -7801,17 +8043,16 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 	struct nlattr *tb[NUM_NL80211_WOWLAN_TRIG];
 	struct cfg80211_wowlan new_triggers = {};
 	struct cfg80211_wowlan *ntrig;
-	struct wiphy_wowlan_support *wowlan = &rdev->wiphy.wowlan;
+	const struct wiphy_wowlan_support *wowlan = rdev->wiphy.wowlan;
 	int err, i;
-	bool prev_enabled = rdev->wowlan;
+	bool prev_enabled = rdev->wiphy.wowlan_config;
 
-	if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns &&
-	    !rdev->wiphy.wowlan.tcp)
+	if (!wowlan)
 		return -EOPNOTSUPP;
 
 	if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) {
 		cfg80211_rdev_free_wowlan(rdev);
-		rdev->wowlan = NULL;
+		rdev->wiphy.wowlan_config = NULL;
 		goto set_wakeup;
 	}
 
@@ -7871,7 +8112,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 		struct nlattr *pat;
 		int n_patterns = 0;
 		int rem, pat_len, mask_len, pkt_offset;
-		struct nlattr *pat_tb[NUM_NL80211_WOWLAN_PKTPAT];
+		struct nlattr *pat_tb[NUM_NL80211_PKTPAT];
 
 		nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN],
 				    rem)
@@ -7890,26 +8131,25 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 
 		nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN],
 				    rem) {
-			nla_parse(pat_tb, MAX_NL80211_WOWLAN_PKTPAT,
-				  nla_data(pat), nla_len(pat), NULL);
+			nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
+				  nla_len(pat), NULL);
 			err = -EINVAL;
-			if (!pat_tb[NL80211_WOWLAN_PKTPAT_MASK] ||
-			    !pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN])
+			if (!pat_tb[NL80211_PKTPAT_MASK] ||
+			    !pat_tb[NL80211_PKTPAT_PATTERN])
 				goto error;
-			pat_len = nla_len(pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN]);
+			pat_len = nla_len(pat_tb[NL80211_PKTPAT_PATTERN]);
 			mask_len = DIV_ROUND_UP(pat_len, 8);
-			if (nla_len(pat_tb[NL80211_WOWLAN_PKTPAT_MASK]) !=
-			    mask_len)
+			if (nla_len(pat_tb[NL80211_PKTPAT_MASK]) != mask_len)
 				goto error;
 			if (pat_len > wowlan->pattern_max_len ||
 			    pat_len < wowlan->pattern_min_len)
 				goto error;
 
-			if (!pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET])
+			if (!pat_tb[NL80211_PKTPAT_OFFSET])
 				pkt_offset = 0;
 			else
 				pkt_offset = nla_get_u32(
-					pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET]);
+					pat_tb[NL80211_PKTPAT_OFFSET]);
 			if (pkt_offset > wowlan->max_pkt_offset)
 				goto error;
 			new_triggers.patterns[i].pkt_offset = pkt_offset;
@@ -7923,11 +8163,11 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 			new_triggers.patterns[i].pattern =
 				new_triggers.patterns[i].mask + mask_len;
 			memcpy(new_triggers.patterns[i].mask,
-			       nla_data(pat_tb[NL80211_WOWLAN_PKTPAT_MASK]),
+			       nla_data(pat_tb[NL80211_PKTPAT_MASK]),
 			       mask_len);
 			new_triggers.patterns[i].pattern_len = pat_len;
 			memcpy(new_triggers.patterns[i].pattern,
-			       nla_data(pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN]),
+			       nla_data(pat_tb[NL80211_PKTPAT_PATTERN]),
 			       pat_len);
 			i++;
 		}
@@ -7947,11 +8187,12 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 			goto error;
 	}
 	cfg80211_rdev_free_wowlan(rdev);
-	rdev->wowlan = ntrig;
+	rdev->wiphy.wowlan_config = ntrig;
 
  set_wakeup:
-	if (rdev->ops->set_wakeup && prev_enabled != !!rdev->wowlan)
-		rdev_set_wakeup(rdev, rdev->wowlan);
+	if (rdev->ops->set_wakeup &&
+	    prev_enabled != !!rdev->wiphy.wowlan_config)
+		rdev_set_wakeup(rdev, rdev->wiphy.wowlan_config);
 
 	return 0;
  error:
@@ -7965,6 +8206,264 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 }
 #endif
 
+static int nl80211_send_coalesce_rules(struct sk_buff *msg,
+				       struct cfg80211_registered_device *rdev)
+{
+	struct nlattr *nl_pats, *nl_pat, *nl_rule, *nl_rules;
+	int i, j, pat_len;
+	struct cfg80211_coalesce_rules *rule;
+
+	if (!rdev->coalesce->n_rules)
+		return 0;
+
+	nl_rules = nla_nest_start(msg, NL80211_ATTR_COALESCE_RULE);
+	if (!nl_rules)
+		return -ENOBUFS;
+
+	for (i = 0; i < rdev->coalesce->n_rules; i++) {
+		nl_rule = nla_nest_start(msg, i + 1);
+		if (!nl_rule)
+			return -ENOBUFS;
+
+		rule = &rdev->coalesce->rules[i];
+		if (nla_put_u32(msg, NL80211_ATTR_COALESCE_RULE_DELAY,
+				rule->delay))
+			return -ENOBUFS;
+
+		if (nla_put_u32(msg, NL80211_ATTR_COALESCE_RULE_CONDITION,
+				rule->condition))
+			return -ENOBUFS;
+
+		nl_pats = nla_nest_start(msg,
+				NL80211_ATTR_COALESCE_RULE_PKT_PATTERN);
+		if (!nl_pats)
+			return -ENOBUFS;
+
+		for (j = 0; j < rule->n_patterns; j++) {
+			nl_pat = nla_nest_start(msg, j + 1);
+			if (!nl_pat)
+				return -ENOBUFS;
+			pat_len = rule->patterns[j].pattern_len;
+			if (nla_put(msg, NL80211_PKTPAT_MASK,
+				    DIV_ROUND_UP(pat_len, 8),
+				    rule->patterns[j].mask) ||
+			    nla_put(msg, NL80211_PKTPAT_PATTERN, pat_len,
+				    rule->patterns[j].pattern) ||
+			    nla_put_u32(msg, NL80211_PKTPAT_OFFSET,
+					rule->patterns[j].pkt_offset))
+				return -ENOBUFS;
+			nla_nest_end(msg, nl_pat);
+		}
+		nla_nest_end(msg, nl_pats);
+		nla_nest_end(msg, nl_rule);
+	}
+	nla_nest_end(msg, nl_rules);
+
+	return 0;
+}
+
+static int nl80211_get_coalesce(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct sk_buff *msg;
+	void *hdr;
+
+	if (!rdev->wiphy.coalesce)
+		return -EOPNOTSUPP;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
+			     NL80211_CMD_GET_COALESCE);
+	if (!hdr)
+		goto nla_put_failure;
+
+	if (rdev->coalesce && nl80211_send_coalesce_rules(msg, rdev))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return genlmsg_reply(msg, info);
+
+nla_put_failure:
+	nlmsg_free(msg);
+	return -ENOBUFS;
+}
+
+void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev)
+{
+	struct cfg80211_coalesce *coalesce = rdev->coalesce;
+	int i, j;
+	struct cfg80211_coalesce_rules *rule;
+
+	if (!coalesce)
+		return;
+
+	for (i = 0; i < coalesce->n_rules; i++) {
+		rule = &coalesce->rules[i];
+		for (j = 0; j < rule->n_patterns; j++)
+			kfree(rule->patterns[j].mask);
+		kfree(rule->patterns);
+	}
+	kfree(coalesce->rules);
+	kfree(coalesce);
+	rdev->coalesce = NULL;
+}
+
+static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
+				       struct nlattr *rule,
+				       struct cfg80211_coalesce_rules *new_rule)
+{
+	int err, i;
+	const struct wiphy_coalesce_support *coalesce = rdev->wiphy.coalesce;
+	struct nlattr *tb[NUM_NL80211_ATTR_COALESCE_RULE], *pat;
+	int rem, pat_len, mask_len, pkt_offset, n_patterns = 0;
+	struct nlattr *pat_tb[NUM_NL80211_PKTPAT];
+
+	err = nla_parse(tb, NL80211_ATTR_COALESCE_RULE_MAX, nla_data(rule),
+			nla_len(rule), nl80211_coalesce_policy);
+	if (err)
+		return err;
+
+	if (tb[NL80211_ATTR_COALESCE_RULE_DELAY])
+		new_rule->delay =
+			nla_get_u32(tb[NL80211_ATTR_COALESCE_RULE_DELAY]);
+	if (new_rule->delay > coalesce->max_delay)
+		return -EINVAL;
+
+	if (tb[NL80211_ATTR_COALESCE_RULE_CONDITION])
+		new_rule->condition =
+			nla_get_u32(tb[NL80211_ATTR_COALESCE_RULE_CONDITION]);
+	if (new_rule->condition != NL80211_COALESCE_CONDITION_MATCH &&
+	    new_rule->condition != NL80211_COALESCE_CONDITION_NO_MATCH)
+		return -EINVAL;
+
+	if (!tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN])
+		return -EINVAL;
+
+	nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN],
+			    rem)
+		n_patterns++;
+	if (n_patterns > coalesce->n_patterns)
+		return -EINVAL;
+
+	new_rule->patterns = kcalloc(n_patterns, sizeof(new_rule->patterns[0]),
+				     GFP_KERNEL);
+	if (!new_rule->patterns)
+		return -ENOMEM;
+
+	new_rule->n_patterns = n_patterns;
+	i = 0;
+
+	nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN],
+			    rem) {
+		nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
+			  nla_len(pat), NULL);
+		if (!pat_tb[NL80211_PKTPAT_MASK] ||
+		    !pat_tb[NL80211_PKTPAT_PATTERN])
+			return -EINVAL;
+		pat_len = nla_len(pat_tb[NL80211_PKTPAT_PATTERN]);
+		mask_len = DIV_ROUND_UP(pat_len, 8);
+		if (nla_len(pat_tb[NL80211_PKTPAT_MASK]) != mask_len)
+			return -EINVAL;
+		if (pat_len > coalesce->pattern_max_len ||
+		    pat_len < coalesce->pattern_min_len)
+			return -EINVAL;
+
+		if (!pat_tb[NL80211_PKTPAT_OFFSET])
+			pkt_offset = 0;
+		else
+			pkt_offset = nla_get_u32(pat_tb[NL80211_PKTPAT_OFFSET]);
+		if (pkt_offset > coalesce->max_pkt_offset)
+			return -EINVAL;
+		new_rule->patterns[i].pkt_offset = pkt_offset;
+
+		new_rule->patterns[i].mask =
+			kmalloc(mask_len + pat_len, GFP_KERNEL);
+		if (!new_rule->patterns[i].mask)
+			return -ENOMEM;
+		new_rule->patterns[i].pattern =
+			new_rule->patterns[i].mask + mask_len;
+		memcpy(new_rule->patterns[i].mask,
+		       nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len);
+		new_rule->patterns[i].pattern_len = pat_len;
+		memcpy(new_rule->patterns[i].pattern,
+		       nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len);
+		i++;
+	}
+
+	return 0;
+}
+
+static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	const struct wiphy_coalesce_support *coalesce = rdev->wiphy.coalesce;
+	struct cfg80211_coalesce new_coalesce = {};
+	struct cfg80211_coalesce *n_coalesce;
+	int err, rem_rule, n_rules = 0, i, j;
+	struct nlattr *rule;
+	struct cfg80211_coalesce_rules *tmp_rule;
+
+	if (!rdev->wiphy.coalesce || !rdev->ops->set_coalesce)
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL80211_ATTR_COALESCE_RULE]) {
+		cfg80211_rdev_free_coalesce(rdev);
+		rdev->ops->set_coalesce(&rdev->wiphy, NULL);
+		return 0;
+	}
+
+	nla_for_each_nested(rule, info->attrs[NL80211_ATTR_COALESCE_RULE],
+			    rem_rule)
+		n_rules++;
+	if (n_rules > coalesce->n_rules)
+		return -EINVAL;
+
+	new_coalesce.rules = kcalloc(n_rules, sizeof(new_coalesce.rules[0]),
+				     GFP_KERNEL);
+	if (!new_coalesce.rules)
+		return -ENOMEM;
+
+	new_coalesce.n_rules = n_rules;
+	i = 0;
+
+	nla_for_each_nested(rule, info->attrs[NL80211_ATTR_COALESCE_RULE],
+			    rem_rule) {
+		err = nl80211_parse_coalesce_rule(rdev, rule,
+						  &new_coalesce.rules[i]);
+		if (err)
+			goto error;
+
+		i++;
+	}
+
+	err = rdev->ops->set_coalesce(&rdev->wiphy, &new_coalesce);
+	if (err)
+		goto error;
+
+	n_coalesce = kmemdup(&new_coalesce, sizeof(new_coalesce), GFP_KERNEL);
+	if (!n_coalesce) {
+		err = -ENOMEM;
+		goto error;
+	}
+	cfg80211_rdev_free_coalesce(rdev);
+	rdev->coalesce = n_coalesce;
+
+	return 0;
+error:
+	for (i = 0; i < new_coalesce.n_rules; i++) {
+		tmp_rule = &new_coalesce.rules[i];
+		for (j = 0; j < tmp_rule->n_patterns; j++)
+			kfree(tmp_rule->patterns[j].mask);
+		kfree(tmp_rule->patterns);
+	}
+	kfree(new_coalesce.rules);
+
+	return err;
+}
+
 static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -8061,9 +8560,8 @@ static int nl80211_probe_client(struct sk_buff *skb,
 
 	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
 			     NL80211_CMD_PROBE_CLIENT);
-
-	if (IS_ERR(hdr)) {
-		err = PTR_ERR(hdr);
+	if (!hdr) {
+		err = -ENOBUFS;
 		goto free_msg;
 	}
 
@@ -8136,9 +8634,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info)
 	if (wdev->p2p_started)
 		return 0;
 
-	mutex_lock(&rdev->devlist_mtx);
 	err = cfg80211_can_add_interface(rdev, wdev->iftype);
-	mutex_unlock(&rdev->devlist_mtx);
 	if (err)
 		return err;
 
@@ -8147,9 +8643,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info)
 		return err;
 
 	wdev->p2p_started = true;
-	mutex_lock(&rdev->devlist_mtx);
 	rdev->opencount++;
-	mutex_unlock(&rdev->devlist_mtx);
 
 	return 0;
 }
@@ -8165,11 +8659,7 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info)
 	if (!rdev->ops->stop_p2p_device)
 		return -EOPNOTSUPP;
 
-	mutex_lock(&rdev->devlist_mtx);
-	mutex_lock(&rdev->sched_scan_mtx);
 	cfg80211_stop_p2p_device(rdev, wdev);
-	mutex_unlock(&rdev->sched_scan_mtx);
-	mutex_unlock(&rdev->devlist_mtx);
 
 	return 0;
 }
@@ -8312,11 +8802,11 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb,
 		info->user_ptr[0] = rdev;
 	} else if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV ||
 		   ops->internal_flags & NL80211_FLAG_NEED_WDEV) {
-		mutex_lock(&cfg80211_mutex);
+		ASSERT_RTNL();
+
 		wdev = __cfg80211_wdev_from_attrs(genl_info_net(info),
 						  info->attrs);
 		if (IS_ERR(wdev)) {
-			mutex_unlock(&cfg80211_mutex);
 			if (rtnl)
 				rtnl_unlock();
 			return PTR_ERR(wdev);
@@ -8327,7 +8817,6 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb,
 
 		if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) {
 			if (!dev) {
-				mutex_unlock(&cfg80211_mutex);
 				if (rtnl)
 					rtnl_unlock();
 				return -EINVAL;
@@ -8341,7 +8830,6 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb,
 		if (dev) {
 			if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP &&
 			    !netif_running(dev)) {
-				mutex_unlock(&cfg80211_mutex);
 				if (rtnl)
 					rtnl_unlock();
 				return -ENETDOWN;
@@ -8350,17 +8838,12 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb,
 			dev_hold(dev);
 		} else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) {
 			if (!wdev->p2p_started) {
-				mutex_unlock(&cfg80211_mutex);
 				if (rtnl)
 					rtnl_unlock();
 				return -ENETDOWN;
 			}
 		}
 
-		cfg80211_lock_rdev(rdev);
-
-		mutex_unlock(&cfg80211_mutex);
-
 		info->user_ptr[0] = rdev;
 	}
 
@@ -8370,8 +8853,6 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb,
 static void nl80211_post_doit(struct genl_ops *ops, struct sk_buff *skb,
 			      struct genl_info *info)
 {
-	if (info->user_ptr[0])
-		cfg80211_unlock_rdev(info->user_ptr[0]);
 	if (info->user_ptr[1]) {
 		if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) {
 			struct wireless_dev *wdev = info->user_ptr[1];
@@ -8391,9 +8872,11 @@ static struct genl_ops nl80211_ops[] = {
 		.cmd = NL80211_CMD_GET_WIPHY,
 		.doit = nl80211_get_wiphy,
 		.dumpit = nl80211_dump_wiphy,
+		.done = nl80211_dump_wiphy_done,
 		.policy = nl80211_policy,
 		/* can be retrieved by unprivileged users */
-		.internal_flags = NL80211_FLAG_NEED_WIPHY,
+		.internal_flags = NL80211_FLAG_NEED_WIPHY |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_WIPHY,
@@ -8408,7 +8891,8 @@ static struct genl_ops nl80211_ops[] = {
 		.dumpit = nl80211_dump_interface,
 		.policy = nl80211_policy,
 		/* can be retrieved by unprivileged users */
-		.internal_flags = NL80211_FLAG_NEED_WDEV,
+		.internal_flags = NL80211_FLAG_NEED_WDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_INTERFACE,
@@ -8567,6 +9051,7 @@ static struct genl_ops nl80211_ops[] = {
 		.cmd = NL80211_CMD_GET_REG,
 		.doit = nl80211_get_reg,
 		.policy = nl80211_policy,
+		.internal_flags = NL80211_FLAG_NEED_RTNL,
 		/* can be retrieved by unprivileged users */
 	},
 	{
@@ -8574,6 +9059,7 @@ static struct genl_ops nl80211_ops[] = {
 		.doit = nl80211_set_reg,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_REQ_SET_REG,
@@ -8984,7 +9470,30 @@ static struct genl_ops nl80211_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
-	}
+	},
+	{
+		.cmd = NL80211_CMD_GET_COALESCE,
+		.doit = nl80211_get_coalesce,
+		.policy = nl80211_policy,
+		.internal_flags = NL80211_FLAG_NEED_WIPHY |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_SET_COALESCE,
+		.doit = nl80211_set_coalesce,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WIPHY |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_CHANNEL_SWITCH,
+		.doit = nl80211_channel_switch,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -9007,13 +9516,13 @@ static struct genl_multicast_group nl80211_regulatory_mcgrp = {
 void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev)
 {
 	struct sk_buff *msg;
+	struct nl80211_dump_wiphy_state state = {};
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg)
 		return;
 
-	if (nl80211_send_wiphy(rdev, msg, 0, 0, 0,
-			       false, NULL, NULL, NULL) < 0) {
+	if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, &state) < 0) {
 		nlmsg_free(msg);
 		return;
 	}
@@ -9029,8 +9538,6 @@ static int nl80211_add_scan_req(struct sk_buff *msg,
 	struct nlattr *nest;
 	int i;
 
-	lockdep_assert_held(&rdev->sched_scan_mtx);
-
 	if (WARN_ON(!req))
 		return 0;
 
@@ -9337,31 +9844,27 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
 				NL80211_CMD_DISASSOCIATE, gfp);
 }
 
-void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf,
+void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf,
 				  size_t len)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct wiphy *wiphy = wdev->wiphy;
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	const struct ieee80211_mgmt *mgmt = (void *)buf;
+	u32 cmd;
 
-	trace_cfg80211_send_unprot_deauth(dev);
-	nl80211_send_mlme_event(rdev, dev, buf, len,
-				NL80211_CMD_UNPROT_DEAUTHENTICATE, GFP_ATOMIC);
-}
-EXPORT_SYMBOL(cfg80211_send_unprot_deauth);
+	if (WARN_ON(len < 2))
+		return;
 
-void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf,
-				   size_t len)
-{
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	if (ieee80211_is_deauth(mgmt->frame_control))
+		cmd = NL80211_CMD_UNPROT_DEAUTHENTICATE;
+	else
+		cmd = NL80211_CMD_UNPROT_DISASSOCIATE;
 
-	trace_cfg80211_send_unprot_disassoc(dev);
-	nl80211_send_mlme_event(rdev, dev, buf, len,
-				NL80211_CMD_UNPROT_DISASSOCIATE, GFP_ATOMIC);
+	trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len);
+	nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC);
 }
-EXPORT_SYMBOL(cfg80211_send_unprot_disassoc);
+EXPORT_SYMBOL(cfg80211_rx_unprot_mlme_mgmt);
 
 static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev,
 				      struct net_device *netdev, int cmd,
@@ -9872,7 +10375,6 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
 	struct sk_buff *msg;
 	void *hdr;
-	int err;
 	u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid);
 
 	if (!nlportid)
@@ -9893,12 +10395,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
 	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr))
 		goto nla_put_failure;
 
-	err = genlmsg_end(msg, hdr);
-	if (err < 0) {
-		nlmsg_free(msg);
-		return true;
-	}
-
+	genlmsg_end(msg, hdr);
 	genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
 	return true;
 
@@ -9953,7 +10450,7 @@ EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame);
 int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
 		      struct wireless_dev *wdev, u32 nlportid,
 		      int freq, int sig_dbm,
-		      const u8 *buf, size_t len, gfp_t gfp)
+		      const u8 *buf, size_t len, u32 flags, gfp_t gfp)
 {
 	struct net_device *netdev = wdev->netdev;
 	struct sk_buff *msg;
@@ -9976,7 +10473,9 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
 	    nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq) ||
 	    (sig_dbm &&
 	     nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) ||
-	    nla_put(msg, NL80211_ATTR_FRAME, len, buf))
+	    nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
+	    (flags &&
+	     nla_put_u32(msg, NL80211_ATTR_RXMGMT_FLAGS, flags)))
 		goto nla_put_failure;
 
 	genlmsg_end(msg, hdr);
@@ -10021,7 +10520,8 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp);
+	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
  nla_put_failure:
@@ -10341,10 +10841,7 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
 	if (nl80211_send_chandef(msg, chandef))
 		goto nla_put_failure;
 
-	if (genlmsg_end(msg, hdr) < 0) {
-		nlmsg_free(msg);
-		return;
-	}
+	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
@@ -10410,7 +10907,6 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
 	struct sk_buff *msg;
 	void *hdr;
-	int err;
 
 	trace_cfg80211_probe_status(dev, addr, cookie, acked);
 
@@ -10432,11 +10928,7 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
 	    (acked && nla_put_flag(msg, NL80211_ATTR_ACK)))
 		goto nla_put_failure;
 
-	err = genlmsg_end(msg, hdr);
-	if (err < 0) {
-		nlmsg_free(msg);
-		return;
-	}
+	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
@@ -10502,7 +10994,7 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev,
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
 	struct sk_buff *msg;
 	void *hdr;
-	int err, size = 200;
+	int size = 200;
 
 	trace_cfg80211_report_wowlan_wakeup(wdev->wiphy, wdev, wakeup);
 
@@ -10588,9 +11080,7 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev,
 		nla_nest_end(msg, reasons);
 	}
 
-	err = genlmsg_end(msg, hdr);
-	if (err < 0)
-		goto free_msg;
+	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
@@ -10610,7 +11100,6 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer,
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
 	struct sk_buff *msg;
 	void *hdr;
-	int err;
 
 	trace_cfg80211_tdls_oper_request(wdev->wiphy, dev, peer, oper,
 					 reason_code);
@@ -10633,11 +11122,7 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer,
 	     nla_put_u16(msg, NL80211_ATTR_REASON_CODE, reason_code)))
 		goto nla_put_failure;
 
-	err = genlmsg_end(msg, hdr);
-	if (err < 0) {
-		nlmsg_free(msg);
-		return;
-	}
+	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, gfp);
@@ -10695,7 +11180,6 @@ void cfg80211_ft_event(struct net_device *netdev,
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
 	struct sk_buff *msg;
 	void *hdr;
-	int err;
 
 	trace_cfg80211_ft_event(wiphy, netdev, ft_event);
 
@@ -10721,11 +11205,7 @@ void cfg80211_ft_event(struct net_device *netdev,
 		nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len,
 			ft_event->ric_ies);
 
-	err = genlmsg_end(msg, hdr);
-	if (err < 0) {
-		nlmsg_free(msg);
-		return;
-	}
+	genlmsg_end(msg, hdr);
 
 	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
 				nl80211_mlme_mcgrp.id, GFP_KERNEL);
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index a4073e808c13..2c0f2b3c07cb 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -66,7 +66,7 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
 int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
 		      struct wireless_dev *wdev, u32 nlpid,
 		      int freq, int sig_dbm,
-		      const u8 *buf, size_t len, gfp_t gfp);
+		      const u8 *buf, size_t len, u32 flags, gfp_t gfp);
 
 void
 nl80211_radar_notify(struct cfg80211_registered_device *rdev,
@@ -74,4 +74,6 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
 		     enum nl80211_radar_event event,
 		     struct net_device *netdev, gfp_t gfp);
 
+void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev);
+
 #endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 9f15f0ac824d..37ce9fdfe934 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -516,11 +516,12 @@ static inline void rdev_rfkill_poll(struct cfg80211_registered_device *rdev)
 
 #ifdef CONFIG_NL80211_TESTMODE
 static inline int rdev_testmode_cmd(struct cfg80211_registered_device *rdev,
+				    struct wireless_dev *wdev,
 				    void *data, int len)
 {
 	int ret;
-	trace_rdev_testmode_cmd(&rdev->wiphy);
-	ret = rdev->ops->testmode_cmd(&rdev->wiphy, data, len);
+	trace_rdev_testmode_cmd(&rdev->wiphy, wdev);
+	ret = rdev->ops->testmode_cmd(&rdev->wiphy, wdev, data, len);
 	trace_rdev_return_int(&rdev->wiphy, ret);
 	return ret;
 }
@@ -923,4 +924,16 @@ static inline void rdev_crit_proto_stop(struct cfg80211_registered_device *rdev,
 	trace_rdev_return_void(&rdev->wiphy);
 }
 
+static inline int rdev_channel_switch(struct cfg80211_registered_device *rdev,
+				      struct net_device *dev,
+				      struct cfg80211_csa_settings *params)
+{
+	int ret;
+
+	trace_rdev_channel_switch(&rdev->wiphy, dev, params);
+	ret = rdev->ops->channel_switch(&rdev->wiphy, dev, params);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
 #endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index cc35fbaa4578..de06d5d1287f 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -81,7 +81,10 @@ static struct regulatory_request core_request_world = {
 	.country_ie_env = ENVIRON_ANY,
 };
 
-/* Receipt of information from last regulatory request */
+/*
+ * Receipt of information from last regulatory request,
+ * protected by RTNL (and can be accessed with RCU protection)
+ */
 static struct regulatory_request __rcu *last_request =
 	(void __rcu *)&core_request_world;
 
@@ -96,39 +99,25 @@ static struct device_type reg_device_type = {
  * Central wireless core regulatory domains, we only need two,
  * the current one and a world regulatory domain in case we have no
  * information to give us an alpha2.
+ * (protected by RTNL, can be read under RCU)
  */
 const struct ieee80211_regdomain __rcu *cfg80211_regdomain;
 
 /*
- * Protects static reg.c components:
- *	- cfg80211_regdomain (if not used with RCU)
- *	- cfg80211_world_regdom
- *	- last_request (if not used with RCU)
- *	- reg_num_devs_support_basehint
- */
-static DEFINE_MUTEX(reg_mutex);
-
-/*
  * Number of devices that registered to the core
  * that support cellular base station regulatory hints
+ * (protected by RTNL)
  */
 static int reg_num_devs_support_basehint;
 
-static inline void assert_reg_lock(void)
-{
-	lockdep_assert_held(&reg_mutex);
-}
-
 static const struct ieee80211_regdomain *get_cfg80211_regdom(void)
 {
-	return rcu_dereference_protected(cfg80211_regdomain,
-					 lockdep_is_held(&reg_mutex));
+	return rtnl_dereference(cfg80211_regdomain);
 }
 
 static const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy)
 {
-	return rcu_dereference_protected(wiphy->regd,
-					 lockdep_is_held(&reg_mutex));
+	return rtnl_dereference(wiphy->regd);
 }
 
 static void rcu_free_regdom(const struct ieee80211_regdomain *r)
@@ -140,8 +129,7 @@ static void rcu_free_regdom(const struct ieee80211_regdomain *r)
140 129
141static struct regulatory_request *get_last_request(void) 130static struct regulatory_request *get_last_request(void)
142{ 131{
143 return rcu_dereference_check(last_request, 132 return rcu_dereference_rtnl(last_request);
144 lockdep_is_held(&reg_mutex));
145} 133}
146 134
147/* Used to queue up regulatory hints */ 135/* Used to queue up regulatory hints */
@@ -200,6 +188,7 @@ static const struct ieee80211_regdomain world_regdom = {
200 } 188 }
201}; 189};
202 190
191/* protected by RTNL */
203static const struct ieee80211_regdomain *cfg80211_world_regdom = 192static const struct ieee80211_regdomain *cfg80211_world_regdom =
204 &world_regdom; 193 &world_regdom;
205 194
@@ -215,7 +204,7 @@ static void reset_regdomains(bool full_reset,
215 const struct ieee80211_regdomain *r; 204 const struct ieee80211_regdomain *r;
216 struct regulatory_request *lr; 205 struct regulatory_request *lr;
217 206
218 assert_reg_lock(); 207 ASSERT_RTNL();
219 208
220 r = get_cfg80211_regdom(); 209 r = get_cfg80211_regdom();
221 210
@@ -377,7 +366,7 @@ static void reg_regdb_search(struct work_struct *work)
377 const struct ieee80211_regdomain *curdom, *regdom = NULL; 366 const struct ieee80211_regdomain *curdom, *regdom = NULL;
378 int i; 367 int i;
379 368
380 mutex_lock(&cfg80211_mutex); 369 rtnl_lock();
381 370
382 mutex_lock(&reg_regdb_search_mutex); 371 mutex_lock(&reg_regdb_search_mutex);
383 while (!list_empty(&reg_regdb_search_list)) { 372 while (!list_empty(&reg_regdb_search_list)) {
@@ -402,7 +391,7 @@ static void reg_regdb_search(struct work_struct *work)
402 if (!IS_ERR_OR_NULL(regdom)) 391 if (!IS_ERR_OR_NULL(regdom))
403 set_regdom(regdom); 392 set_regdom(regdom);
404 393
405 mutex_unlock(&cfg80211_mutex); 394 rtnl_unlock();
406} 395}
407 396
408static DECLARE_WORK(reg_regdb_work, reg_regdb_search); 397static DECLARE_WORK(reg_regdb_work, reg_regdb_search);
@@ -936,13 +925,7 @@ static bool reg_request_cell_base(struct regulatory_request *request)
936 925
937bool reg_last_request_cell_base(void) 926bool reg_last_request_cell_base(void)
938{ 927{
939 bool val; 928 return reg_request_cell_base(get_last_request());
940
941 mutex_lock(&reg_mutex);
942 val = reg_request_cell_base(get_last_request());
943 mutex_unlock(&reg_mutex);
944
945 return val;
946} 929}
947 930
948#ifdef CONFIG_CFG80211_CERTIFICATION_ONUS 931#ifdef CONFIG_CFG80211_CERTIFICATION_ONUS
@@ -1225,7 +1208,7 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator)
1225 struct cfg80211_registered_device *rdev; 1208 struct cfg80211_registered_device *rdev;
1226 struct wiphy *wiphy; 1209 struct wiphy *wiphy;
1227 1210
1228 assert_cfg80211_lock(); 1211 ASSERT_RTNL();
1229 1212
1230 list_for_each_entry(rdev, &cfg80211_rdev_list, list) { 1213 list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
1231 wiphy = &rdev->wiphy; 1214 wiphy = &rdev->wiphy;
@@ -1362,7 +1345,7 @@ get_reg_request_treatment(struct wiphy *wiphy,
1362 return REG_REQ_OK; 1345 return REG_REQ_OK;
1363 return REG_REQ_ALREADY_SET; 1346 return REG_REQ_ALREADY_SET;
1364 } 1347 }
1365 return 0; 1348 return REG_REQ_OK;
1366 case NL80211_REGDOM_SET_BY_DRIVER: 1349 case NL80211_REGDOM_SET_BY_DRIVER:
1367 if (lr->initiator == NL80211_REGDOM_SET_BY_CORE) { 1350 if (lr->initiator == NL80211_REGDOM_SET_BY_CORE) {
1368 if (regdom_changes(pending_request->alpha2)) 1351 if (regdom_changes(pending_request->alpha2))
@@ -1444,8 +1427,6 @@ static void reg_set_request_processed(void)
1444 * what it believes should be the current regulatory domain. 1427 * what it believes should be the current regulatory domain.
1445 * 1428 *
1446 * Returns one of the different reg request treatment values. 1429 * Returns one of the different reg request treatment values.
1447 *
1448 * Caller must hold &reg_mutex
1449 */ 1430 */
1450static enum reg_request_treatment 1431static enum reg_request_treatment
1451__regulatory_hint(struct wiphy *wiphy, 1432__regulatory_hint(struct wiphy *wiphy,
@@ -1570,21 +1551,19 @@ static void reg_process_pending_hints(void)
1570{ 1551{
1571 struct regulatory_request *reg_request, *lr; 1552 struct regulatory_request *reg_request, *lr;
1572 1553
1573 mutex_lock(&cfg80211_mutex);
1574 mutex_lock(&reg_mutex);
1575 lr = get_last_request(); 1554 lr = get_last_request();
1576 1555
1577 /* When last_request->processed becomes true this will be rescheduled */ 1556 /* When last_request->processed becomes true this will be rescheduled */
1578 if (lr && !lr->processed) { 1557 if (lr && !lr->processed) {
1579 REG_DBG_PRINT("Pending regulatory request, waiting for it to be processed...\n"); 1558 REG_DBG_PRINT("Pending regulatory request, waiting for it to be processed...\n");
1580 goto out; 1559 return;
1581 } 1560 }
1582 1561
1583 spin_lock(&reg_requests_lock); 1562 spin_lock(&reg_requests_lock);
1584 1563
1585 if (list_empty(&reg_requests_list)) { 1564 if (list_empty(&reg_requests_list)) {
1586 spin_unlock(&reg_requests_lock); 1565 spin_unlock(&reg_requests_lock);
1587 goto out; 1566 return;
1588 } 1567 }
1589 1568
1590 reg_request = list_first_entry(&reg_requests_list, 1569 reg_request = list_first_entry(&reg_requests_list,
@@ -1595,10 +1574,6 @@ static void reg_process_pending_hints(void)
1595 spin_unlock(&reg_requests_lock); 1574 spin_unlock(&reg_requests_lock);
1596 1575
1597 reg_process_hint(reg_request, reg_request->initiator); 1576 reg_process_hint(reg_request, reg_request->initiator);
1598
1599out:
1600 mutex_unlock(&reg_mutex);
1601 mutex_unlock(&cfg80211_mutex);
1602} 1577}
1603 1578
1604/* Processes beacon hints -- this has nothing to do with country IEs */ 1579/* Processes beacon hints -- this has nothing to do with country IEs */
@@ -1607,9 +1582,6 @@ static void reg_process_pending_beacon_hints(void)
1607 struct cfg80211_registered_device *rdev; 1582 struct cfg80211_registered_device *rdev;
1608 struct reg_beacon *pending_beacon, *tmp; 1583 struct reg_beacon *pending_beacon, *tmp;
1609 1584
1610 mutex_lock(&cfg80211_mutex);
1611 mutex_lock(&reg_mutex);
1612
1613 /* This goes through the _pending_ beacon list */ 1585 /* This goes through the _pending_ beacon list */
1614 spin_lock_bh(&reg_pending_beacons_lock); 1586 spin_lock_bh(&reg_pending_beacons_lock);
1615 1587
@@ -1626,14 +1598,14 @@ static void reg_process_pending_beacon_hints(void)
1626 } 1598 }
1627 1599
1628 spin_unlock_bh(&reg_pending_beacons_lock); 1600 spin_unlock_bh(&reg_pending_beacons_lock);
1629 mutex_unlock(&reg_mutex);
1630 mutex_unlock(&cfg80211_mutex);
1631} 1601}
1632 1602
1633static void reg_todo(struct work_struct *work) 1603static void reg_todo(struct work_struct *work)
1634{ 1604{
1605 rtnl_lock();
1635 reg_process_pending_hints(); 1606 reg_process_pending_hints();
1636 reg_process_pending_beacon_hints(); 1607 reg_process_pending_beacon_hints();
1608 rtnl_unlock();
1637} 1609}
1638 1610
1639static void queue_regulatory_request(struct regulatory_request *request) 1611static void queue_regulatory_request(struct regulatory_request *request)
@@ -1717,29 +1689,23 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2)
1717} 1689}
1718EXPORT_SYMBOL(regulatory_hint); 1690EXPORT_SYMBOL(regulatory_hint);
1719 1691
1720/*
1721 * We hold wdev_lock() here so we cannot hold cfg80211_mutex() and
1722 * therefore cannot iterate over the rdev list here.
1723 */
1724void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, 1692void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band,
1725 const u8 *country_ie, u8 country_ie_len) 1693 const u8 *country_ie, u8 country_ie_len)
1726{ 1694{
1727 char alpha2[2]; 1695 char alpha2[2];
1728 enum environment_cap env = ENVIRON_ANY; 1696 enum environment_cap env = ENVIRON_ANY;
1729 struct regulatory_request *request, *lr; 1697 struct regulatory_request *request = NULL, *lr;
1730
1731 mutex_lock(&reg_mutex);
1732 lr = get_last_request();
1733
1734 if (unlikely(!lr))
1735 goto out;
1736 1698
1737 /* IE len must be evenly divisible by 2 */ 1699 /* IE len must be evenly divisible by 2 */
1738 if (country_ie_len & 0x01) 1700 if (country_ie_len & 0x01)
1739 goto out; 1701 return;
1740 1702
1741 if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) 1703 if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN)
1742 goto out; 1704 return;
1705
1706 request = kzalloc(sizeof(*request), GFP_KERNEL);
1707 if (!request)
1708 return;
1743 1709
1744 alpha2[0] = country_ie[0]; 1710 alpha2[0] = country_ie[0];
1745 alpha2[1] = country_ie[1]; 1711 alpha2[1] = country_ie[1];
@@ -1749,19 +1715,21 @@ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band,
1749 else if (country_ie[2] == 'O') 1715 else if (country_ie[2] == 'O')
1750 env = ENVIRON_OUTDOOR; 1716 env = ENVIRON_OUTDOOR;
1751 1717
1718 rcu_read_lock();
1719 lr = get_last_request();
1720
1721 if (unlikely(!lr))
1722 goto out;
1723
1752 /* 1724 /*
1753 * We will run this only upon a successful connection on cfg80211. 1725 * We will run this only upon a successful connection on cfg80211.
1754	 * We leave conflict resolution to the workqueue, where we can hold	1726	 * We leave conflict resolution to the workqueue, where we can hold
1755 * cfg80211_mutex. 1727 * the RTNL.
1756 */ 1728 */
1757 if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && 1729 if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE &&
1758 lr->wiphy_idx != WIPHY_IDX_INVALID) 1730 lr->wiphy_idx != WIPHY_IDX_INVALID)
1759 goto out; 1731 goto out;
1760 1732
1761 request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL);
1762 if (!request)
1763 goto out;
1764
1765 request->wiphy_idx = get_wiphy_idx(wiphy); 1733 request->wiphy_idx = get_wiphy_idx(wiphy);
1766 request->alpha2[0] = alpha2[0]; 1734 request->alpha2[0] = alpha2[0];
1767 request->alpha2[1] = alpha2[1]; 1735 request->alpha2[1] = alpha2[1];
@@ -1769,8 +1737,10 @@ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band,
1769 request->country_ie_env = env; 1737 request->country_ie_env = env;
1770 1738
1771 queue_regulatory_request(request); 1739 queue_regulatory_request(request);
1740 request = NULL;
1772out: 1741out:
1773 mutex_unlock(&reg_mutex); 1742 kfree(request);
1743 rcu_read_unlock();
1774} 1744}
1775 1745
1776static void restore_alpha2(char *alpha2, bool reset_user) 1746static void restore_alpha2(char *alpha2, bool reset_user)
@@ -1858,8 +1828,7 @@ static void restore_regulatory_settings(bool reset_user)
1858 LIST_HEAD(tmp_reg_req_list); 1828 LIST_HEAD(tmp_reg_req_list);
1859 struct cfg80211_registered_device *rdev; 1829 struct cfg80211_registered_device *rdev;
1860 1830
1861 mutex_lock(&cfg80211_mutex); 1831 ASSERT_RTNL();
1862 mutex_lock(&reg_mutex);
1863 1832
1864 reset_regdomains(true, &world_regdom); 1833 reset_regdomains(true, &world_regdom);
1865 restore_alpha2(alpha2, reset_user); 1834 restore_alpha2(alpha2, reset_user);
@@ -1914,9 +1883,6 @@ static void restore_regulatory_settings(bool reset_user)
1914 list_splice_tail_init(&tmp_reg_req_list, &reg_requests_list); 1883 list_splice_tail_init(&tmp_reg_req_list, &reg_requests_list);
1915 spin_unlock(&reg_requests_lock); 1884 spin_unlock(&reg_requests_lock);
1916 1885
1917 mutex_unlock(&reg_mutex);
1918 mutex_unlock(&cfg80211_mutex);
1919
1920 REG_DBG_PRINT("Kicking the queue\n"); 1886 REG_DBG_PRINT("Kicking the queue\n");
1921 1887
1922 schedule_work(&reg_work); 1888 schedule_work(&reg_work);
@@ -2231,7 +2197,6 @@ int set_regdom(const struct ieee80211_regdomain *rd)
2231 struct regulatory_request *lr; 2197 struct regulatory_request *lr;
2232 int r; 2198 int r;
2233 2199
2234 mutex_lock(&reg_mutex);
2235 lr = get_last_request(); 2200 lr = get_last_request();
2236 2201
2237 /* Note that this doesn't update the wiphys, this is done below */ 2202 /* Note that this doesn't update the wiphys, this is done below */
@@ -2241,14 +2206,12 @@ int set_regdom(const struct ieee80211_regdomain *rd)
2241 reg_set_request_processed(); 2206 reg_set_request_processed();
2242 2207
2243 kfree(rd); 2208 kfree(rd);
2244 goto out; 2209 return r;
2245 } 2210 }
2246 2211
2247 /* This would make this whole thing pointless */ 2212 /* This would make this whole thing pointless */
2248 if (WARN_ON(!lr->intersect && rd != get_cfg80211_regdom())) { 2213 if (WARN_ON(!lr->intersect && rd != get_cfg80211_regdom()))
2249 r = -EINVAL; 2214 return -EINVAL;
2250 goto out;
2251 }
2252 2215
2253 /* update all wiphys now with the new established regulatory domain */ 2216 /* update all wiphys now with the new established regulatory domain */
2254 update_all_wiphy_regulatory(lr->initiator); 2217 update_all_wiphy_regulatory(lr->initiator);
@@ -2259,10 +2222,7 @@ int set_regdom(const struct ieee80211_regdomain *rd)
2259 2222
2260 reg_set_request_processed(); 2223 reg_set_request_processed();
2261 2224
2262 out: 2225 return 0;
2263 mutex_unlock(&reg_mutex);
2264
2265 return r;
2266} 2226}
2267 2227
2268int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) 2228int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env)
@@ -2287,23 +2247,20 @@ int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env)
2287 2247
2288void wiphy_regulatory_register(struct wiphy *wiphy) 2248void wiphy_regulatory_register(struct wiphy *wiphy)
2289{ 2249{
2290 mutex_lock(&reg_mutex); 2250 struct regulatory_request *lr;
2291 2251
2292 if (!reg_dev_ignore_cell_hint(wiphy)) 2252 if (!reg_dev_ignore_cell_hint(wiphy))
2293 reg_num_devs_support_basehint++; 2253 reg_num_devs_support_basehint++;
2294 2254
2295 wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); 2255 lr = get_last_request();
2296 2256 wiphy_update_regulatory(wiphy, lr->initiator);
2297 mutex_unlock(&reg_mutex);
2298} 2257}
2299 2258
2300/* Caller must hold cfg80211_mutex */
2301void wiphy_regulatory_deregister(struct wiphy *wiphy) 2259void wiphy_regulatory_deregister(struct wiphy *wiphy)
2302{ 2260{
2303 struct wiphy *request_wiphy = NULL; 2261 struct wiphy *request_wiphy = NULL;
2304 struct regulatory_request *lr; 2262 struct regulatory_request *lr;
2305 2263
2306 mutex_lock(&reg_mutex);
2307 lr = get_last_request(); 2264 lr = get_last_request();
2308 2265
2309 if (!reg_dev_ignore_cell_hint(wiphy)) 2266 if (!reg_dev_ignore_cell_hint(wiphy))
@@ -2316,18 +2273,18 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy)
2316 request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); 2273 request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx);
2317 2274
2318 if (!request_wiphy || request_wiphy != wiphy) 2275 if (!request_wiphy || request_wiphy != wiphy)
2319 goto out; 2276 return;
2320 2277
2321 lr->wiphy_idx = WIPHY_IDX_INVALID; 2278 lr->wiphy_idx = WIPHY_IDX_INVALID;
2322 lr->country_ie_env = ENVIRON_ANY; 2279 lr->country_ie_env = ENVIRON_ANY;
2323out:
2324 mutex_unlock(&reg_mutex);
2325} 2280}
2326 2281
2327static void reg_timeout_work(struct work_struct *work) 2282static void reg_timeout_work(struct work_struct *work)
2328{ 2283{
2329 REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n"); 2284 REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n");
2285 rtnl_lock();
2330 restore_regulatory_settings(true); 2286 restore_regulatory_settings(true);
2287 rtnl_unlock();
2331} 2288}
2332 2289
2333int __init regulatory_init(void) 2290int __init regulatory_init(void)
@@ -2385,9 +2342,9 @@ void regulatory_exit(void)
2385 cancel_delayed_work_sync(&reg_timeout); 2342 cancel_delayed_work_sync(&reg_timeout);
2386 2343
2387 /* Lock to suppress warnings */ 2344 /* Lock to suppress warnings */
2388 mutex_lock(&reg_mutex); 2345 rtnl_lock();
2389 reset_regdomains(true, NULL); 2346 reset_regdomains(true, NULL);
2390 mutex_unlock(&reg_mutex); 2347 rtnl_unlock();
2391 2348
2392 dev_set_uevent_suppress(&reg_pdev->dev, true); 2349 dev_set_uevent_suppress(&reg_pdev->dev, true);
2393 2350
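
The reg.c hunks above drop the private reg_mutex in favour of the global RTNL for writers, while readers keep a lock-free RCU path (rtnl_dereference() on update paths, rcu_dereference() for readers). A loose userspace analogue of that single-writer-lock/cheap-reader split is sketched below, using a pthread rwlock purely for illustration; the kernel uses the RTNL plus RCU rather than a rwlock, and frees old domains through kfree_rcu() rather than an immediate free().

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One global writer lock standing in for the RTNL; readers take the
 * shared side, standing in for an RCU read-side critical section. */
static pthread_rwlock_t rtnl = PTHREAD_RWLOCK_INITIALIZER;

struct regdomain { char alpha2[3]; };

static struct regdomain *current_regdom;

/* Update path: the "RTNL" (write lock) must be held while swapping. */
static void set_regdom(const char *alpha2)
{
	struct regdomain *rd = malloc(sizeof(*rd));
	struct regdomain *old;

	if (!rd)
		return;
	strncpy(rd->alpha2, alpha2, 2);
	rd->alpha2[2] = '\0';

	pthread_rwlock_wrlock(&rtnl);
	old = current_regdom;
	current_regdom = rd;
	pthread_rwlock_unlock(&rtnl);
	free(old);		/* the kernel would kfree_rcu() instead */
}

/* Read path: cheap shared access, no writer serialization needed. */
static void print_regdom(void)
{
	pthread_rwlock_rdlock(&rtnl);
	printf("regdom: %s\n", current_regdom ? current_regdom->alpha2 : "??");
	pthread_rwlock_unlock(&rtnl);
}

int main(void)
{
	set_regdom("US");
	print_regdom();
	set_regdom("DE");
	print_regdom();
	free(current_regdom);
	return 0;
}
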
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index fd99ea495b7e..eeb71480f1af 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -169,7 +169,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak)
169 union iwreq_data wrqu; 169 union iwreq_data wrqu;
170#endif 170#endif
171 171
172 lockdep_assert_held(&rdev->sched_scan_mtx); 172 ASSERT_RTNL();
173 173
174 request = rdev->scan_req; 174 request = rdev->scan_req;
175 175
@@ -230,9 +230,9 @@ void __cfg80211_scan_done(struct work_struct *wk)
230 rdev = container_of(wk, struct cfg80211_registered_device, 230 rdev = container_of(wk, struct cfg80211_registered_device,
231 scan_done_wk); 231 scan_done_wk);
232 232
233 mutex_lock(&rdev->sched_scan_mtx); 233 rtnl_lock();
234 ___cfg80211_scan_done(rdev, false); 234 ___cfg80211_scan_done(rdev, false);
235 mutex_unlock(&rdev->sched_scan_mtx); 235 rtnl_unlock();
236} 236}
237 237
238void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) 238void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted)
@@ -241,6 +241,7 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted)
241 WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req); 241 WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req);
242 242
243 request->aborted = aborted; 243 request->aborted = aborted;
244 request->notified = true;
244 queue_work(cfg80211_wq, &wiphy_to_dev(request->wiphy)->scan_done_wk); 245 queue_work(cfg80211_wq, &wiphy_to_dev(request->wiphy)->scan_done_wk);
245} 246}
246EXPORT_SYMBOL(cfg80211_scan_done); 247EXPORT_SYMBOL(cfg80211_scan_done);
@@ -255,7 +256,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk)
255 256
256 request = rdev->sched_scan_req; 257 request = rdev->sched_scan_req;
257 258
258 mutex_lock(&rdev->sched_scan_mtx); 259 rtnl_lock();
259 260
260 /* we don't have sched_scan_req anymore if the scan is stopping */ 261 /* we don't have sched_scan_req anymore if the scan is stopping */
261 if (request) { 262 if (request) {
@@ -270,7 +271,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk)
270 nl80211_send_sched_scan_results(rdev, request->dev); 271 nl80211_send_sched_scan_results(rdev, request->dev);
271 } 272 }
272 273
273 mutex_unlock(&rdev->sched_scan_mtx); 274 rtnl_unlock();
274} 275}
275 276
276void cfg80211_sched_scan_results(struct wiphy *wiphy) 277void cfg80211_sched_scan_results(struct wiphy *wiphy)
@@ -289,9 +290,9 @@ void cfg80211_sched_scan_stopped(struct wiphy *wiphy)
289 290
290 trace_cfg80211_sched_scan_stopped(wiphy); 291 trace_cfg80211_sched_scan_stopped(wiphy);
291 292
292 mutex_lock(&rdev->sched_scan_mtx); 293 rtnl_lock();
293 __cfg80211_stop_sched_scan(rdev, true); 294 __cfg80211_stop_sched_scan(rdev, true);
294 mutex_unlock(&rdev->sched_scan_mtx); 295 rtnl_unlock();
295} 296}
296EXPORT_SYMBOL(cfg80211_sched_scan_stopped); 297EXPORT_SYMBOL(cfg80211_sched_scan_stopped);
297 298
@@ -300,7 +301,7 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev,
300{ 301{
301 struct net_device *dev; 302 struct net_device *dev;
302 303
303 lockdep_assert_held(&rdev->sched_scan_mtx); 304 ASSERT_RTNL();
304 305
305 if (!rdev->sched_scan_req) 306 if (!rdev->sched_scan_req)
306 return -ENOENT; 307 return -ENOENT;
@@ -464,10 +465,6 @@ static int cmp_bss(struct cfg80211_bss *a,
464 } 465 }
465 } 466 }
466 467
467 /*
468 * we can't use compare_ether_addr here since we need a < > operator.
469 * The binary return value of compare_ether_addr isn't enough
470 */
471 r = memcmp(a->bssid, b->bssid, sizeof(a->bssid)); 468 r = memcmp(a->bssid, b->bssid, sizeof(a->bssid));
472 if (r) 469 if (r)
473 return r; 470 return r;
@@ -522,6 +519,7 @@ static int cmp_bss(struct cfg80211_bss *a,
522 } 519 }
523} 520}
524 521
522/* Returned bss is reference counted and must be cleaned up appropriately. */
525struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, 523struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
526 struct ieee80211_channel *channel, 524 struct ieee80211_channel *channel,
527 const u8 *bssid, 525 const u8 *bssid,
@@ -649,6 +647,8 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev,
649 continue; 647 continue;
650 if (bss->pub.channel != new->pub.channel) 648 if (bss->pub.channel != new->pub.channel)
651 continue; 649 continue;
650 if (bss->pub.scan_width != new->pub.scan_width)
651 continue;
652 if (rcu_access_pointer(bss->pub.beacon_ies)) 652 if (rcu_access_pointer(bss->pub.beacon_ies))
653 continue; 653 continue;
654 ies = rcu_access_pointer(bss->pub.ies); 654 ies = rcu_access_pointer(bss->pub.ies);
@@ -677,6 +677,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev,
677 return true; 677 return true;
678} 678}
679 679
680/* Returned bss is reference counted and must be cleaned up appropriately. */
680static struct cfg80211_internal_bss * 681static struct cfg80211_internal_bss *
681cfg80211_bss_update(struct cfg80211_registered_device *dev, 682cfg80211_bss_update(struct cfg80211_registered_device *dev,
682 struct cfg80211_internal_bss *tmp) 683 struct cfg80211_internal_bss *tmp)
@@ -865,12 +866,14 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
865 return channel; 866 return channel;
866} 867}
867 868
869/* Returned bss is reference counted and must be cleaned up appropriately. */
868struct cfg80211_bss* 870struct cfg80211_bss*
869cfg80211_inform_bss(struct wiphy *wiphy, 871cfg80211_inform_bss_width(struct wiphy *wiphy,
870 struct ieee80211_channel *channel, 872 struct ieee80211_channel *channel,
871 const u8 *bssid, u64 tsf, u16 capability, 873 enum nl80211_bss_scan_width scan_width,
872 u16 beacon_interval, const u8 *ie, size_t ielen, 874 const u8 *bssid, u64 tsf, u16 capability,
873 s32 signal, gfp_t gfp) 875 u16 beacon_interval, const u8 *ie, size_t ielen,
876 s32 signal, gfp_t gfp)
874{ 877{
875 struct cfg80211_bss_ies *ies; 878 struct cfg80211_bss_ies *ies;
876 struct cfg80211_internal_bss tmp = {}, *res; 879 struct cfg80211_internal_bss tmp = {}, *res;
@@ -888,6 +891,7 @@ cfg80211_inform_bss(struct wiphy *wiphy,
888 891
889 memcpy(tmp.pub.bssid, bssid, ETH_ALEN); 892 memcpy(tmp.pub.bssid, bssid, ETH_ALEN);
890 tmp.pub.channel = channel; 893 tmp.pub.channel = channel;
894 tmp.pub.scan_width = scan_width;
891 tmp.pub.signal = signal; 895 tmp.pub.signal = signal;
892 tmp.pub.beacon_interval = beacon_interval; 896 tmp.pub.beacon_interval = beacon_interval;
893 tmp.pub.capability = capability; 897 tmp.pub.capability = capability;
@@ -920,13 +924,15 @@ cfg80211_inform_bss(struct wiphy *wiphy,
920 /* cfg80211_bss_update gives us a referenced result */ 924 /* cfg80211_bss_update gives us a referenced result */
921 return &res->pub; 925 return &res->pub;
922} 926}
923EXPORT_SYMBOL(cfg80211_inform_bss); 927EXPORT_SYMBOL(cfg80211_inform_bss_width);
924 928
929/* Returned bss is reference counted and must be cleaned up appropriately. */
925struct cfg80211_bss * 930struct cfg80211_bss *
926cfg80211_inform_bss_frame(struct wiphy *wiphy, 931cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
927 struct ieee80211_channel *channel, 932 struct ieee80211_channel *channel,
928 struct ieee80211_mgmt *mgmt, size_t len, 933 enum nl80211_bss_scan_width scan_width,
929 s32 signal, gfp_t gfp) 934 struct ieee80211_mgmt *mgmt, size_t len,
935 s32 signal, gfp_t gfp)
930{ 936{
931 struct cfg80211_internal_bss tmp = {}, *res; 937 struct cfg80211_internal_bss tmp = {}, *res;
932 struct cfg80211_bss_ies *ies; 938 struct cfg80211_bss_ies *ies;
@@ -936,7 +942,8 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
936 BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) != 942 BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) !=
937 offsetof(struct ieee80211_mgmt, u.beacon.variable)); 943 offsetof(struct ieee80211_mgmt, u.beacon.variable));
938 944
939 trace_cfg80211_inform_bss_frame(wiphy, channel, mgmt, len, signal); 945 trace_cfg80211_inform_bss_width_frame(wiphy, channel, scan_width, mgmt,
946 len, signal);
940 947
941 if (WARN_ON(!mgmt)) 948 if (WARN_ON(!mgmt))
942 return NULL; 949 return NULL;
@@ -971,6 +978,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
971 978
972 memcpy(tmp.pub.bssid, mgmt->bssid, ETH_ALEN); 979 memcpy(tmp.pub.bssid, mgmt->bssid, ETH_ALEN);
973 tmp.pub.channel = channel; 980 tmp.pub.channel = channel;
981 tmp.pub.scan_width = scan_width;
974 tmp.pub.signal = signal; 982 tmp.pub.signal = signal;
975 tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); 983 tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
976 tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); 984 tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
@@ -986,7 +994,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
986 /* cfg80211_bss_update gives us a referenced result */ 994 /* cfg80211_bss_update gives us a referenced result */
987 return &res->pub; 995 return &res->pub;
988} 996}
989EXPORT_SYMBOL(cfg80211_inform_bss_frame); 997EXPORT_SYMBOL(cfg80211_inform_bss_width_frame);
990 998
991void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) 999void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
992{ 1000{
@@ -1040,6 +1048,25 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
1040EXPORT_SYMBOL(cfg80211_unlink_bss); 1048EXPORT_SYMBOL(cfg80211_unlink_bss);
1041 1049
1042#ifdef CONFIG_CFG80211_WEXT 1050#ifdef CONFIG_CFG80211_WEXT
1051static struct cfg80211_registered_device *
1052cfg80211_get_dev_from_ifindex(struct net *net, int ifindex)
1053{
1054 struct cfg80211_registered_device *rdev;
1055 struct net_device *dev;
1056
1057 ASSERT_RTNL();
1058
1059 dev = dev_get_by_index(net, ifindex);
1060 if (!dev)
1061 return ERR_PTR(-ENODEV);
1062 if (dev->ieee80211_ptr)
1063 rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy);
1064 else
1065 rdev = ERR_PTR(-ENODEV);
1066 dev_put(dev);
1067 return rdev;
1068}
1069
1043int cfg80211_wext_siwscan(struct net_device *dev, 1070int cfg80211_wext_siwscan(struct net_device *dev,
1044 struct iw_request_info *info, 1071 struct iw_request_info *info,
1045 union iwreq_data *wrqu, char *extra) 1072 union iwreq_data *wrqu, char *extra)
@@ -1062,7 +1089,6 @@ int cfg80211_wext_siwscan(struct net_device *dev,
1062 if (IS_ERR(rdev)) 1089 if (IS_ERR(rdev))
1063 return PTR_ERR(rdev); 1090 return PTR_ERR(rdev);
1064 1091
1065 mutex_lock(&rdev->sched_scan_mtx);
1066 if (rdev->scan_req) { 1092 if (rdev->scan_req) {
1067 err = -EBUSY; 1093 err = -EBUSY;
1068 goto out; 1094 goto out;
@@ -1169,9 +1195,7 @@ int cfg80211_wext_siwscan(struct net_device *dev,
1169 dev_hold(dev); 1195 dev_hold(dev);
1170 } 1196 }
1171 out: 1197 out:
1172 mutex_unlock(&rdev->sched_scan_mtx);
1173 kfree(creq); 1198 kfree(creq);
1174 cfg80211_unlock_rdev(rdev);
1175 return err; 1199 return err;
1176} 1200}
1177EXPORT_SYMBOL_GPL(cfg80211_wext_siwscan); 1201EXPORT_SYMBOL_GPL(cfg80211_wext_siwscan);
@@ -1470,10 +1494,8 @@ int cfg80211_wext_giwscan(struct net_device *dev,
1470 if (IS_ERR(rdev)) 1494 if (IS_ERR(rdev))
1471 return PTR_ERR(rdev); 1495 return PTR_ERR(rdev);
1472 1496
1473 if (rdev->scan_req) { 1497 if (rdev->scan_req)
1474 res = -EAGAIN; 1498 return -EAGAIN;
1475 goto out;
1476 }
1477 1499
1478 res = ieee80211_scan_results(rdev, info, extra, data->length); 1500 res = ieee80211_scan_results(rdev, info, extra, data->length);
1479 data->length = 0; 1501 data->length = 0;
@@ -1482,8 +1504,6 @@ int cfg80211_wext_giwscan(struct net_device *dev,
1482 res = 0; 1504 res = 0;
1483 } 1505 }
1484 1506
1485 out:
1486 cfg80211_unlock_rdev(rdev);
1487 return res; 1507 return res;
1488} 1508}
1489EXPORT_SYMBOL_GPL(cfg80211_wext_giwscan); 1509EXPORT_SYMBOL_GPL(cfg80211_wext_giwscan);
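
Several scan.c entry points above gain the comment that a returned bss is reference counted and must be cleaned up by the caller. A minimal userspace sketch of that get/put contract follows, assuming a plain atomic counter; cfg80211 itself uses its own hold/put/unlink helpers rather than these illustrative ones.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct bss {
	atomic_int refcount;
	char bssid[18];
};

/* "get" hands the object back with an extra reference for the caller. */
static struct bss *bss_get(struct bss *bss)
{
	atomic_fetch_add(&bss->refcount, 1);
	return bss;
}

/* "put" drops the caller's reference; the last put frees the object. */
static void bss_put(struct bss *bss)
{
	if (atomic_fetch_sub(&bss->refcount, 1) == 1)
		free(bss);
}

int main(void)
{
	struct bss *bss = calloc(1, sizeof(*bss));

	if (!bss)
		return 1;
	atomic_init(&bss->refcount, 1);
	strcpy(bss->bssid, "00:11:22:33:44:55");

	struct bss *found = bss_get(bss);	/* lookup returns a reference */
	printf("found %s\n", found->bssid);
	bss_put(found);				/* caller must clean up */

	bss_put(bss);				/* drop the original reference */
	return 0;
}
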
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 3ed35c345cae..20e86a95dc4e 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -1,5 +1,7 @@
1/* 1/*
2 * SME code for cfg80211's connect emulation. 2 * SME code for cfg80211
3 * both driver SME event handling and the SME implementation
4 * (for nl80211's connect() and wext)
3 * 5 *
4 * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> 6 * Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
5 * Copyright (C) 2009 Intel Corporation. All rights reserved. 7 * Copyright (C) 2009 Intel Corporation. All rights reserved.
@@ -18,18 +20,26 @@
18#include "reg.h" 20#include "reg.h"
19#include "rdev-ops.h" 21#include "rdev-ops.h"
20 22
23/*
24 * Software SME in cfg80211, using auth/assoc/deauth calls to the
  25 * driver. This is for implementing nl80211's connect/disconnect
  26 * and wireless extensions (if configured).
27 */
28
21struct cfg80211_conn { 29struct cfg80211_conn {
22 struct cfg80211_connect_params params; 30 struct cfg80211_connect_params params;
23 /* these are sub-states of the _CONNECTING sme_state */ 31 /* these are sub-states of the _CONNECTING sme_state */
24 enum { 32 enum {
25 CFG80211_CONN_IDLE,
26 CFG80211_CONN_SCANNING, 33 CFG80211_CONN_SCANNING,
27 CFG80211_CONN_SCAN_AGAIN, 34 CFG80211_CONN_SCAN_AGAIN,
28 CFG80211_CONN_AUTHENTICATE_NEXT, 35 CFG80211_CONN_AUTHENTICATE_NEXT,
29 CFG80211_CONN_AUTHENTICATING, 36 CFG80211_CONN_AUTHENTICATING,
37 CFG80211_CONN_AUTH_FAILED,
30 CFG80211_CONN_ASSOCIATE_NEXT, 38 CFG80211_CONN_ASSOCIATE_NEXT,
31 CFG80211_CONN_ASSOCIATING, 39 CFG80211_CONN_ASSOCIATING,
32 CFG80211_CONN_DEAUTH_ASSOC_FAIL, 40 CFG80211_CONN_ASSOC_FAILED,
41 CFG80211_CONN_DEAUTH,
42 CFG80211_CONN_CONNECTED,
33 } state; 43 } state;
34 u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; 44 u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN];
35 u8 *ie; 45 u8 *ie;
@@ -37,45 +47,16 @@ struct cfg80211_conn {
37 bool auto_auth, prev_bssid_valid; 47 bool auto_auth, prev_bssid_valid;
38}; 48};
39 49
40static bool cfg80211_is_all_idle(void) 50static void cfg80211_sme_free(struct wireless_dev *wdev)
41{
42 struct cfg80211_registered_device *rdev;
43 struct wireless_dev *wdev;
44 bool is_all_idle = true;
45
46 mutex_lock(&cfg80211_mutex);
47
48 /*
49 * All devices must be idle as otherwise if you are actively
50 * scanning some new beacon hints could be learned and would
51 * count as new regulatory hints.
52 */
53 list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
54 cfg80211_lock_rdev(rdev);
55 list_for_each_entry(wdev, &rdev->wdev_list, list) {
56 wdev_lock(wdev);
57 if (wdev->sme_state != CFG80211_SME_IDLE)
58 is_all_idle = false;
59 wdev_unlock(wdev);
60 }
61 cfg80211_unlock_rdev(rdev);
62 }
63
64 mutex_unlock(&cfg80211_mutex);
65
66 return is_all_idle;
67}
68
69static void disconnect_work(struct work_struct *work)
70{ 51{
71 if (!cfg80211_is_all_idle()) 52 if (!wdev->conn)
72 return; 53 return;
73 54
74 regulatory_hint_disconnect(); 55 kfree(wdev->conn->ie);
56 kfree(wdev->conn);
57 wdev->conn = NULL;
75} 58}
76 59
77static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work);
78
79static int cfg80211_conn_scan(struct wireless_dev *wdev) 60static int cfg80211_conn_scan(struct wireless_dev *wdev)
80{ 61{
81 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); 62 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
@@ -85,7 +66,6 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
85 ASSERT_RTNL(); 66 ASSERT_RTNL();
86 ASSERT_RDEV_LOCK(rdev); 67 ASSERT_RDEV_LOCK(rdev);
87 ASSERT_WDEV_LOCK(wdev); 68 ASSERT_WDEV_LOCK(wdev);
88 lockdep_assert_held(&rdev->sched_scan_mtx);
89 69
90 if (rdev->scan_req) 70 if (rdev->scan_req)
91 return -EBUSY; 71 return -EBUSY;
@@ -171,18 +151,23 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
171 params = &wdev->conn->params; 151 params = &wdev->conn->params;
172 152
173 switch (wdev->conn->state) { 153 switch (wdev->conn->state) {
154 case CFG80211_CONN_SCANNING:
155 /* didn't find it during scan ... */
156 return -ENOENT;
174 case CFG80211_CONN_SCAN_AGAIN: 157 case CFG80211_CONN_SCAN_AGAIN:
175 return cfg80211_conn_scan(wdev); 158 return cfg80211_conn_scan(wdev);
176 case CFG80211_CONN_AUTHENTICATE_NEXT: 159 case CFG80211_CONN_AUTHENTICATE_NEXT:
177 BUG_ON(!rdev->ops->auth); 160 BUG_ON(!rdev->ops->auth);
178 wdev->conn->state = CFG80211_CONN_AUTHENTICATING; 161 wdev->conn->state = CFG80211_CONN_AUTHENTICATING;
179 return __cfg80211_mlme_auth(rdev, wdev->netdev, 162 return cfg80211_mlme_auth(rdev, wdev->netdev,
180 params->channel, params->auth_type, 163 params->channel, params->auth_type,
181 params->bssid, 164 params->bssid,
182 params->ssid, params->ssid_len, 165 params->ssid, params->ssid_len,
183 NULL, 0, 166 NULL, 0,
184 params->key, params->key_len, 167 params->key, params->key_len,
185 params->key_idx, NULL, 0); 168 params->key_idx, NULL, 0);
169 case CFG80211_CONN_AUTH_FAILED:
170 return -ENOTCONN;
186 case CFG80211_CONN_ASSOCIATE_NEXT: 171 case CFG80211_CONN_ASSOCIATE_NEXT:
187 BUG_ON(!rdev->ops->assoc); 172 BUG_ON(!rdev->ops->assoc);
188 wdev->conn->state = CFG80211_CONN_ASSOCIATING; 173 wdev->conn->state = CFG80211_CONN_ASSOCIATING;
@@ -198,21 +183,27 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
198 req.vht_capa = params->vht_capa; 183 req.vht_capa = params->vht_capa;
199 req.vht_capa_mask = params->vht_capa_mask; 184 req.vht_capa_mask = params->vht_capa_mask;
200 185
201 err = __cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel, 186 err = cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel,
202 params->bssid, params->ssid, 187 params->bssid, params->ssid,
203 params->ssid_len, &req); 188 params->ssid_len, &req);
204 if (err) 189 if (err)
205 __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, 190 cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
206 NULL, 0, 191 NULL, 0,
207 WLAN_REASON_DEAUTH_LEAVING, 192 WLAN_REASON_DEAUTH_LEAVING,
208 false); 193 false);
209 return err; 194 return err;
210 case CFG80211_CONN_DEAUTH_ASSOC_FAIL: 195 case CFG80211_CONN_ASSOC_FAILED:
211 __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, 196 cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
212 NULL, 0, 197 NULL, 0,
213 WLAN_REASON_DEAUTH_LEAVING, false); 198 WLAN_REASON_DEAUTH_LEAVING, false);
214 /* return an error so that we call __cfg80211_connect_result() */ 199 return -ENOTCONN;
215 return -EINVAL; 200 case CFG80211_CONN_DEAUTH:
201 cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
202 NULL, 0,
203 WLAN_REASON_DEAUTH_LEAVING, false);
204 /* free directly, disconnected event already sent */
205 cfg80211_sme_free(wdev);
206 return 0;
216 default: 207 default:
217 return 0; 208 return 0;
218 } 209 }
@@ -226,9 +217,6 @@ void cfg80211_conn_work(struct work_struct *work)
226 u8 bssid_buf[ETH_ALEN], *bssid = NULL; 217 u8 bssid_buf[ETH_ALEN], *bssid = NULL;
227 218
228 rtnl_lock(); 219 rtnl_lock();
229 cfg80211_lock_rdev(rdev);
230 mutex_lock(&rdev->devlist_mtx);
231 mutex_lock(&rdev->sched_scan_mtx);
232 220
233 list_for_each_entry(wdev, &rdev->wdev_list, list) { 221 list_for_each_entry(wdev, &rdev->wdev_list, list) {
234 if (!wdev->netdev) 222 if (!wdev->netdev)
@@ -239,7 +227,8 @@ void cfg80211_conn_work(struct work_struct *work)
239 wdev_unlock(wdev); 227 wdev_unlock(wdev);
240 continue; 228 continue;
241 } 229 }
242 if (wdev->sme_state != CFG80211_SME_CONNECTING || !wdev->conn) { 230 if (!wdev->conn ||
231 wdev->conn->state == CFG80211_CONN_CONNECTED) {
243 wdev_unlock(wdev); 232 wdev_unlock(wdev);
244 continue; 233 continue;
245 } 234 }
@@ -247,21 +236,21 @@ void cfg80211_conn_work(struct work_struct *work)
247 memcpy(bssid_buf, wdev->conn->params.bssid, ETH_ALEN); 236 memcpy(bssid_buf, wdev->conn->params.bssid, ETH_ALEN);
248 bssid = bssid_buf; 237 bssid = bssid_buf;
249 } 238 }
250 if (cfg80211_conn_do_work(wdev)) 239 if (cfg80211_conn_do_work(wdev)) {
251 __cfg80211_connect_result( 240 __cfg80211_connect_result(
252 wdev->netdev, bssid, 241 wdev->netdev, bssid,
253 NULL, 0, NULL, 0, 242 NULL, 0, NULL, 0,
254 WLAN_STATUS_UNSPECIFIED_FAILURE, 243 WLAN_STATUS_UNSPECIFIED_FAILURE,
255 false, NULL); 244 false, NULL);
245 cfg80211_sme_free(wdev);
246 }
256 wdev_unlock(wdev); 247 wdev_unlock(wdev);
257 } 248 }
258 249
259 mutex_unlock(&rdev->sched_scan_mtx);
260 mutex_unlock(&rdev->devlist_mtx);
261 cfg80211_unlock_rdev(rdev);
262 rtnl_unlock(); 250 rtnl_unlock();
263} 251}
264 252
253/* Returned bss is reference counted and must be cleaned up appropriately. */
265static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) 254static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
266{ 255{
267 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); 256 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
@@ -299,9 +288,6 @@ static void __cfg80211_sme_scan_done(struct net_device *dev)
299 288
300 ASSERT_WDEV_LOCK(wdev); 289 ASSERT_WDEV_LOCK(wdev);
301 290
302 if (wdev->sme_state != CFG80211_SME_CONNECTING)
303 return;
304
305 if (!wdev->conn) 291 if (!wdev->conn)
306 return; 292 return;
307 293
@@ -310,20 +296,10 @@ static void __cfg80211_sme_scan_done(struct net_device *dev)
310 return; 296 return;
311 297
312 bss = cfg80211_get_conn_bss(wdev); 298 bss = cfg80211_get_conn_bss(wdev);
313 if (bss) { 299 if (bss)
314 cfg80211_put_bss(&rdev->wiphy, bss); 300 cfg80211_put_bss(&rdev->wiphy, bss);
315 } else { 301 else
316 /* not found */ 302 schedule_work(&rdev->conn_work);
317 if (wdev->conn->state == CFG80211_CONN_SCAN_AGAIN)
318 schedule_work(&rdev->conn_work);
319 else
320 __cfg80211_connect_result(
321 wdev->netdev,
322 wdev->conn->params.bssid,
323 NULL, 0, NULL, 0,
324 WLAN_STATUS_UNSPECIFIED_FAILURE,
325 false, NULL);
326 }
327} 303}
328 304
329void cfg80211_sme_scan_done(struct net_device *dev) 305void cfg80211_sme_scan_done(struct net_device *dev)
@@ -335,10 +311,8 @@ void cfg80211_sme_scan_done(struct net_device *dev)
335 wdev_unlock(wdev); 311 wdev_unlock(wdev);
336} 312}
337 313
338void cfg80211_sme_rx_auth(struct net_device *dev, 314void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len)
339 const u8 *buf, size_t len)
340{ 315{
341 struct wireless_dev *wdev = dev->ieee80211_ptr;
342 struct wiphy *wiphy = wdev->wiphy; 316 struct wiphy *wiphy = wdev->wiphy;
343 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 317 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
344 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; 318 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
@@ -346,11 +320,7 @@ void cfg80211_sme_rx_auth(struct net_device *dev,
346 320
347 ASSERT_WDEV_LOCK(wdev); 321 ASSERT_WDEV_LOCK(wdev);
348 322
349 /* should only RX auth frames when connecting */ 323 if (!wdev->conn || wdev->conn->state == CFG80211_CONN_CONNECTED)
350 if (wdev->sme_state != CFG80211_SME_CONNECTING)
351 return;
352
353 if (WARN_ON(!wdev->conn))
354 return; 324 return;
355 325
356 if (status_code == WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG && 326 if (status_code == WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG &&
@@ -379,46 +349,239 @@ void cfg80211_sme_rx_auth(struct net_device *dev,
379 wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; 349 wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
380 schedule_work(&rdev->conn_work); 350 schedule_work(&rdev->conn_work);
381 } else if (status_code != WLAN_STATUS_SUCCESS) { 351 } else if (status_code != WLAN_STATUS_SUCCESS) {
382 __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, NULL, 0, 352 __cfg80211_connect_result(wdev->netdev, mgmt->bssid,
353 NULL, 0, NULL, 0,
383 status_code, false, NULL); 354 status_code, false, NULL);
384 } else if (wdev->sme_state == CFG80211_SME_CONNECTING && 355 } else if (wdev->conn->state == CFG80211_CONN_AUTHENTICATING) {
385 wdev->conn->state == CFG80211_CONN_AUTHENTICATING) {
386 wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; 356 wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT;
387 schedule_work(&rdev->conn_work); 357 schedule_work(&rdev->conn_work);
388 } 358 }
389} 359}
390 360
391bool cfg80211_sme_failed_reassoc(struct wireless_dev *wdev) 361bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status)
392{ 362{
393 struct wiphy *wiphy = wdev->wiphy; 363 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
394 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
395 364
396 if (WARN_ON(!wdev->conn)) 365 if (!wdev->conn)
397 return false; 366 return false;
398 367
399 if (!wdev->conn->prev_bssid_valid) 368 if (status == WLAN_STATUS_SUCCESS) {
369 wdev->conn->state = CFG80211_CONN_CONNECTED;
400 return false; 370 return false;
371 }
401 372
402 /* 373 if (wdev->conn->prev_bssid_valid) {
403 * Some stupid APs don't accept reassoc, so we 374 /*
404 * need to fall back to trying regular assoc. 375 * Some stupid APs don't accept reassoc, so we
405 */ 376 * need to fall back to trying regular assoc;
406 wdev->conn->prev_bssid_valid = false; 377 * return true so no event is sent to userspace.
407 wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; 378 */
379 wdev->conn->prev_bssid_valid = false;
380 wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT;
381 schedule_work(&rdev->conn_work);
382 return true;
383 }
384
385 wdev->conn->state = CFG80211_CONN_ASSOC_FAILED;
408 schedule_work(&rdev->conn_work); 386 schedule_work(&rdev->conn_work);
387 return false;
388}
409 389
410 return true; 390void cfg80211_sme_deauth(struct wireless_dev *wdev)
391{
392 cfg80211_sme_free(wdev);
411} 393}
412 394
413void cfg80211_sme_failed_assoc(struct wireless_dev *wdev) 395void cfg80211_sme_auth_timeout(struct wireless_dev *wdev)
414{ 396{
415 struct wiphy *wiphy = wdev->wiphy; 397 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
416 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 398
399 if (!wdev->conn)
400 return;
401
402 wdev->conn->state = CFG80211_CONN_AUTH_FAILED;
403 schedule_work(&rdev->conn_work);
404}
405
406void cfg80211_sme_disassoc(struct wireless_dev *wdev)
407{
408 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
409
410 if (!wdev->conn)
411 return;
417 412
418 wdev->conn->state = CFG80211_CONN_DEAUTH_ASSOC_FAIL; 413 wdev->conn->state = CFG80211_CONN_DEAUTH;
419 schedule_work(&rdev->conn_work); 414 schedule_work(&rdev->conn_work);
420} 415}
421 416
417void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev)
418{
419 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
420
421 if (!wdev->conn)
422 return;
423
424 wdev->conn->state = CFG80211_CONN_ASSOC_FAILED;
425 schedule_work(&rdev->conn_work);
426}
427
428static int cfg80211_sme_connect(struct wireless_dev *wdev,
429 struct cfg80211_connect_params *connect,
430 const u8 *prev_bssid)
431{
432 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
433 struct cfg80211_bss *bss;
434 int err;
435
436 if (!rdev->ops->auth || !rdev->ops->assoc)
437 return -EOPNOTSUPP;
438
439 if (wdev->current_bss)
440 return -EALREADY;
441
442 if (WARN_ON(wdev->conn))
443 return -EINPROGRESS;
444
445 wdev->conn = kzalloc(sizeof(*wdev->conn), GFP_KERNEL);
446 if (!wdev->conn)
447 return -ENOMEM;
448
449 /*
 450	 * Copy all parameters, and explicitly handle the IEs, BSSID and SSID.
451 */
452 memcpy(&wdev->conn->params, connect, sizeof(*connect));
453 if (connect->bssid) {
454 wdev->conn->params.bssid = wdev->conn->bssid;
455 memcpy(wdev->conn->bssid, connect->bssid, ETH_ALEN);
456 }
457
458 if (connect->ie) {
459 wdev->conn->ie = kmemdup(connect->ie, connect->ie_len,
460 GFP_KERNEL);
461 wdev->conn->params.ie = wdev->conn->ie;
462 if (!wdev->conn->ie) {
463 kfree(wdev->conn);
464 wdev->conn = NULL;
465 return -ENOMEM;
466 }
467 }
468
469 if (connect->auth_type == NL80211_AUTHTYPE_AUTOMATIC) {
470 wdev->conn->auto_auth = true;
471 /* start with open system ... should mostly work */
472 wdev->conn->params.auth_type =
473 NL80211_AUTHTYPE_OPEN_SYSTEM;
474 } else {
475 wdev->conn->auto_auth = false;
476 }
477
478 wdev->conn->params.ssid = wdev->ssid;
479 wdev->conn->params.ssid_len = connect->ssid_len;
480
481 /* see if we have the bss already */
482 bss = cfg80211_get_conn_bss(wdev);
483
484 if (prev_bssid) {
485 memcpy(wdev->conn->prev_bssid, prev_bssid, ETH_ALEN);
486 wdev->conn->prev_bssid_valid = true;
487 }
488
489 /* we're good if we have a matching bss struct */
490 if (bss) {
491 wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
492 err = cfg80211_conn_do_work(wdev);
493 cfg80211_put_bss(wdev->wiphy, bss);
494 } else {
495 /* otherwise we'll need to scan for the AP first */
496 err = cfg80211_conn_scan(wdev);
497
498 /*
499 * If we can't scan right now, then we need to scan again
 500		 * after the current scan finishes, since the parameters
501 * changed (unless we find a good AP anyway).
502 */
503 if (err == -EBUSY) {
504 err = 0;
505 wdev->conn->state = CFG80211_CONN_SCAN_AGAIN;
506 }
507 }
508
509 if (err)
510 cfg80211_sme_free(wdev);
511
512 return err;
513}
514
515static int cfg80211_sme_disconnect(struct wireless_dev *wdev, u16 reason)
516{
517 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
518 int err;
519
520 if (!wdev->conn)
521 return 0;
522
523 if (!rdev->ops->deauth)
524 return -EOPNOTSUPP;
525
526 if (wdev->conn->state == CFG80211_CONN_SCANNING ||
527 wdev->conn->state == CFG80211_CONN_SCAN_AGAIN) {
528 err = 0;
529 goto out;
530 }
531
532 /* wdev->conn->params.bssid must be set if > SCANNING */
533 err = cfg80211_mlme_deauth(rdev, wdev->netdev,
534 wdev->conn->params.bssid,
535 NULL, 0, reason, false);
536 out:
537 cfg80211_sme_free(wdev);
538 return err;
539}
540
541/*
542 * code shared for in-device and software SME
543 */
544
545static bool cfg80211_is_all_idle(void)
546{
547 struct cfg80211_registered_device *rdev;
548 struct wireless_dev *wdev;
549 bool is_all_idle = true;
550
551 /*
 552	 * All devices must be idle; otherwise, if any are actively
 553	 * scanning, new beacon hints could be learned and would
 554	 * count as new regulatory hints.
555 */
556 list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
557 list_for_each_entry(wdev, &rdev->wdev_list, list) {
558 wdev_lock(wdev);
559 if (wdev->conn || wdev->current_bss)
560 is_all_idle = false;
561 wdev_unlock(wdev);
562 }
563 }
564
565 return is_all_idle;
566}
567
568static void disconnect_work(struct work_struct *work)
569{
570 rtnl_lock();
571 if (cfg80211_is_all_idle())
572 regulatory_hint_disconnect();
573 rtnl_unlock();
574}
575
576static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work);
577
578
579/*
580 * API calls for drivers implementing connect/disconnect and
581 * SME event handling
582 */
583
584/* This method must consume bss one way or another */
422void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, 585void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
423 const u8 *req_ie, size_t req_ie_len, 586 const u8 *req_ie, size_t req_ie_len,
424 const u8 *resp_ie, size_t resp_ie_len, 587 const u8 *resp_ie, size_t resp_ie_len,
@@ -434,11 +597,10 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
434 ASSERT_WDEV_LOCK(wdev); 597 ASSERT_WDEV_LOCK(wdev);
435 598
436 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && 599 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
437 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) 600 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) {
438 return; 601 cfg80211_put_bss(wdev->wiphy, bss);
439
440 if (wdev->sme_state != CFG80211_SME_CONNECTING)
441 return; 602 return;
603 }
442 604
443 nl80211_send_connect_result(wiphy_to_dev(wdev->wiphy), dev, 605 nl80211_send_connect_result(wiphy_to_dev(wdev->wiphy), dev,
444 bssid, req_ie, req_ie_len, 606 bssid, req_ie, req_ie_len,
@@ -476,38 +638,30 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
476 wdev->current_bss = NULL; 638 wdev->current_bss = NULL;
477 } 639 }
478 640
479 if (wdev->conn)
480 wdev->conn->state = CFG80211_CONN_IDLE;
481
482 if (status != WLAN_STATUS_SUCCESS) { 641 if (status != WLAN_STATUS_SUCCESS) {
483 wdev->sme_state = CFG80211_SME_IDLE;
484 if (wdev->conn)
485 kfree(wdev->conn->ie);
486 kfree(wdev->conn);
487 wdev->conn = NULL;
488 kfree(wdev->connect_keys); 642 kfree(wdev->connect_keys);
489 wdev->connect_keys = NULL; 643 wdev->connect_keys = NULL;
490 wdev->ssid_len = 0; 644 wdev->ssid_len = 0;
491 cfg80211_put_bss(wdev->wiphy, bss); 645 if (bss) {
646 cfg80211_unhold_bss(bss_from_pub(bss));
647 cfg80211_put_bss(wdev->wiphy, bss);
648 }
492 return; 649 return;
493 } 650 }
494 651
495 if (!bss) 652 if (!bss) {
496 bss = cfg80211_get_bss(wdev->wiphy, 653 WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect);
497 wdev->conn ? wdev->conn->params.channel : 654 bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid,
498 NULL,
499 bssid,
500 wdev->ssid, wdev->ssid_len, 655 wdev->ssid, wdev->ssid_len,
501 WLAN_CAPABILITY_ESS, 656 WLAN_CAPABILITY_ESS,
502 WLAN_CAPABILITY_ESS); 657 WLAN_CAPABILITY_ESS);
658 if (WARN_ON(!bss))
659 return;
660 cfg80211_hold_bss(bss_from_pub(bss));
661 }
503 662
504 if (WARN_ON(!bss))
505 return;
506
507 cfg80211_hold_bss(bss_from_pub(bss));
508 wdev->current_bss = bss_from_pub(bss); 663 wdev->current_bss = bss_from_pub(bss);
509 664
510 wdev->sme_state = CFG80211_SME_CONNECTED;
511 cfg80211_upload_connect_keys(wdev); 665 cfg80211_upload_connect_keys(wdev);
512 666
513 rcu_read_lock(); 667 rcu_read_lock();
@@ -543,8 +697,6 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
543 struct cfg80211_event *ev; 697 struct cfg80211_event *ev;
544 unsigned long flags; 698 unsigned long flags;
545 699
546 CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTING);
547
548 ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp); 700 ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp);
549 if (!ev) 701 if (!ev)
550 return; 702 return;
@@ -571,6 +723,7 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
571} 723}
572EXPORT_SYMBOL(cfg80211_connect_result); 724EXPORT_SYMBOL(cfg80211_connect_result);
573 725
726/* Consumes bss object one way or another */
574void __cfg80211_roamed(struct wireless_dev *wdev, 727void __cfg80211_roamed(struct wireless_dev *wdev,
575 struct cfg80211_bss *bss, 728 struct cfg80211_bss *bss,
576 const u8 *req_ie, size_t req_ie_len, 729 const u8 *req_ie, size_t req_ie_len,
@@ -585,14 +738,9 @@ void __cfg80211_roamed(struct wireless_dev *wdev,
585 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) 738 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
586 goto out; 739 goto out;
587 740
588 if (wdev->sme_state != CFG80211_SME_CONNECTED) 741 if (WARN_ON(!wdev->current_bss))
589 goto out; 742 goto out;
590 743
591 /* internal error -- how did we get to CONNECTED w/o BSS? */
592 if (WARN_ON(!wdev->current_bss)) {
593 goto out;
594 }
595
596 cfg80211_unhold_bss(wdev->current_bss); 744 cfg80211_unhold_bss(wdev->current_bss);
597 cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); 745 cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
598 wdev->current_bss = NULL; 746 wdev->current_bss = NULL;
@@ -641,8 +789,6 @@ void cfg80211_roamed(struct net_device *dev,
641 struct wireless_dev *wdev = dev->ieee80211_ptr; 789 struct wireless_dev *wdev = dev->ieee80211_ptr;
642 struct cfg80211_bss *bss; 790 struct cfg80211_bss *bss;
643 791
644 CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED);
645
646 bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, wdev->ssid, 792 bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, wdev->ssid,
647 wdev->ssid_len, WLAN_CAPABILITY_ESS, 793 wdev->ssid_len, WLAN_CAPABILITY_ESS,
648 WLAN_CAPABILITY_ESS); 794 WLAN_CAPABILITY_ESS);
@@ -654,6 +800,7 @@ void cfg80211_roamed(struct net_device *dev,
654} 800}
655EXPORT_SYMBOL(cfg80211_roamed); 801EXPORT_SYMBOL(cfg80211_roamed);
656 802
803/* Consumes bss object one way or another */
657void cfg80211_roamed_bss(struct net_device *dev, 804void cfg80211_roamed_bss(struct net_device *dev,
658 struct cfg80211_bss *bss, const u8 *req_ie, 805 struct cfg80211_bss *bss, const u8 *req_ie,
659 size_t req_ie_len, const u8 *resp_ie, 806 size_t req_ie_len, const u8 *resp_ie,
@@ -664,8 +811,6 @@ void cfg80211_roamed_bss(struct net_device *dev,
664 struct cfg80211_event *ev; 811 struct cfg80211_event *ev;
665 unsigned long flags; 812 unsigned long flags;
666 813
667 CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED);
668
669 if (WARN_ON(!bss)) 814 if (WARN_ON(!bss))
670 return; 815 return;
671 816
@@ -707,25 +852,14 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
707 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) 852 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
708 return; 853 return;
709 854
710 if (wdev->sme_state != CFG80211_SME_CONNECTED)
711 return;
712
713 if (wdev->current_bss) { 855 if (wdev->current_bss) {
714 cfg80211_unhold_bss(wdev->current_bss); 856 cfg80211_unhold_bss(wdev->current_bss);
715 cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); 857 cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
716 } 858 }
717 859
718 wdev->current_bss = NULL; 860 wdev->current_bss = NULL;
719 wdev->sme_state = CFG80211_SME_IDLE;
720 wdev->ssid_len = 0; 861 wdev->ssid_len = 0;
721 862
722 if (wdev->conn) {
723 kfree(wdev->conn->ie);
724 wdev->conn->ie = NULL;
725 kfree(wdev->conn);
726 wdev->conn = NULL;
727 }
728
729 nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap); 863 nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap);
730 864
731 /* 865 /*
@@ -754,8 +888,6 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason,
754 struct cfg80211_event *ev; 888 struct cfg80211_event *ev;
755 unsigned long flags; 889 unsigned long flags;
756 890
757 CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED);
758
759 ev = kzalloc(sizeof(*ev) + ie_len, gfp); 891 ev = kzalloc(sizeof(*ev) + ie_len, gfp);
760 if (!ev) 892 if (!ev)
761 return; 893 return;
@@ -773,21 +905,20 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason,
773} 905}
774EXPORT_SYMBOL(cfg80211_disconnected); 906EXPORT_SYMBOL(cfg80211_disconnected);
775 907
776int __cfg80211_connect(struct cfg80211_registered_device *rdev, 908/*
777 struct net_device *dev, 909 * API calls for nl80211/wext compatibility code
778 struct cfg80211_connect_params *connect, 910 */
779 struct cfg80211_cached_keys *connkeys, 911int cfg80211_connect(struct cfg80211_registered_device *rdev,
780 const u8 *prev_bssid) 912 struct net_device *dev,
913 struct cfg80211_connect_params *connect,
914 struct cfg80211_cached_keys *connkeys,
915 const u8 *prev_bssid)
781{ 916{
782 struct wireless_dev *wdev = dev->ieee80211_ptr; 917 struct wireless_dev *wdev = dev->ieee80211_ptr;
783 struct cfg80211_bss *bss = NULL;
784 int err; 918 int err;
785 919
786 ASSERT_WDEV_LOCK(wdev); 920 ASSERT_WDEV_LOCK(wdev);
787 921
788 if (wdev->sme_state != CFG80211_SME_IDLE)
789 return -EALREADY;
790
791 if (WARN_ON(wdev->connect_keys)) { 922 if (WARN_ON(wdev->connect_keys)) {
792 kfree(wdev->connect_keys); 923 kfree(wdev->connect_keys);
793 wdev->connect_keys = NULL; 924 wdev->connect_keys = NULL;
@@ -823,219 +954,41 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev,
823 } 954 }
824 } 955 }
825 956
826 if (!rdev->ops->connect) { 957 wdev->connect_keys = connkeys;
827 if (!rdev->ops->auth || !rdev->ops->assoc) 958 memcpy(wdev->ssid, connect->ssid, connect->ssid_len);
828 return -EOPNOTSUPP; 959 wdev->ssid_len = connect->ssid_len;
829
830 if (WARN_ON(wdev->conn))
831 return -EINPROGRESS;
832 960
833 wdev->conn = kzalloc(sizeof(*wdev->conn), GFP_KERNEL); 961 if (!rdev->ops->connect)
834 if (!wdev->conn) 962 err = cfg80211_sme_connect(wdev, connect, prev_bssid);
835 return -ENOMEM; 963 else
836
837 /*
838 * Copy all parameters, and treat explicitly IEs, BSSID, SSID.
839 */
840 memcpy(&wdev->conn->params, connect, sizeof(*connect));
841 if (connect->bssid) {
842 wdev->conn->params.bssid = wdev->conn->bssid;
843 memcpy(wdev->conn->bssid, connect->bssid, ETH_ALEN);
844 }
845
846 if (connect->ie) {
847 wdev->conn->ie = kmemdup(connect->ie, connect->ie_len,
848 GFP_KERNEL);
849 wdev->conn->params.ie = wdev->conn->ie;
850 if (!wdev->conn->ie) {
851 kfree(wdev->conn);
852 wdev->conn = NULL;
853 return -ENOMEM;
854 }
855 }
856
857 if (connect->auth_type == NL80211_AUTHTYPE_AUTOMATIC) {
858 wdev->conn->auto_auth = true;
859 /* start with open system ... should mostly work */
860 wdev->conn->params.auth_type =
861 NL80211_AUTHTYPE_OPEN_SYSTEM;
862 } else {
863 wdev->conn->auto_auth = false;
864 }
865
866 memcpy(wdev->ssid, connect->ssid, connect->ssid_len);
867 wdev->ssid_len = connect->ssid_len;
868 wdev->conn->params.ssid = wdev->ssid;
869 wdev->conn->params.ssid_len = connect->ssid_len;
870
871 /* see if we have the bss already */
872 bss = cfg80211_get_conn_bss(wdev);
873
874 wdev->sme_state = CFG80211_SME_CONNECTING;
875 wdev->connect_keys = connkeys;
876
877 if (prev_bssid) {
878 memcpy(wdev->conn->prev_bssid, prev_bssid, ETH_ALEN);
879 wdev->conn->prev_bssid_valid = true;
880 }
881
882 /* we're good if we have a matching bss struct */
883 if (bss) {
884 wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
885 err = cfg80211_conn_do_work(wdev);
886 cfg80211_put_bss(wdev->wiphy, bss);
887 } else {
888 /* otherwise we'll need to scan for the AP first */
889 err = cfg80211_conn_scan(wdev);
890 /*
891 * If we can't scan right now, then we need to scan again
892 * after the current scan finished, since the parameters
893 * changed (unless we find a good AP anyway).
894 */
895 if (err == -EBUSY) {
896 err = 0;
897 wdev->conn->state = CFG80211_CONN_SCAN_AGAIN;
898 }
899 }
900 if (err) {
901 kfree(wdev->conn->ie);
902 kfree(wdev->conn);
903 wdev->conn = NULL;
904 wdev->sme_state = CFG80211_SME_IDLE;
905 wdev->connect_keys = NULL;
906 wdev->ssid_len = 0;
907 }
908
909 return err;
910 } else {
911 wdev->sme_state = CFG80211_SME_CONNECTING;
912 wdev->connect_keys = connkeys;
913 err = rdev_connect(rdev, dev, connect); 964 err = rdev_connect(rdev, dev, connect);
914 if (err) {
915 wdev->connect_keys = NULL;
916 wdev->sme_state = CFG80211_SME_IDLE;
917 return err;
918 }
919 965
920 memcpy(wdev->ssid, connect->ssid, connect->ssid_len); 966 if (err) {
921 wdev->ssid_len = connect->ssid_len; 967 wdev->connect_keys = NULL;
922 968 wdev->ssid_len = 0;
923 return 0; 969 return err;
924 } 970 }
925}
926 971
927int cfg80211_connect(struct cfg80211_registered_device *rdev, 972 return 0;
928 struct net_device *dev,
929 struct cfg80211_connect_params *connect,
930 struct cfg80211_cached_keys *connkeys)
931{
932 int err;
933
934 mutex_lock(&rdev->devlist_mtx);
935 /* might request scan - scan_mtx -> wdev_mtx dependency */
936 mutex_lock(&rdev->sched_scan_mtx);
937 wdev_lock(dev->ieee80211_ptr);
938 err = __cfg80211_connect(rdev, dev, connect, connkeys, NULL);
939 wdev_unlock(dev->ieee80211_ptr);
940 mutex_unlock(&rdev->sched_scan_mtx);
941 mutex_unlock(&rdev->devlist_mtx);
942
943 return err;
944} 973}
945 974
946int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, 975int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
947 struct net_device *dev, u16 reason, bool wextev) 976 struct net_device *dev, u16 reason, bool wextev)
948{ 977{
949 struct wireless_dev *wdev = dev->ieee80211_ptr; 978 struct wireless_dev *wdev = dev->ieee80211_ptr;
950 int err; 979 int err = 0;
951 980
952 ASSERT_WDEV_LOCK(wdev); 981 ASSERT_WDEV_LOCK(wdev);
953 982
954 if (wdev->sme_state == CFG80211_SME_IDLE)
955 return -EINVAL;
956
957 kfree(wdev->connect_keys); 983 kfree(wdev->connect_keys);
958 wdev->connect_keys = NULL; 984 wdev->connect_keys = NULL;
959 985
960 if (!rdev->ops->disconnect) { 986 if (wdev->conn)
961 if (!rdev->ops->deauth) 987 err = cfg80211_sme_disconnect(wdev, reason);
962 return -EOPNOTSUPP; 988 else if (!rdev->ops->disconnect)
963 989 cfg80211_mlme_down(rdev, dev);
964 /* was it connected by userspace SME? */ 990 else if (wdev->current_bss)
965 if (!wdev->conn) {
966 cfg80211_mlme_down(rdev, dev);
967 goto disconnect;
968 }
969
970 if (wdev->sme_state == CFG80211_SME_CONNECTING &&
971 (wdev->conn->state == CFG80211_CONN_SCANNING ||
972 wdev->conn->state == CFG80211_CONN_SCAN_AGAIN)) {
973 wdev->sme_state = CFG80211_SME_IDLE;
974 kfree(wdev->conn->ie);
975 kfree(wdev->conn);
976 wdev->conn = NULL;
977 wdev->ssid_len = 0;
978 return 0;
979 }
980
981 /* wdev->conn->params.bssid must be set if > SCANNING */
982 err = __cfg80211_mlme_deauth(rdev, dev,
983 wdev->conn->params.bssid,
984 NULL, 0, reason, false);
985 if (err)
986 return err;
987 } else {
988 err = rdev_disconnect(rdev, dev, reason); 991 err = rdev_disconnect(rdev, dev, reason);
989 if (err)
990 return err;
991 }
992
993 disconnect:
994 if (wdev->sme_state == CFG80211_SME_CONNECTED)
995 __cfg80211_disconnected(dev, NULL, 0, 0, false);
996 else if (wdev->sme_state == CFG80211_SME_CONNECTING)
997 __cfg80211_connect_result(dev, NULL, NULL, 0, NULL, 0,
998 WLAN_STATUS_UNSPECIFIED_FAILURE,
999 wextev, NULL);
1000
1001 return 0;
1002}
1003
1004int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
1005 struct net_device *dev,
1006 u16 reason, bool wextev)
1007{
1008 int err;
1009
1010 wdev_lock(dev->ieee80211_ptr);
1011 err = __cfg80211_disconnect(rdev, dev, reason, wextev);
1012 wdev_unlock(dev->ieee80211_ptr);
1013 992
1014 return err; 993 return err;
1015} 994}
1016
1017void cfg80211_sme_disassoc(struct net_device *dev,
1018 struct cfg80211_internal_bss *bss)
1019{
1020 struct wireless_dev *wdev = dev->ieee80211_ptr;
1021 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
1022 u8 bssid[ETH_ALEN];
1023
1024 ASSERT_WDEV_LOCK(wdev);
1025
1026 if (!wdev->conn)
1027 return;
1028
1029 if (wdev->conn->state == CFG80211_CONN_IDLE)
1030 return;
1031
1032 /*
1033 * Ok, so the association was made by this SME -- we don't
1034 * want it any more so deauthenticate too.
1035 */
1036
1037 memcpy(bssid, bss->pub.bssid, ETH_ALEN);
1038
1039 __cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0,
1040 WLAN_REASON_DEAUTH_LEAVING, false);
1041}
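
The sme.c rewrite above removes the sme_state tracking and the devlist_mtx/sched_scan_mtx choreography from the connect path: cfg80211_connect() now only stashes the SSID and keys, then dispatches either to the software SME (cfg80211_sme_connect(), when the driver lacks a ->connect op) or straight to rdev_connect(). A minimal caller sketch under that assumption; only the wdev mutex is required:

    /* Hypothetical call site after this patch; params setup elided. */
    wdev_lock(dev->ieee80211_ptr);
    err = cfg80211_connect(rdev, dev, &params, NULL, NULL);
    wdev_unlock(dev->ieee80211_ptr);
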
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 8f28b9f798d8..9ee6bc1a7610 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -30,7 +30,8 @@ static ssize_t name ## _show(struct device *dev, \
30 char *buf) \ 30 char *buf) \
31{ \ 31{ \
32 return sprintf(buf, fmt "\n", dev_to_rdev(dev)->member); \ 32 return sprintf(buf, fmt "\n", dev_to_rdev(dev)->member); \
33} 33} \
34static DEVICE_ATTR_RO(name)
34 35
35SHOW_FMT(index, "%d", wiphy_idx); 36SHOW_FMT(index, "%d", wiphy_idx);
36SHOW_FMT(macaddress, "%pM", wiphy.perm_addr); 37SHOW_FMT(macaddress, "%pM", wiphy.perm_addr);
@@ -42,7 +43,7 @@ static ssize_t name_show(struct device *dev,
42 struct wiphy *wiphy = &dev_to_rdev(dev)->wiphy; 43 struct wiphy *wiphy = &dev_to_rdev(dev)->wiphy;
43 return sprintf(buf, "%s\n", dev_name(&wiphy->dev)); 44 return sprintf(buf, "%s\n", dev_name(&wiphy->dev));
44} 45}
45 46static DEVICE_ATTR_RO(name);
46 47
47static ssize_t addresses_show(struct device *dev, 48static ssize_t addresses_show(struct device *dev,
48 struct device_attribute *attr, 49 struct device_attribute *attr,
@@ -60,15 +61,17 @@ static ssize_t addresses_show(struct device *dev,
60 61
61 return buf - start; 62 return buf - start;
62} 63}
63 64static DEVICE_ATTR_RO(addresses);
64static struct device_attribute ieee80211_dev_attrs[] = { 65
65 __ATTR_RO(index), 66static struct attribute *ieee80211_attrs[] = {
66 __ATTR_RO(macaddress), 67 &dev_attr_index.attr,
67 __ATTR_RO(address_mask), 68 &dev_attr_macaddress.attr,
68 __ATTR_RO(addresses), 69 &dev_attr_address_mask.attr,
69 __ATTR_RO(name), 70 &dev_attr_addresses.attr,
70 {} 71 &dev_attr_name.attr,
72 NULL,
71}; 73};
74ATTRIBUTE_GROUPS(ieee80211);
72 75
73static void wiphy_dev_release(struct device *dev) 76static void wiphy_dev_release(struct device *dev)
74{ 77{
@@ -83,6 +86,7 @@ static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env)
83 return 0; 86 return 0;
84} 87}
85 88
89#ifdef CONFIG_PM
86static void cfg80211_leave_all(struct cfg80211_registered_device *rdev) 90static void cfg80211_leave_all(struct cfg80211_registered_device *rdev)
87{ 91{
88 struct wireless_dev *wdev; 92 struct wireless_dev *wdev;
@@ -100,10 +104,10 @@ static int wiphy_suspend(struct device *dev, pm_message_t state)
100 104
101 rtnl_lock(); 105 rtnl_lock();
102 if (rdev->wiphy.registered) { 106 if (rdev->wiphy.registered) {
103 if (!rdev->wowlan) 107 if (!rdev->wiphy.wowlan_config)
104 cfg80211_leave_all(rdev); 108 cfg80211_leave_all(rdev);
105 if (rdev->ops->suspend) 109 if (rdev->ops->suspend)
106 ret = rdev_suspend(rdev, rdev->wowlan); 110 ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config);
107 if (ret == 1) { 111 if (ret == 1) {
108 /* Driver refused to configure wowlan */ 112 /* Driver refused to configure wowlan */
108 /* Driver refused to configure wowlan */ 112 /* Driver refused to configure wowlan */
109 cfg80211_leave_all(rdev); 113 cfg80211_leave_all(rdev);
@@ -132,6 +136,7 @@ static int wiphy_resume(struct device *dev)
132 136
133 return ret; 137 return ret;
134} 138}
139#endif
135 140
136static const void *wiphy_namespace(struct device *d) 141static const void *wiphy_namespace(struct device *d)
137{ 142{
@@ -144,10 +149,12 @@ struct class ieee80211_class = {
144 .name = "ieee80211", 149 .name = "ieee80211",
145 .owner = THIS_MODULE, 150 .owner = THIS_MODULE,
146 .dev_release = wiphy_dev_release, 151 .dev_release = wiphy_dev_release,
147 .dev_attrs = ieee80211_dev_attrs, 152 .dev_groups = ieee80211_groups,
148 .dev_uevent = wiphy_uevent, 153 .dev_uevent = wiphy_uevent,
154#ifdef CONFIG_PM
149 .suspend = wiphy_suspend, 155 .suspend = wiphy_suspend,
150 .resume = wiphy_resume, 156 .resume = wiphy_resume,
157#endif
151 .ns_type = &net_ns_type_operations, 158 .ns_type = &net_ns_type_operations,
152 .namespace = wiphy_namespace, 159 .namespace = wiphy_namespace,
153}; 160};
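
The sysfs.c hunks are the standard conversion from the removed struct class .dev_attrs array to attribute groups. A minimal sketch of the pattern, assuming <linux/device.h>; the demo_* names are placeholders, not from this patch:

    static ssize_t index_show(struct device *dev,
                              struct device_attribute *attr, char *buf)
    {
            return sprintf(buf, "%d\n", 0 /* placeholder value */);
    }
    static DEVICE_ATTR_RO(index);      /* emits dev_attr_index, bound to index_show */

    static struct attribute *demo_attrs[] = {
            &dev_attr_index.attr,
            NULL,
    };
    ATTRIBUTE_GROUPS(demo);            /* emits demo_groups from demo_attrs */

    static struct class demo_class = {
            .name       = "demo",
            .dev_groups = demo_groups, /* replaces the old .dev_attrs array */
    };

The CONFIG_PM guards added alongside simply compile out the suspend/resume hooks when power management is disabled.
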
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 5755bc14abbd..ba5f0d6614d5 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1293,15 +1293,17 @@ TRACE_EVENT(rdev_return_int_int,
1293 1293
1294#ifdef CONFIG_NL80211_TESTMODE 1294#ifdef CONFIG_NL80211_TESTMODE
1295TRACE_EVENT(rdev_testmode_cmd, 1295TRACE_EVENT(rdev_testmode_cmd,
1296 TP_PROTO(struct wiphy *wiphy), 1296 TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
1297 TP_ARGS(wiphy), 1297 TP_ARGS(wiphy, wdev),
1298 TP_STRUCT__entry( 1298 TP_STRUCT__entry(
1299 WIPHY_ENTRY 1299 WIPHY_ENTRY
1300 WDEV_ENTRY
1300 ), 1301 ),
1301 TP_fast_assign( 1302 TP_fast_assign(
1302 WIPHY_ASSIGN; 1303 WIPHY_ASSIGN;
1304 WDEV_ASSIGN;
1303 ), 1305 ),
1304 TP_printk(WIPHY_PR_FMT, WIPHY_PR_ARG) 1306 TP_printk(WIPHY_PR_FMT WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG)
1305); 1307);
1306 1308
1307TRACE_EVENT(rdev_testmode_dump, 1309TRACE_EVENT(rdev_testmode_dump,
@@ -1841,6 +1843,39 @@ TRACE_EVENT(rdev_crit_proto_stop,
1841 WIPHY_PR_ARG, WDEV_PR_ARG) 1843 WIPHY_PR_ARG, WDEV_PR_ARG)
1842); 1844);
1843 1845
1846TRACE_EVENT(rdev_channel_switch,
1847 TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
1848 struct cfg80211_csa_settings *params),
1849 TP_ARGS(wiphy, netdev, params),
1850 TP_STRUCT__entry(
1851 WIPHY_ENTRY
1852 NETDEV_ENTRY
1853 CHAN_DEF_ENTRY
1854 __field(u16, counter_offset_beacon)
1855 __field(u16, counter_offset_presp)
1856 __field(bool, radar_required)
1857 __field(bool, block_tx)
1858 __field(u8, count)
1859 ),
1860 TP_fast_assign(
1861 WIPHY_ASSIGN;
1862 NETDEV_ASSIGN;
1863 CHAN_DEF_ASSIGN(&params->chandef);
1864 __entry->counter_offset_beacon = params->counter_offset_beacon;
1865 __entry->counter_offset_presp = params->counter_offset_presp;
1866 __entry->radar_required = params->radar_required;
1867 __entry->block_tx = params->block_tx;
1868 __entry->count = params->count;
1869 ),
1870 TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
1871 ", block_tx: %d, count: %u, radar_required: %d"
1872 ", counter offsets (beacon/presp): %u/%u",
1873 WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
1874 __entry->block_tx, __entry->count, __entry->radar_required,
1875 __entry->counter_offset_beacon,
1876 __entry->counter_offset_presp)
1877);
1878
1844/************************************************************* 1879/*************************************************************
1845 * cfg80211 exported functions traces * 1880 * cfg80211 exported functions traces *
1846 *************************************************************/ 1881 *************************************************************/
@@ -1911,24 +1946,46 @@ TRACE_EVENT(cfg80211_send_rx_assoc,
1911 NETDEV_PR_ARG, MAC_PR_ARG(bssid), CHAN_PR_ARG) 1946 NETDEV_PR_ARG, MAC_PR_ARG(bssid), CHAN_PR_ARG)
1912); 1947);
1913 1948
1914DEFINE_EVENT(netdev_evt_only, __cfg80211_send_deauth, 1949DECLARE_EVENT_CLASS(netdev_frame_event,
1915 TP_PROTO(struct net_device *netdev), 1950 TP_PROTO(struct net_device *netdev, const u8 *buf, int len),
1916 TP_ARGS(netdev) 1951 TP_ARGS(netdev, buf, len),
1952 TP_STRUCT__entry(
1953 NETDEV_ENTRY
1954 __dynamic_array(u8, frame, len)
1955 ),
1956 TP_fast_assign(
1957 NETDEV_ASSIGN;
1958 memcpy(__get_dynamic_array(frame), buf, len);
1959 ),
1960 TP_printk(NETDEV_PR_FMT ", ftype:0x%.2x",
1961 NETDEV_PR_ARG,
1962 le16_to_cpup((__le16 *)__get_dynamic_array(frame)))
1917); 1963);
1918 1964
1919DEFINE_EVENT(netdev_evt_only, __cfg80211_send_disassoc, 1965DEFINE_EVENT(netdev_frame_event, cfg80211_rx_unprot_mlme_mgmt,
1920 TP_PROTO(struct net_device *netdev), 1966 TP_PROTO(struct net_device *netdev, const u8 *buf, int len),
1921 TP_ARGS(netdev) 1967 TP_ARGS(netdev, buf, len)
1922); 1968);
1923 1969
1924DEFINE_EVENT(netdev_evt_only, cfg80211_send_unprot_deauth, 1970DEFINE_EVENT(netdev_frame_event, cfg80211_rx_mlme_mgmt,
1925 TP_PROTO(struct net_device *netdev), 1971 TP_PROTO(struct net_device *netdev, const u8 *buf, int len),
1926 TP_ARGS(netdev) 1972 TP_ARGS(netdev, buf, len)
1927); 1973);
1928 1974
1929DEFINE_EVENT(netdev_evt_only, cfg80211_send_unprot_disassoc, 1975TRACE_EVENT(cfg80211_tx_mlme_mgmt,
1930 TP_PROTO(struct net_device *netdev), 1976 TP_PROTO(struct net_device *netdev, const u8 *buf, int len),
1931 TP_ARGS(netdev) 1977 TP_ARGS(netdev, buf, len),
1978 TP_STRUCT__entry(
1979 NETDEV_ENTRY
1980 __dynamic_array(u8, frame, len)
1981 ),
1982 TP_fast_assign(
1983 NETDEV_ASSIGN;
1984 memcpy(__get_dynamic_array(frame), buf, len);
1985 ),
1986 TP_printk(NETDEV_PR_FMT ", ftype:0x%.2x",
1987 NETDEV_PR_ARG,
1988 le16_to_cpup((__le16 *)__get_dynamic_array(frame)))
1932); 1989);
1933 1990
1934DECLARE_EVENT_CLASS(netdev_mac_evt, 1991DECLARE_EVENT_CLASS(netdev_mac_evt,
@@ -2369,26 +2426,30 @@ TRACE_EVENT(cfg80211_get_bss,
2369 __entry->capa_mask, __entry->capa_val) 2426 __entry->capa_mask, __entry->capa_val)
2370); 2427);
2371 2428
2372TRACE_EVENT(cfg80211_inform_bss_frame, 2429TRACE_EVENT(cfg80211_inform_bss_width_frame,
2373 TP_PROTO(struct wiphy *wiphy, struct ieee80211_channel *channel, 2430 TP_PROTO(struct wiphy *wiphy, struct ieee80211_channel *channel,
2431 enum nl80211_bss_scan_width scan_width,
2374 struct ieee80211_mgmt *mgmt, size_t len, 2432 struct ieee80211_mgmt *mgmt, size_t len,
2375 s32 signal), 2433 s32 signal),
2376 TP_ARGS(wiphy, channel, mgmt, len, signal), 2434 TP_ARGS(wiphy, channel, scan_width, mgmt, len, signal),
2377 TP_STRUCT__entry( 2435 TP_STRUCT__entry(
2378 WIPHY_ENTRY 2436 WIPHY_ENTRY
2379 CHAN_ENTRY 2437 CHAN_ENTRY
2438 __field(enum nl80211_bss_scan_width, scan_width)
2380 __dynamic_array(u8, mgmt, len) 2439 __dynamic_array(u8, mgmt, len)
2381 __field(s32, signal) 2440 __field(s32, signal)
2382 ), 2441 ),
2383 TP_fast_assign( 2442 TP_fast_assign(
2384 WIPHY_ASSIGN; 2443 WIPHY_ASSIGN;
2385 CHAN_ASSIGN(channel); 2444 CHAN_ASSIGN(channel);
2445 __entry->scan_width = scan_width;
2386 if (mgmt) 2446 if (mgmt)
2387 memcpy(__get_dynamic_array(mgmt), mgmt, len); 2447 memcpy(__get_dynamic_array(mgmt), mgmt, len);
2388 __entry->signal = signal; 2448 __entry->signal = signal;
2389 ), 2449 ),
2390 TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT "signal: %d", 2450 TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT "(scan_width: %d) signal: %d",
2391 WIPHY_PR_ARG, CHAN_PR_ARG, __entry->signal) 2451 WIPHY_PR_ARG, CHAN_PR_ARG, __entry->scan_width,
2452 __entry->signal)
2392); 2453);
2393 2454
2394DECLARE_EVENT_CLASS(cfg80211_bss_evt, 2455DECLARE_EVENT_CLASS(cfg80211_bss_evt,
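
The trace.h changes replace the bare netdev-only deauth/disassoc events with a class that carries the raw frame. Reduced to its essentials (demo_* names are placeholders; this assumes the usual trace-header boilerplate around it, so it is a sketch rather than a compilable unit):

    DECLARE_EVENT_CLASS(demo_frame_event,
            TP_PROTO(const u8 *buf, int len),
            TP_ARGS(buf, len),
            TP_STRUCT__entry(
                    __dynamic_array(u8, frame, len) /* record sized per event */
            ),
            TP_fast_assign(
                    memcpy(__get_dynamic_array(frame), buf, len);
            ),
            TP_printk("ftype:0x%.2x",
                      le16_to_cpup((__le16 *)__get_dynamic_array(frame)))
    );

    /* Each DEFINE_EVENT stamps out a named tracepoint sharing the layout. */
    DEFINE_EVENT(demo_frame_event, demo_rx_mgmt,
            TP_PROTO(const u8 *buf, int len),
            TP_ARGS(buf, len)
    );
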
diff --git a/net/wireless/util.c b/net/wireless/util.c
index f5ad4d94ba88..ce090c1c5e4f 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -33,6 +33,35 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
33} 33}
34EXPORT_SYMBOL(ieee80211_get_response_rate); 34EXPORT_SYMBOL(ieee80211_get_response_rate);
35 35
36u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband,
37 enum nl80211_bss_scan_width scan_width)
38{
39 struct ieee80211_rate *bitrates;
40 u32 mandatory_rates = 0;
41 enum ieee80211_rate_flags mandatory_flag;
42 int i;
43
44 if (WARN_ON(!sband))
45 return 1;
46
47 if (sband->band == IEEE80211_BAND_2GHZ) {
48 if (scan_width == NL80211_BSS_CHAN_WIDTH_5 ||
49 scan_width == NL80211_BSS_CHAN_WIDTH_10)
50 mandatory_flag = IEEE80211_RATE_MANDATORY_G;
51 else
52 mandatory_flag = IEEE80211_RATE_MANDATORY_B;
53 } else {
54 mandatory_flag = IEEE80211_RATE_MANDATORY_A;
55 }
56
57 bitrates = sband->bitrates;
58 for (i = 0; i < sband->n_bitrates; i++)
59 if (bitrates[i].flags & mandatory_flag)
60 mandatory_rates |= BIT(i);
61 return mandatory_rates;
62}
63EXPORT_SYMBOL(ieee80211_mandatory_rates);
64
36int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band) 65int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band)
37{ 66{
38 /* see 802.11 17.3.8.3.2 and Annex J 67 /* see 802.11 17.3.8.3.2 and Annex J
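
The new ieee80211_mandatory_rates() returns a bitmap over sband->bitrates rather than a list. A hypothetical caller, assuming a valid struct ieee80211_supported_band *sband; the loop and pr_debug are illustrative:

    u32 rates = ieee80211_mandatory_rates(sband, NL80211_BSS_CHAN_WIDTH_20);
    int i;

    for (i = 0; i < sband->n_bitrates; i++)
            if (rates & BIT(i))     /* bit i corresponds to sband->bitrates[i] */
                    pr_debug("mandatory rate: %d x 100kbps\n",
                             sband->bitrates[i].bitrate);
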
@@ -785,12 +814,8 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev)
785 ASSERT_RTNL(); 814 ASSERT_RTNL();
786 ASSERT_RDEV_LOCK(rdev); 815 ASSERT_RDEV_LOCK(rdev);
787 816
788 mutex_lock(&rdev->devlist_mtx);
789
790 list_for_each_entry(wdev, &rdev->wdev_list, list) 817 list_for_each_entry(wdev, &rdev->wdev_list, list)
791 cfg80211_process_wdev_events(wdev); 818 cfg80211_process_wdev_events(wdev);
792
793 mutex_unlock(&rdev->devlist_mtx);
794} 819}
795 820
796int cfg80211_change_iface(struct cfg80211_registered_device *rdev, 821int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
@@ -822,10 +847,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
822 return -EBUSY; 847 return -EBUSY;
823 848
824 if (ntype != otype && netif_running(dev)) { 849 if (ntype != otype && netif_running(dev)) {
825 mutex_lock(&rdev->devlist_mtx);
826 err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr, 850 err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr,
827 ntype); 851 ntype);
828 mutex_unlock(&rdev->devlist_mtx);
829 if (err) 852 if (err)
830 return err; 853 return err;
831 854
@@ -841,8 +864,10 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
841 break; 864 break;
842 case NL80211_IFTYPE_STATION: 865 case NL80211_IFTYPE_STATION:
843 case NL80211_IFTYPE_P2P_CLIENT: 866 case NL80211_IFTYPE_P2P_CLIENT:
867 wdev_lock(dev->ieee80211_ptr);
844 cfg80211_disconnect(rdev, dev, 868 cfg80211_disconnect(rdev, dev,
845 WLAN_REASON_DEAUTH_LEAVING, true); 869 WLAN_REASON_DEAUTH_LEAVING, true);
870 wdev_unlock(dev->ieee80211_ptr);
846 break; 871 break;
847 case NL80211_IFTYPE_MESH_POINT: 872 case NL80211_IFTYPE_MESH_POINT:
848 /* mesh should be handled? */ 873 /* mesh should be handled? */
@@ -1169,6 +1194,9 @@ bool ieee80211_operating_class_to_band(u8 operating_class,
1169 case 84: 1194 case 84:
1170 *band = IEEE80211_BAND_2GHZ; 1195 *band = IEEE80211_BAND_2GHZ;
1171 return true; 1196 return true;
1197 case 180:
1198 *band = IEEE80211_BAND_60GHZ;
1199 return true;
1172 } 1200 }
1173 1201
1174 return false; 1202 return false;
@@ -1184,8 +1212,6 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
1184 if (!beacon_int) 1212 if (!beacon_int)
1185 return -EINVAL; 1213 return -EINVAL;
1186 1214
1187 mutex_lock(&rdev->devlist_mtx);
1188
1189 list_for_each_entry(wdev, &rdev->wdev_list, list) { 1215 list_for_each_entry(wdev, &rdev->wdev_list, list) {
1190 if (!wdev->beacon_interval) 1216 if (!wdev->beacon_interval)
1191 continue; 1217 continue;
@@ -1195,8 +1221,6 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
1195 } 1221 }
1196 } 1222 }
1197 1223
1198 mutex_unlock(&rdev->devlist_mtx);
1199
1200 return res; 1224 return res;
1201} 1225}
1202 1226
@@ -1220,7 +1244,6 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
1220 int i, j; 1244 int i, j;
1221 1245
1222 ASSERT_RTNL(); 1246 ASSERT_RTNL();
1223 lockdep_assert_held(&rdev->devlist_mtx);
1224 1247
1225 if (WARN_ON(hweight32(radar_detect) > 1)) 1248 if (WARN_ON(hweight32(radar_detect) > 1))
1226 return -EINVAL; 1249 return -EINVAL;
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index d997d0f0c54a..e7c6e862580d 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -72,7 +72,6 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
72 struct cfg80211_registered_device *rdev; 72 struct cfg80211_registered_device *rdev;
73 struct vif_params vifparams; 73 struct vif_params vifparams;
74 enum nl80211_iftype type; 74 enum nl80211_iftype type;
75 int ret;
76 75
77 rdev = wiphy_to_dev(wdev->wiphy); 76 rdev = wiphy_to_dev(wdev->wiphy);
78 77
@@ -98,11 +97,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
98 97
99 memset(&vifparams, 0, sizeof(vifparams)); 98 memset(&vifparams, 0, sizeof(vifparams));
100 99
101 cfg80211_lock_rdev(rdev); 100 return cfg80211_change_iface(rdev, dev, type, NULL, &vifparams);
102 ret = cfg80211_change_iface(rdev, dev, type, NULL, &vifparams);
103 cfg80211_unlock_rdev(rdev);
104
105 return ret;
106} 101}
107EXPORT_SYMBOL_GPL(cfg80211_wext_siwmode); 102EXPORT_SYMBOL_GPL(cfg80211_wext_siwmode);
108 103
@@ -579,13 +574,10 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
579{ 574{
580 int err; 575 int err;
581 576
582 /* devlist mutex needed for possible IBSS re-join */
583 mutex_lock(&rdev->devlist_mtx);
584 wdev_lock(dev->ieee80211_ptr); 577 wdev_lock(dev->ieee80211_ptr);
585 err = __cfg80211_set_encryption(rdev, dev, pairwise, addr, 578 err = __cfg80211_set_encryption(rdev, dev, pairwise, addr,
586 remove, tx_key, idx, params); 579 remove, tx_key, idx, params);
587 wdev_unlock(dev->ieee80211_ptr); 580 wdev_unlock(dev->ieee80211_ptr);
588 mutex_unlock(&rdev->devlist_mtx);
589 581
590 return err; 582 return err;
591} 583}
@@ -787,7 +779,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev,
787 struct cfg80211_chan_def chandef = { 779 struct cfg80211_chan_def chandef = {
788 .width = NL80211_CHAN_WIDTH_20_NOHT, 780 .width = NL80211_CHAN_WIDTH_20_NOHT,
789 }; 781 };
790 int freq, err; 782 int freq;
791 783
792 switch (wdev->iftype) { 784 switch (wdev->iftype) {
793 case NL80211_IFTYPE_STATION: 785 case NL80211_IFTYPE_STATION:
@@ -804,10 +796,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev,
804 chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); 796 chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq);
805 if (!chandef.chan) 797 if (!chandef.chan)
806 return -EINVAL; 798 return -EINVAL;
807 mutex_lock(&rdev->devlist_mtx); 799 return cfg80211_set_monitor_channel(rdev, &chandef);
808 err = cfg80211_set_monitor_channel(rdev, &chandef);
809 mutex_unlock(&rdev->devlist_mtx);
810 return err;
811 case NL80211_IFTYPE_MESH_POINT: 800 case NL80211_IFTYPE_MESH_POINT:
812 freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); 801 freq = cfg80211_wext_freq(wdev->wiphy, wextfreq);
813 if (freq < 0) 802 if (freq < 0)
@@ -818,10 +807,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev,
818 chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); 807 chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq);
819 if (!chandef.chan) 808 if (!chandef.chan)
820 return -EINVAL; 809 return -EINVAL;
821 mutex_lock(&rdev->devlist_mtx); 810 return cfg80211_set_mesh_channel(rdev, wdev, &chandef);
822 err = cfg80211_set_mesh_channel(rdev, wdev, &chandef);
823 mutex_unlock(&rdev->devlist_mtx);
824 return err;
825 default: 811 default:
826 return -EOPNOTSUPP; 812 return -EOPNOTSUPP;
827 } 813 }
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index e79cb5c0655a..14c9a2583ba0 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -54,8 +54,8 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
54 if (wdev->wext.prev_bssid_valid) 54 if (wdev->wext.prev_bssid_valid)
55 prev_bssid = wdev->wext.prev_bssid; 55 prev_bssid = wdev->wext.prev_bssid;
56 56
57 err = __cfg80211_connect(rdev, wdev->netdev, 57 err = cfg80211_connect(rdev, wdev->netdev,
58 &wdev->wext.connect, ck, prev_bssid); 58 &wdev->wext.connect, ck, prev_bssid);
59 if (err) 59 if (err)
60 kfree(ck); 60 kfree(ck);
61 61
@@ -87,12 +87,9 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev,
87 return -EINVAL; 87 return -EINVAL;
88 } 88 }
89 89
90 cfg80211_lock_rdev(rdev);
91 mutex_lock(&rdev->devlist_mtx);
92 mutex_lock(&rdev->sched_scan_mtx);
93 wdev_lock(wdev); 90 wdev_lock(wdev);
94 91
95 if (wdev->sme_state != CFG80211_SME_IDLE) { 92 if (wdev->conn) {
96 bool event = true; 93 bool event = true;
97 94
98 if (wdev->wext.connect.channel == chan) { 95 if (wdev->wext.connect.channel == chan) {
@@ -103,8 +100,8 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev,
103 /* if SSID set, we'll try right again, avoid event */ 100 /* if SSID set, we'll try right again, avoid event */
104 if (wdev->wext.connect.ssid_len) 101 if (wdev->wext.connect.ssid_len)
105 event = false; 102 event = false;
106 err = __cfg80211_disconnect(rdev, dev, 103 err = cfg80211_disconnect(rdev, dev,
107 WLAN_REASON_DEAUTH_LEAVING, event); 104 WLAN_REASON_DEAUTH_LEAVING, event);
108 if (err) 105 if (err)
109 goto out; 106 goto out;
110 } 107 }
@@ -136,9 +133,6 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev,
136 err = cfg80211_mgd_wext_connect(rdev, wdev); 133 err = cfg80211_mgd_wext_connect(rdev, wdev);
137 out: 134 out:
138 wdev_unlock(wdev); 135 wdev_unlock(wdev);
139 mutex_unlock(&rdev->sched_scan_mtx);
140 mutex_unlock(&rdev->devlist_mtx);
141 cfg80211_unlock_rdev(rdev);
142 return err; 136 return err;
143} 137}
144 138
@@ -190,14 +184,11 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev,
190 if (len > 0 && ssid[len - 1] == '\0') 184 if (len > 0 && ssid[len - 1] == '\0')
191 len--; 185 len--;
192 186
193 cfg80211_lock_rdev(rdev);
194 mutex_lock(&rdev->devlist_mtx);
195 mutex_lock(&rdev->sched_scan_mtx);
196 wdev_lock(wdev); 187 wdev_lock(wdev);
197 188
198 err = 0; 189 err = 0;
199 190
200 if (wdev->sme_state != CFG80211_SME_IDLE) { 191 if (wdev->conn) {
201 bool event = true; 192 bool event = true;
202 193
203 if (wdev->wext.connect.ssid && len && 194 if (wdev->wext.connect.ssid && len &&
@@ -208,8 +199,8 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev,
208 /* if SSID set now, we'll try to connect, avoid event */ 199 /* if SSID set now, we'll try to connect, avoid event */
209 if (len) 200 if (len)
210 event = false; 201 event = false;
211 err = __cfg80211_disconnect(rdev, dev, 202 err = cfg80211_disconnect(rdev, dev,
212 WLAN_REASON_DEAUTH_LEAVING, event); 203 WLAN_REASON_DEAUTH_LEAVING, event);
213 if (err) 204 if (err)
214 goto out; 205 goto out;
215 } 206 }
@@ -226,9 +217,6 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev,
226 err = cfg80211_mgd_wext_connect(rdev, wdev); 217 err = cfg80211_mgd_wext_connect(rdev, wdev);
227 out: 218 out:
228 wdev_unlock(wdev); 219 wdev_unlock(wdev);
229 mutex_unlock(&rdev->sched_scan_mtx);
230 mutex_unlock(&rdev->devlist_mtx);
231 cfg80211_unlock_rdev(rdev);
232 return err; 220 return err;
233} 221}
234 222
@@ -287,12 +275,9 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev,
287 if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid)) 275 if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid))
288 bssid = NULL; 276 bssid = NULL;
289 277
290 cfg80211_lock_rdev(rdev);
291 mutex_lock(&rdev->devlist_mtx);
292 mutex_lock(&rdev->sched_scan_mtx);
293 wdev_lock(wdev); 278 wdev_lock(wdev);
294 279
295 if (wdev->sme_state != CFG80211_SME_IDLE) { 280 if (wdev->conn) {
296 err = 0; 281 err = 0;
297 /* both automatic */ 282 /* both automatic */
298 if (!bssid && !wdev->wext.connect.bssid) 283 if (!bssid && !wdev->wext.connect.bssid)
@@ -303,8 +288,8 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev,
303 ether_addr_equal(bssid, wdev->wext.connect.bssid)) 288 ether_addr_equal(bssid, wdev->wext.connect.bssid))
304 goto out; 289 goto out;
305 290
306 err = __cfg80211_disconnect(rdev, dev, 291 err = cfg80211_disconnect(rdev, dev,
307 WLAN_REASON_DEAUTH_LEAVING, false); 292 WLAN_REASON_DEAUTH_LEAVING, false);
308 if (err) 293 if (err)
309 goto out; 294 goto out;
310 } 295 }
@@ -318,9 +303,6 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev,
318 err = cfg80211_mgd_wext_connect(rdev, wdev); 303 err = cfg80211_mgd_wext_connect(rdev, wdev);
319 out: 304 out:
320 wdev_unlock(wdev); 305 wdev_unlock(wdev);
321 mutex_unlock(&rdev->sched_scan_mtx);
322 mutex_unlock(&rdev->devlist_mtx);
323 cfg80211_unlock_rdev(rdev);
324 return err; 306 return err;
325} 307}
326 308
@@ -382,9 +364,9 @@ int cfg80211_wext_siwgenie(struct net_device *dev,
382 wdev->wext.ie = ie; 364 wdev->wext.ie = ie;
383 wdev->wext.ie_len = ie_len; 365 wdev->wext.ie_len = ie_len;
384 366
385 if (wdev->sme_state != CFG80211_SME_IDLE) { 367 if (wdev->conn) {
386 err = __cfg80211_disconnect(rdev, dev, 368 err = cfg80211_disconnect(rdev, dev,
387 WLAN_REASON_DEAUTH_LEAVING, false); 369 WLAN_REASON_DEAUTH_LEAVING, false);
388 if (err) 370 if (err)
389 goto out; 371 goto out;
390 } 372 }
@@ -420,8 +402,7 @@ int cfg80211_wext_siwmlme(struct net_device *dev,
420 switch (mlme->cmd) { 402 switch (mlme->cmd) {
421 case IW_MLME_DEAUTH: 403 case IW_MLME_DEAUTH:
422 case IW_MLME_DISASSOC: 404 case IW_MLME_DISASSOC:
423 err = __cfg80211_disconnect(rdev, dev, mlme->reason_code, 405 err = cfg80211_disconnect(rdev, dev, mlme->reason_code, true);
424 true);
425 break; 406 break;
426 default: 407 default:
427 err = -EOPNOTSUPP; 408 err = -EOPNOTSUPP;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 37ca9694aabe..45a3ab5612c1 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -224,7 +224,7 @@ static void x25_kill_by_device(struct net_device *dev)
224static int x25_device_event(struct notifier_block *this, unsigned long event, 224static int x25_device_event(struct notifier_block *this, unsigned long event,
225 void *ptr) 225 void *ptr)
226{ 226{
227 struct net_device *dev = ptr; 227 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
228 struct x25_neigh *nb; 228 struct x25_neigh *nb;
229 229
230 if (!net_eq(dev_net(dev), &init_net)) 230 if (!net_eq(dev_net(dev), &init_net))
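
This hunk is part of the tree-wide change giving netdevice notifiers a struct netdev_notifier_info instead of the net_device itself; every notifier in this diff gets the same one-line fix. The general shape, as a hedged sketch (demo names, handler body elided):

    static int demo_device_event(struct notifier_block *this,
                                 unsigned long event, void *ptr)
    {
            /* was: struct net_device *dev = ptr; */
            struct net_device *dev = netdev_notifier_info_to_dev(ptr);

            switch (event) {
            case NETDEV_DOWN:
                    /* ... react to dev going down ... */
                    break;
            }
            return NOTIFY_DONE;
    }
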
@@ -1583,11 +1583,11 @@ out_cud_release:
1583 case SIOCX25CALLACCPTAPPRV: { 1583 case SIOCX25CALLACCPTAPPRV: {
1584 rc = -EINVAL; 1584 rc = -EINVAL;
1585 lock_sock(sk); 1585 lock_sock(sk);
1586 if (sk->sk_state != TCP_CLOSE) 1586 if (sk->sk_state == TCP_CLOSE) {
1587 break; 1587 clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags);
1588 clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); 1588 rc = 0;
1589 }
1589 release_sock(sk); 1590 release_sock(sk);
1590 rc = 0;
1591 break; 1591 break;
1592 } 1592 }
1593 1593
@@ -1595,14 +1595,15 @@ out_cud_release:
1595 rc = -EINVAL; 1595 rc = -EINVAL;
1596 lock_sock(sk); 1596 lock_sock(sk);
1597 if (sk->sk_state != TCP_ESTABLISHED) 1597 if (sk->sk_state != TCP_ESTABLISHED)
1598 break; 1598 goto out_sendcallaccpt_release;
1599 /* must call accptapprv above */ 1599 /* must call accptapprv above */
1600 if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags)) 1600 if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags))
1601 break; 1601 goto out_sendcallaccpt_release;
1602 x25_write_internal(sk, X25_CALL_ACCEPTED); 1602 x25_write_internal(sk, X25_CALL_ACCEPTED);
1603 x25->state = X25_STATE_3; 1603 x25->state = X25_STATE_3;
1604 release_sock(sk);
1605 rc = 0; 1604 rc = 0;
1605out_sendcallaccpt_release:
1606 release_sock(sk);
1606 break; 1607 break;
1607 } 1608 }
1608 1609
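
Both SIOCX25 fixes above address the same bug class: an early break escaped the block while the socket was still locked. The repaired shape, as a self-contained userspace analogue (a pthread mutex stands in for lock_sock()/release_sock(); all names are illustrative):

    #include <errno.h>
    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int established, approved;

    static int send_call_accept(void)
    {
            int rc = -EINVAL;

            pthread_mutex_lock(&lock);
            if (!established)
                    goto out_unlock;        /* was: break, leaking the lock */
            if (approved)
                    goto out_unlock;
            /* ... transmit the call-accept here ... */
            rc = 0;
    out_unlock:
            pthread_mutex_unlock(&lock);    /* single exit: always unlocked */
            return rc;
    }

    int main(void)
    {
            established = 1;
            return send_call_accept() ? 1 : 0;
    }
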
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 66c638730c7a..b8253250d723 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -156,6 +156,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
156 case X25_FAC_CALLING_AE: 156 case X25_FAC_CALLING_AE:
157 if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1) 157 if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
158 return -1; 158 return -1;
159 if (p[2] > X25_MAX_AE_LEN)
160 return -1;
159 dte_facs->calling_len = p[2]; 161 dte_facs->calling_len = p[2];
160 memcpy(dte_facs->calling_ae, &p[3], p[1] - 1); 162 memcpy(dte_facs->calling_ae, &p[3], p[1] - 1);
161 *vc_fac_mask |= X25_MASK_CALLING_AE; 163 *vc_fac_mask |= X25_MASK_CALLING_AE;
@@ -163,6 +165,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
163 case X25_FAC_CALLED_AE: 165 case X25_FAC_CALLED_AE:
164 if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1) 166 if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
165 return -1; 167 return -1;
168 if (p[2] > X25_MAX_AE_LEN)
169 return -1;
166 dte_facs->called_len = p[2]; 170 dte_facs->called_len = p[2];
167 memcpy(dte_facs->called_ae, &p[3], p[1] - 1); 171 memcpy(dte_facs->called_ae, &p[3], p[1] - 1);
168 *vc_fac_mask |= X25_MASK_CALLED_AE; 172 *vc_fac_mask |= X25_MASK_CALLED_AE;
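
The two new checks bound the attacker-controlled AE length byte (p[2]) before it is stored, closing an overflow when later code trusts calling_len/called_len. A self-contained sketch of the fixed parser shape (caps and names are illustrative, mirroring X25_MAX_DTE_FACIL_LEN and X25_MAX_AE_LEN):

    #include <string.h>

    enum { MAX_FACIL_LEN = 40, MAX_AE_LEN = 40 }; /* illustrative caps */

    struct dte_facilities {
            unsigned char calling_len;
            unsigned char calling_ae[MAX_FACIL_LEN];
    };

    static int parse_calling_ae(struct dte_facilities *f, const unsigned char *p)
    {
            if (p[1] > MAX_FACIL_LEN || p[1] <= 1) /* facility length sane? */
                    return -1;
            if (p[2] > MAX_AE_LEN)                 /* the check this patch adds */
                    return -1;
            f->calling_len = p[2];
            memcpy(f->calling_ae, &p[3], p[1] - 1);
            return 0;
    }

    int main(void)
    {
            const unsigned char fac[] = { 0xcb, 3, 2, 'A', 'E' };
            struct dte_facilities f;

            return parse_calling_ae(&f, fac); /* 0 on success */
    }
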
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index ab2bb42fe094..88843996f935 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -163,6 +163,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
163 skb->sp->xvec[skb->sp->len++] = x; 163 skb->sp->xvec[skb->sp->len++] = x;
164 164
165 spin_lock(&x->lock); 165 spin_lock(&x->lock);
166 if (unlikely(x->km.state == XFRM_STATE_ACQ)) {
167 XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
168 goto drop_unlock;
169 }
170
166 if (unlikely(x->km.state != XFRM_STATE_VALID)) { 171 if (unlikely(x->km.state != XFRM_STATE_VALID)) {
167 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID); 172 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID);
168 goto drop_unlock; 173 goto drop_unlock;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 0cf003dfa8fc..3bb2cdc13b46 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -89,7 +89,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
89 89
90 err = x->type->output(x, skb); 90 err = x->type->output(x, skb);
91 if (err == -EINPROGRESS) 91 if (err == -EINPROGRESS)
92 goto out_exit; 92 goto out;
93 93
94resume: 94resume:
95 if (err) { 95 if (err) {
@@ -107,15 +107,14 @@ resume:
107 x = dst->xfrm; 107 x = dst->xfrm;
108 } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); 108 } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL));
109 109
110 err = 0; 110 return 0;
111 111
112out_exit:
113 return err;
114error: 112error:
115 spin_unlock_bh(&x->lock); 113 spin_unlock_bh(&x->lock);
116error_nolock: 114error_nolock:
117 kfree_skb(skb); 115 kfree_skb(skb);
118 goto out_exit; 116out:
117 return err;
119} 118}
120 119
121int xfrm_output_resume(struct sk_buff *skb, int err) 120int xfrm_output_resume(struct sk_buff *skb, int err)
@@ -215,5 +214,26 @@ int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
215 return inner_mode->afinfo->extract_output(x, skb); 214 return inner_mode->afinfo->extract_output(x, skb);
216} 215}
217 216
217void xfrm_local_error(struct sk_buff *skb, int mtu)
218{
219 unsigned int proto;
220 struct xfrm_state_afinfo *afinfo;
221
222 if (skb->protocol == htons(ETH_P_IP))
223 proto = AF_INET;
224 else if (skb->protocol == htons(ETH_P_IPV6))
225 proto = AF_INET6;
226 else
227 return;
228
229 afinfo = xfrm_state_get_afinfo(proto);
230 if (!afinfo)
231 return;
232
233 afinfo->local_error(skb, mtu);
234 xfrm_state_put_afinfo(afinfo);
235}
236
218EXPORT_SYMBOL_GPL(xfrm_output); 237EXPORT_SYMBOL_GPL(xfrm_output);
219EXPORT_SYMBOL_GPL(xfrm_inner_extract_output); 238EXPORT_SYMBOL_GPL(xfrm_inner_extract_output);
239EXPORT_SYMBOL_GPL(xfrm_local_error);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ea970b8002a2..ed38d5d81f9e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -308,7 +308,7 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
308{ 308{
309 BUG_ON(!policy->walk.dead); 309 BUG_ON(!policy->walk.dead);
310 310
311 if (del_timer(&policy->timer)) 311 if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
312 BUG(); 312 BUG();
313 313
314 security_xfrm_policy_free(policy->security); 314 security_xfrm_policy_free(policy->security);
@@ -320,10 +320,8 @@ static void xfrm_queue_purge(struct sk_buff_head *list)
320{ 320{
321 struct sk_buff *skb; 321 struct sk_buff *skb;
322 322
323 while ((skb = skb_dequeue(list)) != NULL) { 323 while ((skb = skb_dequeue(list)) != NULL)
324 dev_put(skb->dev);
325 kfree_skb(skb); 324 kfree_skb(skb);
326 }
327} 325}
328 326
329/* Rule must be locked. Release descendant resources, announce 327/* Rule must be locked. Release descendant resources, announce
@@ -660,7 +658,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
660 xfrm_pol_hold(policy); 658 xfrm_pol_hold(policy);
661 net->xfrm.policy_count[dir]++; 659 net->xfrm.policy_count[dir]++;
662 atomic_inc(&flow_cache_genid); 660 atomic_inc(&flow_cache_genid);
663 rt_genid_bump(net); 661
662 /* After previous checking, family can either be AF_INET or AF_INET6 */
663 if (policy->family == AF_INET)
664 rt_genid_bump_ipv4(net);
665 else
666 rt_genid_bump_ipv6(net);
667
664 if (delpol) { 668 if (delpol) {
665 xfrm_policy_requeue(delpol, policy); 669 xfrm_policy_requeue(delpol, policy);
666 __xfrm_policy_unlink(delpol, dir); 670 __xfrm_policy_unlink(delpol, dir);
@@ -1758,7 +1762,6 @@ static void xfrm_policy_queue_process(unsigned long arg)
1758 struct sk_buff *skb; 1762 struct sk_buff *skb;
1759 struct sock *sk; 1763 struct sock *sk;
1760 struct dst_entry *dst; 1764 struct dst_entry *dst;
1761 struct net_device *dev;
1762 struct xfrm_policy *pol = (struct xfrm_policy *)arg; 1765 struct xfrm_policy *pol = (struct xfrm_policy *)arg;
1763 struct xfrm_policy_queue *pq = &pol->polq; 1766 struct xfrm_policy_queue *pq = &pol->polq;
1764 struct flowi fl; 1767 struct flowi fl;
@@ -1805,7 +1808,6 @@ static void xfrm_policy_queue_process(unsigned long arg)
1805 dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path, 1808 dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path,
1806 &fl, skb->sk, 0); 1809 &fl, skb->sk, 0);
1807 if (IS_ERR(dst)) { 1810 if (IS_ERR(dst)) {
1808 dev_put(skb->dev);
1809 kfree_skb(skb); 1811 kfree_skb(skb);
1810 continue; 1812 continue;
1811 } 1813 }
@@ -1814,9 +1816,7 @@ static void xfrm_policy_queue_process(unsigned long arg)
1814 skb_dst_drop(skb); 1816 skb_dst_drop(skb);
1815 skb_dst_set(skb, dst); 1817 skb_dst_set(skb, dst);
1816 1818
1817 dev = skb->dev;
1818 err = dst_output(skb); 1819 err = dst_output(skb);
1819 dev_put(dev);
1820 } 1820 }
1821 1821
1822 return; 1822 return;
@@ -1839,7 +1839,6 @@ static int xdst_queue_output(struct sk_buff *skb)
1839 } 1839 }
1840 1840
1841 skb_dst_force(skb); 1841 skb_dst_force(skb);
1842 dev_hold(skb->dev);
1843 1842
1844 spin_lock_bh(&pq->hold_queue.lock); 1843 spin_lock_bh(&pq->hold_queue.lock);
1845 1844
@@ -2126,8 +2125,6 @@ restart:
2126 * have the xfrm_state's. We need to wait for KM to 2125 * have the xfrm_state's. We need to wait for KM to
2127 * negotiate new SA's or bail out with error.*/ 2126 * negotiate new SA's or bail out with error.*/
2128 if (net->xfrm.sysctl_larval_drop) { 2127 if (net->xfrm.sysctl_larval_drop) {
2129 /* EREMOTE tells the caller to generate
2130 * a one-shot blackhole route. */
2131 dst_release(dst); 2128 dst_release(dst);
2132 xfrm_pols_put(pols, drop_pols); 2129 xfrm_pols_put(pols, drop_pols);
2133 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); 2130 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
@@ -2785,7 +2782,7 @@ static void __net_init xfrm_dst_ops_init(struct net *net)
2785 2782
2786static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) 2783static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2787{ 2784{
2788 struct net_device *dev = ptr; 2785 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2789 2786
2790 switch (event) { 2787 switch (event) {
2791 case NETDEV_DOWN: 2788 case NETDEV_DOWN:
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index c721b0d9ab8b..80cd1e55b834 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -44,6 +44,7 @@ static const struct snmp_mib xfrm_mib_list[] = {
44 SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR), 44 SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR),
45 SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), 45 SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR),
46 SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), 46 SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID),
47 SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR),
47 SNMP_MIB_SENTINEL 48 SNMP_MIB_SENTINEL
48}; 49};
49 50
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 78f66fa92449..b9c3f9e943a9 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -39,9 +39,6 @@ static DEFINE_SPINLOCK(xfrm_state_lock);
39 39
40static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; 40static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
41 41
42static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
43static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
44
45static inline unsigned int xfrm_dst_hash(struct net *net, 42static inline unsigned int xfrm_dst_hash(struct net *net,
46 const xfrm_address_t *daddr, 43 const xfrm_address_t *daddr,
47 const xfrm_address_t *saddr, 44 const xfrm_address_t *saddr,
@@ -499,7 +496,8 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
499 INIT_HLIST_NODE(&x->bydst); 496 INIT_HLIST_NODE(&x->bydst);
500 INIT_HLIST_NODE(&x->bysrc); 497 INIT_HLIST_NODE(&x->bysrc);
501 INIT_HLIST_NODE(&x->byspi); 498 INIT_HLIST_NODE(&x->byspi);
502 tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, CLOCK_REALTIME, HRTIMER_MODE_ABS); 499 tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
500 CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
503 setup_timer(&x->rtimer, xfrm_replay_timer_handler, 501 setup_timer(&x->rtimer, xfrm_replay_timer_handler,
504 (unsigned long)x); 502 (unsigned long)x);
505 x->curlft.add_time = get_seconds(); 503 x->curlft.add_time = get_seconds();
@@ -990,11 +988,13 @@ void xfrm_state_insert(struct xfrm_state *x)
990EXPORT_SYMBOL(xfrm_state_insert); 988EXPORT_SYMBOL(xfrm_state_insert);
991 989
992/* xfrm_state_lock is held */ 990/* xfrm_state_lock is held */
993static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, 991static struct xfrm_state *__find_acq_core(struct net *net,
992 const struct xfrm_mark *m,
994 unsigned short family, u8 mode, 993 unsigned short family, u8 mode,
995 u32 reqid, u8 proto, 994 u32 reqid, u8 proto,
996 const xfrm_address_t *daddr, 995 const xfrm_address_t *daddr,
997 const xfrm_address_t *saddr, int create) 996 const xfrm_address_t *saddr,
997 int create)
998{ 998{
999 unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); 999 unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
1000 struct xfrm_state *x; 1000 struct xfrm_state *x;
@@ -1399,9 +1399,9 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark,
1399EXPORT_SYMBOL(xfrm_state_lookup_byaddr); 1399EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1400 1400
1401struct xfrm_state * 1401struct xfrm_state *
1402xfrm_find_acq(struct net *net, struct xfrm_mark *mark, u8 mode, u32 reqid, u8 proto, 1402xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
1403 const xfrm_address_t *daddr, const xfrm_address_t *saddr, 1403 u8 proto, const xfrm_address_t *daddr,
1404 int create, unsigned short family) 1404 const xfrm_address_t *saddr, int create, unsigned short family)
1405{ 1405{
1406 struct xfrm_state *x; 1406 struct xfrm_state *x;
1407 1407
@@ -1860,7 +1860,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1860} 1860}
1861EXPORT_SYMBOL(xfrm_state_unregister_afinfo); 1861EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1862 1862
1863static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) 1863struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
1864{ 1864{
1865 struct xfrm_state_afinfo *afinfo; 1865 struct xfrm_state_afinfo *afinfo;
1866 if (unlikely(family >= NPROTO)) 1866 if (unlikely(family >= NPROTO))
@@ -1872,7 +1872,7 @@ static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
1872 return afinfo; 1872 return afinfo;
1873} 1873}
1874 1874
1875static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) 1875void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1876{ 1876{
1877 rcu_read_unlock(); 1877 rcu_read_unlock();
1878} 1878}
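
Finally, the xfrm_state_alloc() hunk moves SA lifetime timers from CLOCK_REALTIME to CLOCK_BOOTTIME, so expiry keeps accumulating across suspend and cannot be perturbed by wall-clock jumps. The distinction is easy to observe from userspace (standalone sketch; CLOCK_BOOTTIME needs Linux 2.6.39+, and older glibc needs -lrt):

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            struct timespec boot, real;

            clock_gettime(CLOCK_BOOTTIME, &boot); /* monotonic, counts suspend */
            clock_gettime(CLOCK_REALTIME, &real); /* jumps with settimeofday() */
            printf("boottime=%ld.%09ld realtime=%ld.%09ld\n",
                   (long)boot.tv_sec, boot.tv_nsec,
                   (long)real.tv_sec, real.tv_nsec);
            return 0;
    }
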