Diffstat (limited to 'net')
 net/802/garp.c | 4
 net/8021q/Kconfig | 2
 net/8021q/vlan.c | 97
 net/8021q/vlan.h | 58
 net/8021q/vlan_core.c | 80
 net/8021q/vlan_dev.c | 11
 net/8021q/vlan_gvrp.c | 4
 net/8021q/vlan_mvrp.c | 4
 net/8021q/vlan_netlink.c | 32
 net/Kconfig | 10
 net/atm/lec.h | 2
 net/batman-adv/Kconfig | 14
 net/batman-adv/Makefile | 3
 net/batman-adv/bat_iv_ogm.c | 5
 net/batman-adv/bridge_loop_avoidance.c | 2
 net/batman-adv/debugfs.c | 18
 net/batman-adv/distributed-arp-table.c | 22
 net/batman-adv/gateway_client.c | 2
 net/batman-adv/hard-interface.c | 66
 net/batman-adv/hard-interface.h | 13
 net/batman-adv/main.c | 16
 net/batman-adv/main.h | 15
 net/batman-adv/network-coding.c | 1822
 net/batman-adv/network-coding.h | 123
 net/batman-adv/originator.c | 10
 net/batman-adv/packet.h | 33
 net/batman-adv/routing.c | 50
 net/batman-adv/send.c | 5
 net/batman-adv/soft-interface.c | 281
 net/batman-adv/soft-interface.h | 3
 net/batman-adv/sysfs.c | 22
 net/batman-adv/translation-table.c | 29
 net/batman-adv/types.h | 136
 net/batman-adv/unicast.c | 6
 net/batman-adv/vis.c | 4
 net/bluetooth/a2mp.c | 6
 net/bluetooth/af_bluetooth.c | 18
 net/bluetooth/bnep/netdev.c | 2
 net/bluetooth/bnep/sock.c | 4
 net/bluetooth/cmtp/sock.c | 4
 net/bluetooth/hci_conn.c | 4
 net/bluetooth/hci_core.c | 727
 net/bluetooth/hci_event.c | 605
 net/bluetooth/hci_sock.c | 9
 net/bluetooth/hci_sysfs.c | 4
 net/bluetooth/hidp/core.c | 4
 net/bluetooth/hidp/sock.c | 4
 net/bluetooth/l2cap_sock.c | 4
 net/bluetooth/mgmt.c | 680
 net/bluetooth/rfcomm/core.c | 167
 net/bluetooth/rfcomm/sock.c | 3
 net/bluetooth/sco.c | 3
 net/bridge/br_device.c | 4
 net/bridge/br_fdb.c | 14
 net/bridge/br_if.c | 1
 net/bridge/br_mdb.c | 4
 net/bridge/br_multicast.c | 2
 net/bridge/br_netfilter.c | 3
 net/bridge/br_netlink.c | 21
 net/bridge/br_vlan.c | 20
 net/bridge/netfilter/ebt_log.c | 44
 net/bridge/netfilter/ebt_nflog.c | 5
 net/bridge/netfilter/ebt_ulog.c | 132
 net/bridge/netfilter/ebtable_broute.c | 4
 net/bridge/netfilter/ebtables.c | 2
 net/caif/caif_dev.c | 9
 net/caif/caif_socket.c | 22
 net/caif/caif_usb.c | 4
 net/caif/cfcnfg.c | 19
 net/caif/cfctrl.c | 14
 net/caif/cffrml.c | 4
 net/caif/cfmuxl.c | 4
 net/caif/cfpkt_skbuff.c | 8
 net/caif/cfrfml.c | 4
 net/caif/cfserl.c | 4
 net/caif/cfsrvl.c | 13
 net/caif/chnl_net.c | 6
 net/can/af_can.c | 30
 net/can/gw.c | 5
 net/can/raw.c | 5
 net/core/datagram.c | 4
 net/core/dev.c | 108
 net/core/dev_addr_lists.c | 210
 net/core/dst.c | 9
 net/core/ethtool.c | 29
 net/core/fib_rules.c | 4
 net/core/filter.c | 5
 net/core/flow.c | 42
 net/core/flow_dissector.c | 68
 net/core/neighbour.c | 55
 net/core/net-procfs.c | 2
 net/core/netpoll.c | 8
 net/core/rtnetlink.c | 176
 net/core/scm.c | 20
 net/core/skbuff.c | 81
 net/core/sock.c | 22
 net/core/utils.c | 5
 net/dcb/dcbevent.c | 1
 net/dcb/dcbnl.c | 2
 net/dccp/ipv4.c | 5
 net/dccp/ipv6.c | 5
 net/decnet/dn_dev.c | 4
 net/decnet/dn_fib.c | 203
 net/decnet/dn_route.c | 43
 net/decnet/dn_table.c | 45
 net/decnet/netfilter/dn_rtmsg.c | 12
 net/dsa/dsa.c | 233
 net/ethernet/eth.c | 2
 net/ieee802154/6lowpan.c | 142
 net/ieee802154/6lowpan.h | 7
 net/ieee802154/dgram.c | 10
 net/ieee802154/netlink.c | 8
 net/ieee802154/nl-mac.c | 25
 net/ipv4/Kconfig | 7
 net/ipv4/Makefile | 1
 net/ipv4/af_inet.c | 10
 net/ipv4/arp.c | 27
 net/ipv4/devinet.c | 83
 net/ipv4/fib_frontend.c | 10
 net/ipv4/gre.c | 5
 net/ipv4/inet_connection_sock.c | 2
 net/ipv4/inet_diag.c | 10
 net/ipv4/inet_fragment.c | 84
 net/ipv4/inet_lro.c | 5
 net/ipv4/ip_fragment.c | 31
 net/ipv4/ip_gre.c | 1517
 net/ipv4/ip_output.c | 3
 net/ipv4/ip_tunnel.c | 1035
 net/ipv4/ip_vti.c | 42
 net/ipv4/ipcomp.c | 1
 net/ipv4/ipconfig.c | 13
 net/ipv4/ipip.c | 748
 net/ipv4/ipmr.c | 12
 net/ipv4/netfilter.c | 15
 net/ipv4/netfilter/Kconfig | 2
 net/ipv4/netfilter/arp_tables.c | 1
 net/ipv4/netfilter/arptable_filter.c | 4
 net/ipv4/netfilter/ip_tables.c | 10
 net/ipv4/netfilter/ipt_ULOG.c | 134
 net/ipv4/netfilter/iptable_mangle.c | 9
 net/ipv4/netfilter/iptable_nat.c | 23
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 1
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 1
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 9
 net/ipv4/netfilter/nf_nat_h323.c | 1
 net/ipv4/netfilter/nf_nat_pptp.c | 2
 net/ipv4/netfilter/nf_nat_proto_gre.c | 2
 net/ipv4/netfilter/nf_nat_snmp_basic.c | 2
 net/ipv4/ping.c | 5
 net/ipv4/proc.c | 3
 net/ipv4/route.c | 2
 net/ipv4/syncookies.c | 3
 net/ipv4/sysctl_net_ipv4.c | 18
 net/ipv4/tcp.c | 287
 net/ipv4/tcp_input.c | 606
 net/ipv4/tcp_ipv4.c | 108
 net/ipv4/tcp_memcontrol.c | 3
 net/ipv4/tcp_minisocks.c | 44
 net/ipv4/tcp_output.c | 376
 net/ipv4/tcp_timer.c | 21
 net/ipv4/tcp_westwood.c | 2
 net/ipv4/udp.c | 121
 net/ipv4/udp_diag.c | 10
 net/ipv4/xfrm4_mode_tunnel.c | 8
 net/ipv6/Kconfig | 2
 net/ipv6/addrconf.c | 208
 net/ipv6/addrlabel.c | 12
 net/ipv6/af_inet6.c | 7
 net/ipv6/datagram.c | 20
 net/ipv6/icmp.c | 2
 net/ipv6/inet6_connection_sock.c | 10
 net/ipv6/ip6_flowlabel.c | 11
 net/ipv6/ip6_gre.c | 62
 net/ipv6/ip6_offload.c | 4
 net/ipv6/ip6_output.c | 7
 net/ipv6/ip6_tunnel.c | 16
 net/ipv6/ip6mr.c | 10
 net/ipv6/ndisc.c | 2
 net/ipv6/netfilter.c | 12
 net/ipv6/netfilter/Kconfig | 2
 net/ipv6/netfilter/ip6_tables.c | 4
 net/ipv6/netfilter/ip6t_NPT.c | 11
 net/ipv6/netfilter/ip6t_REJECT.c | 2
 net/ipv6/netfilter/ip6table_mangle.c | 9
 net/ipv6/netfilter/ip6table_nat.c | 23
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 8
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 7
 net/ipv6/netfilter/nf_conntrack_reasm.c | 22
 net/ipv6/raw.c | 9
 net/ipv6/reassembly.c | 23
 net/ipv6/route.c | 6
 net/ipv6/sit.c | 41
 net/ipv6/syncookies.c | 3
 net/ipv6/tcp_ipv6.c | 56
 net/ipv6/udp.c | 13
 net/ipv6/udp_offload.c | 8
 net/ipv6/xfrm6_mode_tunnel.c | 7
 net/irda/af_irda.c | 5
 net/irda/ircomm/ircomm_core.c | 2
 net/iucv/af_iucv.c | 3
 net/l2tp/l2tp_core.c | 2
 net/mac80211/cfg.c | 178
 net/mac80211/debugfs_key.c | 10
 net/mac80211/debugfs_netdev.c | 22
 net/mac80211/debugfs_sta.c | 31
 net/mac80211/driver-ops.h | 67
 net/mac80211/ht.c | 52
 net/mac80211/ibss.c | 29
 net/mac80211/ieee80211_i.h | 37
 net/mac80211/iface.c | 29
 net/mac80211/key.c | 208
 net/mac80211/key.h | 18
 net/mac80211/main.c | 77
 net/mac80211/mesh.c | 64
 net/mac80211/mesh.h | 12
 net/mac80211/mesh_plink.c | 37
 net/mac80211/mlme.c | 112
 net/mac80211/offchannel.c | 10
 net/mac80211/pm.c | 123
 net/mac80211/rc80211_minstrel.c | 204
 net/mac80211/rc80211_minstrel.h | 31
 net/mac80211/rc80211_minstrel_debugfs.c | 12
 net/mac80211/rc80211_minstrel_ht.c | 103
 net/mac80211/rc80211_minstrel_ht.h | 6
 net/mac80211/rx.c | 61
 net/mac80211/scan.c | 7
 net/mac80211/sta_info.c | 56
 net/mac80211/sta_info.h | 11
 net/mac80211/trace.h | 46
 net/mac80211/tx.c | 10
 net/mac80211/util.c | 121
 net/mac80211/vht.c | 212
 net/mac802154/mac802154.h | 3
 net/mac802154/mac_cmd.c | 1
 net/mac802154/mib.c | 21
 net/mac802154/tx.c | 29
 net/mac802154/wpan.c | 4
 net/netfilter/core.c | 30
 net/netfilter/ipset/ip_set_core.c | 5
 net/netfilter/ipvs/ip_vs_app.c | 31
 net/netfilter/ipvs/ip_vs_conn.c | 306
 net/netfilter/ipvs/ip_vs_core.c | 81
 net/netfilter/ipvs/ip_vs_ctl.c | 649
 net/netfilter/ipvs/ip_vs_dh.c | 86
 net/netfilter/ipvs/ip_vs_est.c | 2
 net/netfilter/ipvs/ip_vs_ftp.c | 4
 net/netfilter/ipvs/ip_vs_lblc.c | 115
 net/netfilter/ipvs/ip_vs_lblcr.c | 190
 net/netfilter/ipvs/ip_vs_lc.c | 3
 net/netfilter/ipvs/ip_vs_nq.c | 3
 net/netfilter/ipvs/ip_vs_pe.c | 55
 net/netfilter/ipvs/ip_vs_pe_sip.c | 1
 net/netfilter/ipvs/ip_vs_proto_sctp.c | 36
 net/netfilter/ipvs/ip_vs_proto_tcp.c | 40
 net/netfilter/ipvs/ip_vs_proto_udp.c | 33
 net/netfilter/ipvs/ip_vs_rr.c | 64
 net/netfilter/ipvs/ip_vs_sched.c | 63
 net/netfilter/ipvs/ip_vs_sed.c | 5
 net/netfilter/ipvs/ip_vs_sh.c | 86
 net/netfilter/ipvs/ip_vs_sync.c | 35
 net/netfilter/ipvs/ip_vs_wlc.c | 5
 net/netfilter/ipvs/ip_vs_wrr.c | 176
 net/netfilter/ipvs/ip_vs_xmit.c | 1050
 net/netfilter/nf_conntrack_amanda.c | 1
 net/netfilter/nf_conntrack_core.c | 56
 net/netfilter/nf_conntrack_ecache.c | 8
 net/netfilter/nf_conntrack_expect.c | 9
 net/netfilter/nf_conntrack_ftp.c | 1
 net/netfilter/nf_conntrack_h323_main.c | 1
 net/netfilter/nf_conntrack_helper.c | 3
 net/netfilter/nf_conntrack_irc.c | 1
 net/netfilter/nf_conntrack_netlink.c | 100
 net/netfilter/nf_conntrack_pptp.c | 2
 net/netfilter/nf_conntrack_proto.c | 1
 net/netfilter/nf_conntrack_proto_dccp.c | 9
 net/netfilter/nf_conntrack_proto_gre.c | 1
 net/netfilter/nf_conntrack_proto_sctp.c | 3
 net/netfilter/nf_conntrack_proto_tcp.c | 20
 net/netfilter/nf_conntrack_proto_udp.c | 7
 net/netfilter/nf_conntrack_proto_udplite.c | 8
 net/netfilter/nf_conntrack_standalone.c | 17
 net/netfilter/nf_conntrack_tftp.c | 2
 net/netfilter/nf_log.c | 206
 net/netfilter/nf_nat_amanda.c | 1
 net/netfilter/nf_nat_core.c | 9
 net/netfilter/nf_nat_helper.c | 1
 net/netfilter/nf_queue.c | 5
 net/netfilter/nfnetlink.c | 27
 net/netfilter/nfnetlink_log.c | 199
 net/netfilter/nfnetlink_queue_core.c | 274
 net/netfilter/x_tables.c | 1
 net/netfilter/xt_LOG.c | 52
 net/netfilter/xt_NFQUEUE.c | 63
 net/netfilter/xt_TCPMSS.c | 1
 net/netfilter/xt_conntrack.c | 1
 net/netfilter/xt_hashlimit.c | 1
 net/netfilter/xt_limit.c | 1
 net/netfilter/xt_osf.c | 6
 net/netlink/Kconfig | 10
 net/netlink/Makefile | 3
 net/netlink/af_netlink.c | 899
 net/netlink/af_netlink.h | 82
 net/netlink/diag.c | 220
 net/nfc/llcp/commands.c | 205
 net/nfc/llcp/llcp.c | 112
 net/nfc/llcp/llcp.h | 36
 net/nfc/llcp/sock.c | 133
 net/nfc/netlink.c | 172
 net/nfc/nfc.h | 14
 net/openvswitch/actions.c | 6
 net/openvswitch/datapath.c | 406
 net/openvswitch/datapath.h | 72
 net/openvswitch/dp_notify.c | 82
 net/openvswitch/flow.c | 8
 net/openvswitch/flow.h | 21
 net/openvswitch/vport-internal_dev.c | 21
 net/openvswitch/vport-netdev.c | 8
 net/openvswitch/vport.c | 58
 net/openvswitch/vport.h | 19
 net/packet/af_packet.c | 220
 net/packet/internal.h | 3
 net/phonet/pn_netlink.c | 4
 net/rfkill/rfkill-regulator.c | 2
 net/sched/act_api.c | 2
 net/sched/act_csum.c | 39
 net/sched/cls_api.c | 14
 net/sched/cls_flow.c | 2
 net/sched/sch_api.c | 44
 net/sched/sch_htb.c | 31
 net/sctp/associola.c | 12
 net/sctp/bind_addr.c | 7
 net/sctp/endpointola.c | 14
 net/sctp/inqueue.c | 7
 net/sctp/output.c | 5
 net/sctp/outqueue.c | 11
 net/sctp/proc.c | 12
 net/sctp/socket.c | 8
 net/sctp/ssnmap.c | 23
 net/sctp/transport.c | 1
 net/sctp/ulpqueue.c | 3
 net/socket.c | 23
 net/tipc/Kconfig | 7
 net/tipc/Makefile | 2
 net/tipc/bcast.c | 4
 net/tipc/bearer.c | 7
 net/tipc/bearer.h | 16
 net/tipc/core.c | 12
 net/tipc/discover.c | 2
 net/tipc/eth_media.c | 39
 net/tipc/ib_media.c | 387
 net/tipc/netlink.c | 6
 net/unix/af_unix.c | 20
 net/vmw_vsock/vmci_transport.c | 16
 net/vmw_vsock/vmci_transport.h | 3
 net/wireless/ap.c | 62
 net/wireless/core.c | 77
 net/wireless/core.h | 22
 net/wireless/mesh.c | 15
 net/wireless/mlme.c | 230
 net/wireless/nl80211.c | 1857
 net/wireless/nl80211.h | 68
 net/wireless/rdev-ops.h | 20
 net/wireless/reg.c | 6
 net/wireless/sme.c | 28
 net/wireless/sysfs.c | 25
 net/wireless/trace.h | 46
 net/wireless/util.c | 2
 net/xfrm/xfrm_policy.c | 23
 net/xfrm/xfrm_state.c | 1
 net/xfrm/xfrm_user.c | 17
 370 files changed, 15723 insertions(+), 10061 deletions(-)
diff --git a/net/802/garp.c b/net/802/garp.c
index 8456f5d98b85..5d9630a0eb93 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -609,8 +609,12 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
 	/* Delete timer and generate a final TRANSMIT_PDU event to flush out
 	 * all pending messages before the applicant is gone. */
 	del_timer_sync(&app->join_timer);
+
+	spin_lock_bh(&app->lock);
 	garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
 	garp_pdu_queue(app);
+	spin_unlock_bh(&app->lock);
+
 	garp_queue_xmit(app);
 
 	dev_mc_del(dev, appl->proto.group_address);
diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig
index 8f7517df41a5..b85a91fa61f1 100644
--- a/net/8021q/Kconfig
+++ b/net/8021q/Kconfig
@@ -3,7 +3,7 @@
 #
 
 config VLAN_8021Q
-	tristate "802.1Q VLAN Support"
+	tristate "802.1Q/802.1ad VLAN Support"
 	---help---
 	  Select this and you will be able to create 802.1Q VLAN interfaces
 	  on your ethernet interfaces. 802.1Q VLAN supports almost
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 85addcd9372b..9424f3718ea7 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -51,14 +51,18 @@ const char vlan_version[] = DRV_VERSION;
 
 /* End of global variables definitions. */
 
-static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
+static int vlan_group_prealloc_vid(struct vlan_group *vg,
+				   __be16 vlan_proto, u16 vlan_id)
 {
 	struct net_device **array;
+	unsigned int pidx, vidx;
 	unsigned int size;
 
 	ASSERT_RTNL();
 
-	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+	pidx = vlan_proto_idx(vlan_proto);
+	vidx = vlan_id / VLAN_GROUP_ARRAY_PART_LEN;
+	array = vg->vlan_devices_arrays[pidx][vidx];
 	if (array != NULL)
 		return 0;
 
@@ -67,7 +71,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
 	if (array == NULL)
 		return -ENOBUFS;
 
-	vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN] = array;
+	vg->vlan_devices_arrays[pidx][vidx] = array;
 	return 0;
 }
 
@@ -93,7 +97,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	if (vlan->flags & VLAN_FLAG_GVRP)
 		vlan_gvrp_request_leave(dev);
 
-	vlan_group_set_device(grp, vlan_id, NULL);
+	vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, NULL);
 	/* Because unregister_netdevice_queue() makes sure at least one rcu
 	 * grace period is respected before device freeing,
 	 * we dont need to call synchronize_net() here.
@@ -112,13 +116,14 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	 * VLAN is not 0 (leave it there for 802.1p).
 	 */
 	if (vlan_id)
-		vlan_vid_del(real_dev, vlan_id);
+		vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
 
 	/* Get rid of the vlan's reference to real_dev */
 	dev_put(real_dev);
 }
 
-int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
+int vlan_check_real_dev(struct net_device *real_dev,
+			__be16 protocol, u16 vlan_id)
 {
 	const char *name = real_dev->name;
 
@@ -127,7 +132,7 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
 		return -EOPNOTSUPP;
 	}
 
-	if (vlan_find_dev(real_dev, vlan_id) != NULL)
+	if (vlan_find_dev(real_dev, protocol, vlan_id) != NULL)
 		return -EEXIST;
 
 	return 0;
@@ -142,7 +147,7 @@ int register_vlan_dev(struct net_device *dev)
 	struct vlan_group *grp;
 	int err;
 
-	err = vlan_vid_add(real_dev, vlan_id);
+	err = vlan_vid_add(real_dev, vlan->vlan_proto, vlan_id);
 	if (err)
 		return err;
 
@@ -160,7 +165,7 @@ int register_vlan_dev(struct net_device *dev)
 		goto out_uninit_gvrp;
 	}
 
-	err = vlan_group_prealloc_vid(grp, vlan_id);
+	err = vlan_group_prealloc_vid(grp, vlan->vlan_proto, vlan_id);
 	if (err < 0)
 		goto out_uninit_mvrp;
 
@@ -181,7 +186,7 @@ int register_vlan_dev(struct net_device *dev)
 	/* So, got the sucker initialized, now lets place
 	 * it into our local structure.
 	 */
-	vlan_group_set_device(grp, vlan_id, dev);
+	vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, dev);
 	grp->nr_vlan_devs++;
 
 	return 0;
@@ -195,7 +200,7 @@ out_uninit_gvrp:
 	if (grp->nr_vlan_devs == 0)
 		vlan_gvrp_uninit_applicant(real_dev);
 out_vid_del:
-	vlan_vid_del(real_dev, vlan_id);
+	vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
 	return err;
 }
 
@@ -213,7 +218,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	if (vlan_id >= VLAN_VID_MASK)
 		return -ERANGE;
 
-	err = vlan_check_real_dev(real_dev, vlan_id);
+	err = vlan_check_real_dev(real_dev, htons(ETH_P_8021Q), vlan_id);
 	if (err < 0)
 		return err;
 
@@ -255,6 +260,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	new_dev->mtu = real_dev->mtu;
 	new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT);
 
+	vlan_dev_priv(new_dev)->vlan_proto = htons(ETH_P_8021Q);
 	vlan_dev_priv(new_dev)->vlan_id = vlan_id;
 	vlan_dev_priv(new_dev)->real_dev = real_dev;
 	vlan_dev_priv(new_dev)->dent = NULL;
@@ -301,7 +307,7 @@ static void vlan_transfer_features(struct net_device *dev,
 {
 	vlandev->gso_max_size = dev->gso_max_size;
 
-	if (dev->features & NETIF_F_HW_VLAN_TX)
+	if (dev->features & NETIF_F_HW_VLAN_CTAG_TX)
 		vlandev->hard_header_len = dev->hard_header_len;
 	else
 		vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN;
@@ -341,16 +347,17 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	int i, flgs;
 	struct net_device *vlandev;
 	struct vlan_dev_priv *vlan;
+	bool last = false;
 	LIST_HEAD(list);
 
 	if (is_vlan_dev(dev))
 		__vlan_device_event(dev, event);
 
 	if ((event == NETDEV_UP) &&
-	    (dev->features & NETIF_F_HW_VLAN_FILTER)) {
+	    (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
 		pr_info("adding VLAN 0 to HW filter on device %s\n",
 			dev->name);
-		vlan_vid_add(dev, 0);
+		vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
 	}
 
 	vlan_info = rtnl_dereference(dev->vlan_info);
@@ -365,22 +372,13 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	switch (event) {
 	case NETDEV_CHANGE:
 		/* Propagate real device state to vlan devices */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev)
 			netif_stacked_transfer_operstate(dev, vlandev);
-		}
 		break;
 
 	case NETDEV_CHANGEADDR:
 		/* Adjust unicast filters on underlying device */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			flgs = vlandev->flags;
 			if (!(flgs & IFF_UP))
 				continue;
@@ -390,11 +388,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		break;
 
 	case NETDEV_CHANGEMTU:
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			if (vlandev->mtu <= dev->mtu)
 				continue;
 
@@ -404,26 +398,16 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_FEAT_CHANGE:
 		/* Propagate device features to underlying device */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev)
 			vlan_transfer_features(dev, vlandev);
-		}
-
 		break;
 
 	case NETDEV_DOWN:
-		if (dev->features & NETIF_F_HW_VLAN_FILTER)
-			vlan_vid_del(dev, 0);
+		if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+			vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
 
 		/* Put all VLANs for this dev in the down state too. */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			flgs = vlandev->flags;
 			if (!(flgs & IFF_UP))
 				continue;
@@ -437,11 +421,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_UP:
 		/* Put all VLANs for this dev in the up state too. */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			flgs = vlandev->flags;
 			if (flgs & IFF_UP)
 				continue;
@@ -458,17 +438,15 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		if (dev->reg_state != NETREG_UNREGISTERING)
 			break;
 
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			/* removal of last vid destroys vlan_info, abort
 			 * afterwards */
 			if (vlan_info->nr_vids == 1)
-				i = VLAN_N_VID;
+				last = true;
 
 			unregister_vlan_dev(vlandev, &list);
+			if (last)
+				break;
 		}
 		unregister_netdevice_many(&list);
 		break;
@@ -482,13 +460,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	case NETDEV_NOTIFY_PEERS:
 	case NETDEV_BONDING_FAILOVER:
 		/* Propagate to vlan devices */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev)
 			call_netdevice_notifiers(event, vlandev);
-		}
 		break;
 	}
 
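
The refactor above replaces six copies of the open-coded VLAN_N_VID scan with the vlan_group_for_each_dev() iterator (defined in the vlan.h hunk below). For context, a minimal sketch of how such a netdevice event handler is wired up in a 3.9-era kernel — the foo_* names are hypothetical, not part of this patch:

```c
#include <linux/netdevice.h>
#include <linux/notifier.h>

static int foo_device_event(struct notifier_block *unused,
			    unsigned long event, void *ptr)
{
	/* in this kernel generation, ptr is the net_device itself */
	struct net_device *dev = ptr;

	switch (event) {
	case NETDEV_UP:
		/* lower device came up; adjust stacked devices */
		pr_debug("%s is up\n", dev->name);
		break;
	case NETDEV_UNREGISTER:
		/* lower device is going away; drop references to it */
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block foo_notifier = {
	.notifier_call = foo_device_event,
};

/* at module init: register_netdevice_notifier(&foo_notifier); */
```
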
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 670f1e8cfc0f..ba5983f34c42 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -49,6 +49,7 @@ struct netpoll;
  * @ingress_priority_map: ingress priority mappings
  * @nr_egress_mappings: number of egress priority mappings
  * @egress_priority_map: hash of egress priority mappings
+ * @vlan_proto: VLAN encapsulation protocol
  * @vlan_id: VLAN identifier
  * @flags: device flags
  * @real_dev: underlying netdevice
@@ -62,6 +63,7 @@ struct vlan_dev_priv {
 	unsigned int nr_egress_mappings;
 	struct vlan_priority_tci_mapping *egress_priority_map[16];
 
+	__be16 vlan_proto;
 	u16 vlan_id;
 	u16 flags;
 
@@ -87,10 +89,17 @@ static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
 #define VLAN_GROUP_ARRAY_SPLIT_PARTS 8
 #define VLAN_GROUP_ARRAY_PART_LEN (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS)
 
+enum vlan_protos {
+	VLAN_PROTO_8021Q = 0,
+	VLAN_PROTO_8021AD,
+	VLAN_PROTO_NUM,
+};
+
 struct vlan_group {
 	unsigned int nr_vlan_devs;
 	struct hlist_node hlist;	/* linked list */
-	struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS];
+	struct net_device **vlan_devices_arrays[VLAN_PROTO_NUM]
+					       [VLAN_GROUP_ARRAY_SPLIT_PARTS];
 };
 
 struct vlan_info {
@@ -103,37 +112,67 @@ struct vlan_info {
 	struct rcu_head rcu;
 };
 
-static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
-						       u16 vlan_id)
+static inline unsigned int vlan_proto_idx(__be16 proto)
+{
+	switch (proto) {
+	case __constant_htons(ETH_P_8021Q):
+		return VLAN_PROTO_8021Q;
+	case __constant_htons(ETH_P_8021AD):
+		return VLAN_PROTO_8021AD;
+	default:
+		BUG();
+		return 0;
+	}
+}
+
+static inline struct net_device *__vlan_group_get_device(struct vlan_group *vg,
+							 unsigned int pidx,
+							 u16 vlan_id)
 {
 	struct net_device **array;
-	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+
+	array = vg->vlan_devices_arrays[pidx]
+				       [vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
 	return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL;
 }
 
+static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
+						       __be16 vlan_proto,
+						       u16 vlan_id)
+{
+	return __vlan_group_get_device(vg, vlan_proto_idx(vlan_proto), vlan_id);
+}
+
 static inline void vlan_group_set_device(struct vlan_group *vg,
-					 u16 vlan_id,
+					 __be16 vlan_proto, u16 vlan_id,
 					 struct net_device *dev)
 {
 	struct net_device **array;
 	if (!vg)
 		return;
-	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+	array = vg->vlan_devices_arrays[vlan_proto_idx(vlan_proto)]
+				       [vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
 	array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev;
 }
 
 /* Must be invoked with rcu_read_lock or with RTNL. */
 static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
-					       u16 vlan_id)
+					       __be16 vlan_proto, u16 vlan_id)
 {
 	struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info);
 
 	if (vlan_info)
-		return vlan_group_get_device(&vlan_info->grp, vlan_id);
+		return vlan_group_get_device(&vlan_info->grp,
+					     vlan_proto, vlan_id);
 
 	return NULL;
 }
 
+#define vlan_group_for_each_dev(grp, i, dev) \
+	for ((i) = 0; i < VLAN_PROTO_NUM * VLAN_N_VID; i++) \
+		if (((dev) = __vlan_group_get_device((grp), (i) / VLAN_N_VID, \
+							    (i) % VLAN_N_VID)))
+
 /* found in vlan_dev.c */
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
 				   u32 skb_prio, u16 vlan_prio);
@@ -142,7 +181,8 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
 
-int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id);
+int vlan_check_real_dev(struct net_device *real_dev,
+			__be16 protocol, u16 vlan_id);
 void vlan_setup(struct net_device *dev);
 int register_vlan_dev(struct net_device *dev);
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
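
A short usage sketch (not from the patch) of the per-protocol lookup added above; real_dev stands for a hypothetical lower device. As the comment above vlan_find_dev() says, callers must hold rcu_read_lock() or RTNL:

```c
struct net_device *vlan_dev;

rcu_read_lock();
/* find the 802.1ad (S-tagged) VLAN device with VID 100 on real_dev */
vlan_dev = vlan_find_dev(real_dev, htons(ETH_P_8021AD), 100);
if (vlan_dev)
	pr_debug("found %s\n", vlan_dev->name);
rcu_read_unlock();
```

Before this change the same VID could only exist once per device; the extra [VLAN_PROTO_NUM] array dimension lets a C-tag and an S-tag device with the same VID coexist on one lower device.
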
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index f3b6f515eba6..8a15eaadc4bd 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -8,11 +8,12 @@
 bool vlan_do_receive(struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
+	__be16 vlan_proto = skb->vlan_proto;
 	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
 	struct net_device *vlan_dev;
 	struct vlan_pcpu_stats *rx_stats;
 
-	vlan_dev = vlan_find_dev(skb->dev, vlan_id);
+	vlan_dev = vlan_find_dev(skb->dev, vlan_proto, vlan_id);
 	if (!vlan_dev)
 		return false;
 
@@ -38,7 +39,8 @@ bool vlan_do_receive(struct sk_buff **skbp)
 	 * original position later
 	 */
 	skb_push(skb, offset);
-	skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
+	skb = *skbp = vlan_insert_tag(skb, skb->vlan_proto,
+				      skb->vlan_tci);
 	if (!skb)
 		return false;
 	skb_pull(skb, offset + VLAN_HLEN);
@@ -62,12 +64,13 @@ bool vlan_do_receive(struct sk_buff **skbp)
 
 /* Must be invoked with rcu_read_lock. */
 struct net_device *__vlan_find_dev_deep(struct net_device *dev,
-					u16 vlan_id)
+					__be16 vlan_proto, u16 vlan_id)
 {
 	struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info);
 
 	if (vlan_info) {
-		return vlan_group_get_device(&vlan_info->grp, vlan_id);
+		return vlan_group_get_device(&vlan_info->grp,
+					     vlan_proto, vlan_id);
 	} else {
 		/*
 		 * Lower devices of master uppers (bonding, team) do not have
@@ -78,7 +81,8 @@ struct net_device *__vlan_find_dev_deep(struct net_device *dev,
 
 		upper_dev = netdev_master_upper_dev_get_rcu(dev);
 		if (upper_dev)
-			return __vlan_find_dev_deep(upper_dev, vlan_id);
+			return __vlan_find_dev_deep(upper_dev,
+						    vlan_proto, vlan_id);
 	}
 
 	return NULL;
@@ -125,7 +129,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
 
 	vhdr = (struct vlan_hdr *) skb->data;
 	vlan_tci = ntohs(vhdr->h_vlan_TCI);
-	__vlan_hwaccel_put_tag(skb, vlan_tci);
+	__vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
 
 	skb_pull_rcsum(skb, VLAN_HLEN);
 	vlan_set_encap_proto(skb, vhdr);
@@ -153,10 +157,11 @@ EXPORT_SYMBOL(vlan_untag);
 
 static void vlan_group_free(struct vlan_group *grp)
 {
-	int i;
+	int i, j;
 
-	for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
-		kfree(grp->vlan_devices_arrays[i]);
+	for (i = 0; i < VLAN_PROTO_NUM; i++)
+		for (j = 0; j < VLAN_GROUP_ARRAY_SPLIT_PARTS; j++)
+			kfree(grp->vlan_devices_arrays[i][j]);
 }
 
 static void vlan_info_free(struct vlan_info *vlan_info)
@@ -185,35 +190,49 @@ static struct vlan_info *vlan_info_alloc(struct net_device *dev)
 
 struct vlan_vid_info {
 	struct list_head list;
-	unsigned short vid;
+	__be16 proto;
+	u16 vid;
 	int refcount;
 };
 
+static bool vlan_hw_filter_capable(const struct net_device *dev,
+				   const struct vlan_vid_info *vid_info)
+{
+	if (vid_info->proto == htons(ETH_P_8021Q) &&
+	    dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+		return true;
+	if (vid_info->proto == htons(ETH_P_8021AD) &&
+	    dev->features & NETIF_F_HW_VLAN_STAG_FILTER)
+		return true;
+	return false;
+}
+
 static struct vlan_vid_info *vlan_vid_info_get(struct vlan_info *vlan_info,
-					       unsigned short vid)
+					       __be16 proto, u16 vid)
 {
 	struct vlan_vid_info *vid_info;
 
 	list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
-		if (vid_info->vid == vid)
+		if (vid_info->proto == proto && vid_info->vid == vid)
 			return vid_info;
 	}
 	return NULL;
 }
 
-static struct vlan_vid_info *vlan_vid_info_alloc(unsigned short vid)
+static struct vlan_vid_info *vlan_vid_info_alloc(__be16 proto, u16 vid)
 {
 	struct vlan_vid_info *vid_info;
 
 	vid_info = kzalloc(sizeof(struct vlan_vid_info), GFP_KERNEL);
 	if (!vid_info)
 		return NULL;
+	vid_info->proto = proto;
 	vid_info->vid = vid;
 
 	return vid_info;
 }
 
-static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid,
+static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
 			  struct vlan_vid_info **pvid_info)
 {
 	struct net_device *dev = vlan_info->real_dev;
@@ -221,12 +240,12 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid,
 	struct vlan_vid_info *vid_info;
 	int err;
 
-	vid_info = vlan_vid_info_alloc(vid);
+	vid_info = vlan_vid_info_alloc(proto, vid);
 	if (!vid_info)
 		return -ENOMEM;
 
-	if (dev->features & NETIF_F_HW_VLAN_FILTER) {
-		err = ops->ndo_vlan_rx_add_vid(dev, vid);
+	if (vlan_hw_filter_capable(dev, vid_info)) {
+		err = ops->ndo_vlan_rx_add_vid(dev, proto, vid);
 		if (err) {
 			kfree(vid_info);
 			return err;
@@ -238,7 +257,7 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid,
 	return 0;
 }
 
-int vlan_vid_add(struct net_device *dev, unsigned short vid)
+int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct vlan_info *vlan_info;
 	struct vlan_vid_info *vid_info;
@@ -254,9 +273,9 @@ int vlan_vid_add(struct net_device *dev, unsigned short vid)
 			return -ENOMEM;
 		vlan_info_created = true;
 	}
-	vid_info = vlan_vid_info_get(vlan_info, vid);
+	vid_info = vlan_vid_info_get(vlan_info, proto, vid);
 	if (!vid_info) {
-		err = __vlan_vid_add(vlan_info, vid, &vid_info);
+		err = __vlan_vid_add(vlan_info, proto, vid, &vid_info);
 		if (err)
 			goto out_free_vlan_info;
 	}
@@ -279,14 +298,15 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
 {
 	struct net_device *dev = vlan_info->real_dev;
 	const struct net_device_ops *ops = dev->netdev_ops;
-	unsigned short vid = vid_info->vid;
+	__be16 proto = vid_info->proto;
+	u16 vid = vid_info->vid;
 	int err;
 
-	if (dev->features & NETIF_F_HW_VLAN_FILTER) {
-		err = ops->ndo_vlan_rx_kill_vid(dev, vid);
+	if (vlan_hw_filter_capable(dev, vid_info)) {
+		err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
 		if (err) {
-			pr_warn("failed to kill vid %d for device %s\n",
-				vid, dev->name);
+			pr_warn("failed to kill vid %04x/%d for device %s\n",
+				proto, vid, dev->name);
 		}
 	}
 	list_del(&vid_info->list);
@@ -294,7 +314,7 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
 	vlan_info->nr_vids--;
 }
 
-void vlan_vid_del(struct net_device *dev, unsigned short vid)
+void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct vlan_info *vlan_info;
 	struct vlan_vid_info *vid_info;
@@ -305,7 +325,7 @@ void vlan_vid_del(struct net_device *dev, unsigned short vid)
 	if (!vlan_info)
 		return;
 
-	vid_info = vlan_vid_info_get(vlan_info, vid);
+	vid_info = vlan_vid_info_get(vlan_info, proto, vid);
 	if (!vid_info)
 		return;
 	vid_info->refcount--;
@@ -333,7 +353,7 @@ int vlan_vids_add_by_dev(struct net_device *dev,
 		return 0;
 
 	list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
-		err = vlan_vid_add(dev, vid_info->vid);
+		err = vlan_vid_add(dev, vid_info->proto, vid_info->vid);
 		if (err)
 			goto unwind;
 	}
@@ -343,7 +363,7 @@ unwind:
 	list_for_each_entry_continue_reverse(vid_info,
 					     &vlan_info->vid_list,
 					     list) {
-		vlan_vid_del(dev, vid_info->vid);
+		vlan_vid_del(dev, vid_info->proto, vid_info->vid);
 	}
 
 	return err;
@@ -363,7 +383,7 @@ void vlan_vids_del_by_dev(struct net_device *dev,
 		return;
 
 	list_for_each_entry(vid_info, &vlan_info->vid_list, list)
-		vlan_vid_del(dev, vid_info->proto, vid_info->vid);
+		vlan_vid_del(dev, vid_info->proto, vid_info->vid);
 }
 EXPORT_SYMBOL(vlan_vids_del_by_dev);
 
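
__vlan_vid_add()/__vlan_vid_del() above now pass the tag protocol through to the driver's rx_add_vid/rx_kill_vid callbacks. A sketch of what a driver-side callback looks like under the widened signature (foo_* is a hypothetical driver, not from this patch):

```c
static int foo_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
{
	/* This hypothetical hardware can only filter C-tagged (802.1Q)
	 * frames, so it advertises NETIF_F_HW_VLAN_CTAG_FILTER but not
	 * NETIF_F_HW_VLAN_STAG_FILTER; vlan_hw_filter_capable() above then
	 * never calls it for 802.1ad, but be defensive anyway.
	 */
	if (proto != htons(ETH_P_8021Q))
		return -EPROTONOSUPPORT;

	/* program vid into the hardware VLAN filter table here */
	return 0;
}
```
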
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 19cf81bf9f69..8af508536d36 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -99,6 +99,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				const void *daddr, const void *saddr,
 				unsigned int len)
 {
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct vlan_hdr *vhdr;
 	unsigned int vhdrlen = 0;
 	u16 vlan_tci = 0;
@@ -120,8 +121,8 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		else
 			vhdr->h_vlan_encapsulated_proto = htons(len);
 
-		skb->protocol = htons(ETH_P_8021Q);
-		type = ETH_P_8021Q;
+		skb->protocol = vlan->vlan_proto;
+		type = ntohs(vlan->vlan_proto);
 		vhdrlen = VLAN_HLEN;
 	}
 
@@ -161,12 +162,12 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	 * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
 	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
 	 */
-	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
+	if (veth->h_vlan_proto != vlan->vlan_proto ||
 	    vlan->flags & VLAN_FLAG_REORDER_HDR) {
 		u16 vlan_tci;
 		vlan_tci = vlan->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
-		skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
+		skb = __vlan_hwaccel_put_tag(skb, vlan->vlan_proto, vlan_tci);
 	}
 
 	skb->dev = vlan->real_dev;
@@ -583,7 +584,7 @@ static int vlan_dev_init(struct net_device *dev)
 #endif
 
 	dev->needed_headroom = real_dev->needed_headroom;
-	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
+	if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) {
 		dev->header_ops = real_dev->header_ops;
 		dev->hard_header_len = real_dev->hard_header_len;
 	} else {
diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c
index 6f9755352760..66a80320b032 100644
--- a/net/8021q/vlan_gvrp.c
+++ b/net/8021q/vlan_gvrp.c
@@ -32,6 +32,8 @@ int vlan_gvrp_request_join(const struct net_device *dev)
 	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->vlan_proto != htons(ETH_P_8021Q))
+		return 0;
 	return garp_request_join(vlan->real_dev, &vlan_gvrp_app,
 				 &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
 }
@@ -41,6 +43,8 @@ void vlan_gvrp_request_leave(const struct net_device *dev)
 	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->vlan_proto != htons(ETH_P_8021Q))
+		return;
 	garp_request_leave(vlan->real_dev, &vlan_gvrp_app,
 			   &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
 }
diff --git a/net/8021q/vlan_mvrp.c b/net/8021q/vlan_mvrp.c
index d9ec1d5964aa..e0fe091801b0 100644
--- a/net/8021q/vlan_mvrp.c
+++ b/net/8021q/vlan_mvrp.c
@@ -38,6 +38,8 @@ int vlan_mvrp_request_join(const struct net_device *dev)
 	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->vlan_proto != htons(ETH_P_8021Q))
+		return 0;
 	return mrp_request_join(vlan->real_dev, &vlan_mrp_app,
 				&vlan_id, sizeof(vlan_id), MVRP_ATTR_VID);
 }
@@ -47,6 +49,8 @@ void vlan_mvrp_request_leave(const struct net_device *dev)
 	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->vlan_proto != htons(ETH_P_8021Q))
+		return;
 	mrp_request_leave(vlan->real_dev, &vlan_mrp_app,
 			  &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID);
 }
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 1789658b7cd7..309129732285 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -23,6 +23,7 @@ static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = {
 	[IFLA_VLAN_FLAGS] = { .len = sizeof(struct ifla_vlan_flags) },
 	[IFLA_VLAN_EGRESS_QOS] = { .type = NLA_NESTED },
 	[IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED },
+	[IFLA_VLAN_PROTOCOL] = { .type = NLA_U16 },
 };
 
 static const struct nla_policy vlan_map_policy[IFLA_VLAN_QOS_MAX + 1] = {
@@ -53,6 +54,16 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
 	if (!data)
 		return -EINVAL;
 
+	if (data[IFLA_VLAN_PROTOCOL]) {
+		switch (nla_get_be16(data[IFLA_VLAN_PROTOCOL])) {
+		case __constant_htons(ETH_P_8021Q):
+		case __constant_htons(ETH_P_8021AD):
+			break;
+		default:
+			return -EPROTONOSUPPORT;
+		}
+	}
+
 	if (data[IFLA_VLAN_ID]) {
 		id = nla_get_u16(data[IFLA_VLAN_ID]);
 		if (id >= VLAN_VID_MASK)
@@ -107,6 +118,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev;
+	__be16 proto;
 	int err;
 
 	if (!data[IFLA_VLAN_ID])
@@ -118,11 +130,17 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 	if (!real_dev)
 		return -ENODEV;
 
-	vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]);
-	vlan->real_dev = real_dev;
-	vlan->flags = VLAN_FLAG_REORDER_HDR;
+	if (data[IFLA_VLAN_PROTOCOL])
+		proto = nla_get_be16(data[IFLA_VLAN_PROTOCOL]);
+	else
+		proto = htons(ETH_P_8021Q);
+
+	vlan->vlan_proto = proto;
+	vlan->vlan_id	 = nla_get_u16(data[IFLA_VLAN_ID]);
+	vlan->real_dev	 = real_dev;
+	vlan->flags	 = VLAN_FLAG_REORDER_HDR;
 
-	err = vlan_check_real_dev(real_dev, vlan->vlan_id);
+	err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id);
 	if (err < 0)
 		return err;
 
@@ -151,7 +169,8 @@ static size_t vlan_get_size(const struct net_device *dev)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 
-	return nla_total_size(2) +	/* IFLA_VLAN_ID */
+	return nla_total_size(2) +	/* IFLA_VLAN_PROTOCOL */
+	       nla_total_size(2) +	/* IFLA_VLAN_ID */
 	       sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */
 	       vlan_qos_map_size(vlan->nr_ingress_mappings) +
 	       vlan_qos_map_size(vlan->nr_egress_mappings);
@@ -166,7 +185,8 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	struct nlattr *nest;
 	unsigned int i;
 
-	if (nla_put_u16(skb, IFLA_VLAN_ID, vlan_dev_priv(dev)->vlan_id))
+	if (nla_put_be16(skb, IFLA_VLAN_PROTOCOL, vlan->vlan_proto) ||
+	    nla_put_u16(skb, IFLA_VLAN_ID, vlan->vlan_id))
 		goto nla_put_failure;
 	if (vlan->flags) {
 		f.flags = vlan->flags;
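
With the IFLA_VLAN_PROTOCOL attribute in place, an 802.1ad device can be requested from userspace (iproute2 gained a matching "proto" keyword, roughly "ip link add link eth0 eth0.100 type vlan proto 802.1ad id 100"). A hypothetical libmnl-style sketch of the attribute nesting, mirroring the nla_get_be16() parsing in vlan_newlink() above:

```c
/* nlh is an RTM_NEWLINK message being built; IFLA_LINKINFO and
 * IFLA_INFO_KIND ("vlan") are assumed to be filled in already.
 */
struct nlattr *data;

data = mnl_attr_nest_start(nlh, IFLA_INFO_DATA);
/* network byte order, as vlan_validate() expects */
mnl_attr_put_u16(nlh, IFLA_VLAN_PROTOCOL, htons(ETH_P_8021AD));
mnl_attr_put_u16(nlh, IFLA_VLAN_ID, 100);
mnl_attr_nest_end(nlh, data);
```
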
diff --git a/net/Kconfig b/net/Kconfig
index 6f676ab885be..1a2221630e6a 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -23,6 +23,15 @@ menuconfig NET
 
 if NET
 
+config NETLINK_MMAP
+	bool "Netlink: mmaped IO"
+	help
+	  This option enables support for memory mapped netlink IO. This
+	  reduces overhead by avoiding copying data between kernel- and
+	  userspace.
+
+	  If unsure, say N.
+
 config WANT_COMPAT_NETLINK_MESSAGES
 	bool
 	help
@@ -217,6 +226,7 @@ source "net/dns_resolver/Kconfig"
 source "net/batman-adv/Kconfig"
 source "net/openvswitch/Kconfig"
 source "net/vmw_vsock/Kconfig"
+source "net/netlink/Kconfig"
 
 config RPS
 	boolean
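
The net/netlink/Kconfig sourced above provides the NETLINK_MMAP option added earlier in this hunk. A rough userspace sketch of configuring an RX ring on a netlink socket under this feature, based on the nl_mmap_req layout this patch set introduces — treat the sizes as illustrative assumptions:

```c
#include <sys/mman.h>
#include <sys/socket.h>
#include <linux/netlink.h>

/* fd is an open netlink socket; the geometry must satisfy
 * nm_frame_nr == nm_block_nr * (nm_block_size / nm_frame_size).
 */
struct nl_mmap_req req = {
	.nm_block_size	= 4096,	/* assumes a 4 KiB page size */
	.nm_block_nr	= 64,
	.nm_frame_size	= 2048,
	.nm_frame_nr	= 64 * (4096 / 2048),
};
void *rx_ring;

if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
	/* kernel built without CONFIG_NETLINK_MMAP: fall back to recvmsg() */;

rx_ring = mmap(NULL, req.nm_block_size * req.nm_block_nr,
	       PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
```
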
diff --git a/net/atm/lec.h b/net/atm/lec.h
index a86aff9a3c04..4149db1b7885 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -58,7 +58,7 @@ struct lane2_ops {
  * field in h_type field. Data follows immediately after header.
  * 2. LLC Data frames whose total length, including LLC field and data,
  *    but not padding required to meet the minimum data frame length,
- *    is less than 1536(0x0600) MUST be encoded by placing that length
+ *    is less than ETH_P_802_3_MIN MUST be encoded by placing that length
  *    in the h_type field. The LLC field follows header immediately.
  * 3. LLC data frames longer than this maximum MUST be encoded by placing
  *    the value 0 in the h_type field.
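
The constant swap above is purely cosmetic (ETH_P_802_3_MIN is 0x0600). A compact restatement of the three encoding rules from that comment, as hypothetical code — h_type, type, and len are illustrative locals, not LANE2 API:

```c
if (type)				/* rule 1: typed (DIX) frame */
	h_type = htons(type);
else if (len < ETH_P_802_3_MIN)		/* rule 2: short LLC frame carries its length */
	h_type = htons(len);
else					/* rule 3: long LLC frame carries zero */
	h_type = 0;
```
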
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index 8d8afb134b3a..fa780b76630e 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -36,6 +36,20 @@ config BATMAN_ADV_DAT
 	  mesh networks. If you think that your network does not need
 	  this option you can safely remove it and save some space.
 
+config BATMAN_ADV_NC
+	bool "Network Coding"
+	depends on BATMAN_ADV
+	default n
+	help
+	  This option enables network coding, a mechanism that aims to
+	  increase the overall network throughput by fusing multiple
+	  packets in one transmission.
+	  Note that interfaces controlled by batman-adv must be manually
+	  configured to have promiscuous mode enabled in order to make
+	  network coding work.
+	  If you think that your network does not need this feature you
+	  can safely disable it and save some space.
+
 config BATMAN_ADV_DEBUG
 	bool "B.A.T.M.A.N. debugging"
 	depends on BATMAN_ADV
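
The promiscuous-mode requirement in the help text is an administrative step done from userspace (for instance "ip link set dev eth0 promisc on"); batman-adv deliberately leaves it to the administrator. Expressed as in-kernel code it would correspond to bumping the device's promiscuity count — a sketch, not batman-adv code:

```c
/* hard_iface->net_dev is the slave interface carrying batman-adv traffic */
rtnl_lock();
dev_set_promiscuity(hard_iface->net_dev, 1);	/* +1: enable overhearing */
rtnl_unlock();

/* and the matching decrement on teardown: */
rtnl_lock();
dev_set_promiscuity(hard_iface->net_dev, -1);
rtnl_unlock();
```
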
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index e45e3b4e32e3..acbac2a9c62f 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2007-2012  B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2013  B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
 #
@@ -30,6 +30,7 @@ batman-adv-y += hard-interface.o
 batman-adv-y += hash.o
 batman-adv-y += icmp_socket.o
 batman-adv-y += main.o
+batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o
 batman-adv-y += originator.o
 batman-adv-y += ring_buffer.o
 batman-adv-y += routing.o
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index a5bb0a769eb9..071f288b77a8 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -27,6 +27,7 @@
 #include "hard-interface.h"
 #include "send.h"
 #include "bat_algo.h"
+#include "network-coding.h"
 
 static struct batadv_neigh_node *
 batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
@@ -1185,6 +1186,10 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
 	if (!orig_neigh_node)
 		goto out;
 
+	/* Update nc_nodes of the originator */
+	batadv_nc_update_nc_node(bat_priv, orig_node, orig_neigh_node,
+				 batadv_ogm_packet, is_single_hop_neigh);
+
 	orig_neigh_router = batadv_orig_node_get_router(orig_neigh_node);
 
 	/* drop packet if sender is not a direct neighbor and if we
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 6a4f728680ae..379061c72549 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -341,7 +341,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
341 } 341 }
342 342
343 if (vid != -1) 343 if (vid != -1)
344 skb = vlan_insert_tag(skb, vid); 344 skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), vid);
345 345
346 skb_reset_mac_header(skb); 346 skb_reset_mac_header(skb);
347 skb->protocol = eth_type_trans(skb, soft_iface); 347 skb->protocol = eth_type_trans(skb, soft_iface);
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 6ae86516db4d..f186a55b23c3 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -32,6 +32,7 @@
32#include "icmp_socket.h" 32#include "icmp_socket.h"
33#include "bridge_loop_avoidance.h" 33#include "bridge_loop_avoidance.h"
34#include "distributed-arp-table.h" 34#include "distributed-arp-table.h"
35#include "network-coding.h"
35 36
36static struct dentry *batadv_debugfs; 37static struct dentry *batadv_debugfs;
37 38
@@ -310,6 +311,14 @@ struct batadv_debuginfo {
310 const struct file_operations fops; 311 const struct file_operations fops;
311}; 312};
312 313
314#ifdef CONFIG_BATMAN_ADV_NC
315static int batadv_nc_nodes_open(struct inode *inode, struct file *file)
316{
317 struct net_device *net_dev = (struct net_device *)inode->i_private;
318 return single_open(file, batadv_nc_nodes_seq_print_text, net_dev);
319}
320#endif
321
313#define BATADV_DEBUGINFO(_name, _mode, _open) \ 322#define BATADV_DEBUGINFO(_name, _mode, _open) \
314struct batadv_debuginfo batadv_debuginfo_##_name = { \ 323struct batadv_debuginfo batadv_debuginfo_##_name = { \
315 .attr = { .name = __stringify(_name), \ 324 .attr = { .name = __stringify(_name), \
@@ -348,6 +357,9 @@ static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open);
348static BATADV_DEBUGINFO(transtable_local, S_IRUGO, 357static BATADV_DEBUGINFO(transtable_local, S_IRUGO,
349 batadv_transtable_local_open); 358 batadv_transtable_local_open);
350static BATADV_DEBUGINFO(vis_data, S_IRUGO, batadv_vis_data_open); 359static BATADV_DEBUGINFO(vis_data, S_IRUGO, batadv_vis_data_open);
360#ifdef CONFIG_BATMAN_ADV_NC
361static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open);
362#endif
351 363
352static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { 364static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
353 &batadv_debuginfo_originators, 365 &batadv_debuginfo_originators,
@@ -362,6 +374,9 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
362#endif 374#endif
363 &batadv_debuginfo_transtable_local, 375 &batadv_debuginfo_transtable_local,
364 &batadv_debuginfo_vis_data, 376 &batadv_debuginfo_vis_data,
377#ifdef CONFIG_BATMAN_ADV_NC
378 &batadv_debuginfo_nc_nodes,
379#endif
365 NULL, 380 NULL,
366}; 381};
367 382
@@ -431,6 +446,9 @@ int batadv_debugfs_add_meshif(struct net_device *dev)
431 } 446 }
432 } 447 }
433 448
449 if (batadv_nc_init_debugfs(bat_priv) < 0)
450 goto rem_attr;
451
434 return 0; 452 return 0;
435rem_attr: 453rem_attr:
436 debugfs_remove_recursive(bat_priv->debug_dir); 454 debugfs_remove_recursive(bat_priv->debug_dir);
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index d54188a112ea..8e15d966d9b0 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -816,7 +816,6 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
816 bool ret = false; 816 bool ret = false;
817 struct batadv_dat_entry *dat_entry = NULL; 817 struct batadv_dat_entry *dat_entry = NULL;
818 struct sk_buff *skb_new; 818 struct sk_buff *skb_new;
819 struct batadv_hard_iface *primary_if = NULL;
820 819
821 if (!atomic_read(&bat_priv->distributed_arp_table)) 820 if (!atomic_read(&bat_priv->distributed_arp_table))
822 goto out; 821 goto out;
@@ -838,22 +837,18 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
838 837
839 dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst); 838 dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst);
840 if (dat_entry) { 839 if (dat_entry) {
841 primary_if = batadv_primary_if_get_selected(bat_priv);
842 if (!primary_if)
843 goto out;
844
845 skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, 840 skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
846 primary_if->soft_iface, ip_dst, hw_src, 841 bat_priv->soft_iface, ip_dst, hw_src,
847 dat_entry->mac_addr, hw_src); 842 dat_entry->mac_addr, hw_src);
848 if (!skb_new) 843 if (!skb_new)
849 goto out; 844 goto out;
850 845
851 skb_reset_mac_header(skb_new); 846 skb_reset_mac_header(skb_new);
852 skb_new->protocol = eth_type_trans(skb_new, 847 skb_new->protocol = eth_type_trans(skb_new,
853 primary_if->soft_iface); 848 bat_priv->soft_iface);
854 bat_priv->stats.rx_packets++; 849 bat_priv->stats.rx_packets++;
855 bat_priv->stats.rx_bytes += skb->len + ETH_HLEN; 850 bat_priv->stats.rx_bytes += skb->len + ETH_HLEN;
856 primary_if->soft_iface->last_rx = jiffies; 851 bat_priv->soft_iface->last_rx = jiffies;
857 852
858 netif_rx(skb_new); 853 netif_rx(skb_new);
859 batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n"); 854 batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n");
@@ -866,8 +861,6 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
866out: 861out:
867 if (dat_entry) 862 if (dat_entry)
868 batadv_dat_entry_free_ref(dat_entry); 863 batadv_dat_entry_free_ref(dat_entry);
869 if (primary_if)
870 batadv_hardif_free_ref(primary_if);
871 return ret; 864 return ret;
872} 865}
873 866
@@ -887,7 +880,6 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
887 __be32 ip_src, ip_dst; 880 __be32 ip_src, ip_dst;
888 uint8_t *hw_src; 881 uint8_t *hw_src;
889 struct sk_buff *skb_new; 882 struct sk_buff *skb_new;
890 struct batadv_hard_iface *primary_if = NULL;
891 struct batadv_dat_entry *dat_entry = NULL; 883 struct batadv_dat_entry *dat_entry = NULL;
892 bool ret = false; 884 bool ret = false;
893 int err; 885 int err;
@@ -912,12 +904,8 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
912 if (!dat_entry) 904 if (!dat_entry)
913 goto out; 905 goto out;
914 906
915 primary_if = batadv_primary_if_get_selected(bat_priv);
916 if (!primary_if)
917 goto out;
918
919 skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, 907 skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
920 primary_if->soft_iface, ip_dst, hw_src, 908 bat_priv->soft_iface, ip_dst, hw_src,
921 dat_entry->mac_addr, hw_src); 909 dat_entry->mac_addr, hw_src);
922 910
923 if (!skb_new) 911 if (!skb_new)
@@ -941,8 +929,6 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
941out: 929out:
942 if (dat_entry) 930 if (dat_entry)
943 batadv_dat_entry_free_ref(dat_entry); 931 batadv_dat_entry_free_ref(dat_entry);
944 if (primary_if)
945 batadv_hardif_free_ref(primary_if);
946 if (ret) 932 if (ret)
947 kfree_skb(skb); 933 kfree_skb(skb);
948 return ret; 934 return ret;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 34f99a46ec1d..f105219f4a4b 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -500,7 +500,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
500 rcu_read_unlock(); 500 rcu_read_unlock();
501 501
502 if (gw_count == 0) 502 if (gw_count == 0)
503 seq_printf(seq, "No gateways in range ...\n"); 503 seq_puts(seq, "No gateways in range ...\n");
504 504
505out: 505out:
506 if (primary_if) 506 if (primary_if)
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 368219e026a9..522243aff2f3 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -307,11 +307,35 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
307 batadv_update_min_mtu(hard_iface->soft_iface); 307 batadv_update_min_mtu(hard_iface->soft_iface);
308} 308}
309 309
310/**
311 * batadv_master_del_slave - remove hard_iface from the current master interface
312 * @slave: the interface enslaved in another master
313 * @master: the master from which slave has to be removed
314 *
315 * Invoke ndo_del_slave on master passing slave as argument. In this way slave
316 * is freed and master can correctly change its internal state.
317 * Return 0 on success, a negative value representing the error otherwise
318 */
319static int batadv_master_del_slave(struct batadv_hard_iface *slave,
320 struct net_device *master)
321{
322 int ret;
323
324 if (!master)
325 return 0;
326
327 ret = -EBUSY;
328 if (master->netdev_ops->ndo_del_slave)
329 ret = master->netdev_ops->ndo_del_slave(master, slave->net_dev);
330
331 return ret;
332}
333
310int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, 334int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
311 const char *iface_name) 335 const char *iface_name)
312{ 336{
313 struct batadv_priv *bat_priv; 337 struct batadv_priv *bat_priv;
314 struct net_device *soft_iface; 338 struct net_device *soft_iface, *master;
315 __be16 ethertype = __constant_htons(ETH_P_BATMAN); 339 __be16 ethertype = __constant_htons(ETH_P_BATMAN);
316 int ret; 340 int ret;
317 341
@@ -321,11 +345,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
321 if (!atomic_inc_not_zero(&hard_iface->refcount)) 345 if (!atomic_inc_not_zero(&hard_iface->refcount))
322 goto out; 346 goto out;
323 347
324 /* hard-interface is part of a bridge */
325 if (hard_iface->net_dev->priv_flags & IFF_BRIDGE_PORT)
326 pr_err("You are about to enable batman-adv on '%s' which already is part of a bridge. Unless you know exactly what you are doing this is probably wrong and won't work the way you think it would.\n",
327 hard_iface->net_dev->name);
328
329 soft_iface = dev_get_by_name(&init_net, iface_name); 348 soft_iface = dev_get_by_name(&init_net, iface_name);
330 349
331 if (!soft_iface) { 350 if (!soft_iface) {
@@ -347,12 +366,24 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
347 goto err_dev; 366 goto err_dev;
348 } 367 }
349 368
369 /* check if the interface is enslaved in another virtual one and
370 * in that case unlink it first
371 */
372 master = netdev_master_upper_dev_get(hard_iface->net_dev);
373 ret = batadv_master_del_slave(hard_iface, master);
374 if (ret)
375 goto err_dev;
376
350 hard_iface->soft_iface = soft_iface; 377 hard_iface->soft_iface = soft_iface;
351 bat_priv = netdev_priv(hard_iface->soft_iface); 378 bat_priv = netdev_priv(hard_iface->soft_iface);
352 379
380 ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface);
381 if (ret)
382 goto err_dev;
383
353 ret = bat_priv->bat_algo_ops->bat_iface_enable(hard_iface); 384 ret = bat_priv->bat_algo_ops->bat_iface_enable(hard_iface);
354 if (ret < 0) 385 if (ret < 0)
355 goto err_dev; 386 goto err_upper;
356 387
357 hard_iface->if_num = bat_priv->num_ifaces; 388 hard_iface->if_num = bat_priv->num_ifaces;
358 bat_priv->num_ifaces++; 389 bat_priv->num_ifaces++;
@@ -362,7 +393,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
362 bat_priv->bat_algo_ops->bat_iface_disable(hard_iface); 393 bat_priv->bat_algo_ops->bat_iface_disable(hard_iface);
363 bat_priv->num_ifaces--; 394 bat_priv->num_ifaces--;
364 hard_iface->if_status = BATADV_IF_NOT_IN_USE; 395 hard_iface->if_status = BATADV_IF_NOT_IN_USE;
365 goto err_dev; 396 goto err_upper;
366 } 397 }
367 398
368 hard_iface->batman_adv_ptype.type = ethertype; 399 hard_iface->batman_adv_ptype.type = ethertype;
@@ -401,14 +432,18 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
401out: 432out:
402 return 0; 433 return 0;
403 434
435err_upper:
436 netdev_upper_dev_unlink(hard_iface->net_dev, soft_iface);
404err_dev: 437err_dev:
438 hard_iface->soft_iface = NULL;
405 dev_put(soft_iface); 439 dev_put(soft_iface);
406err: 440err:
407 batadv_hardif_free_ref(hard_iface); 441 batadv_hardif_free_ref(hard_iface);
408 return ret; 442 return ret;
409} 443}
410 444
411void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) 445void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
446 enum batadv_hard_if_cleanup autodel)
412{ 447{
413 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); 448 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
414 struct batadv_hard_iface *primary_if = NULL; 449 struct batadv_hard_iface *primary_if = NULL;
@@ -446,9 +481,10 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface)
446 dev_put(hard_iface->soft_iface); 481 dev_put(hard_iface->soft_iface);
447 482
448 /* nobody uses this interface anymore */ 483 /* nobody uses this interface anymore */
449 if (!bat_priv->num_ifaces) 484 if (!bat_priv->num_ifaces && autodel == BATADV_IF_CLEANUP_AUTO)
450 batadv_softif_destroy(hard_iface->soft_iface); 485 batadv_softif_destroy_sysfs(hard_iface->soft_iface);
451 486
487 netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface);
452 hard_iface->soft_iface = NULL; 488 hard_iface->soft_iface = NULL;
453 batadv_hardif_free_ref(hard_iface); 489 batadv_hardif_free_ref(hard_iface);
454 490
@@ -533,7 +569,8 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface)
533 569
534 /* first deactivate interface */ 570 /* first deactivate interface */
535 if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) 571 if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
536 batadv_hardif_disable_interface(hard_iface); 572 batadv_hardif_disable_interface(hard_iface,
573 BATADV_IF_CLEANUP_AUTO);
537 574
538 if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) 575 if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
539 return; 576 return;
@@ -563,6 +600,11 @@ static int batadv_hard_if_event(struct notifier_block *this,
563 struct batadv_hard_iface *primary_if = NULL; 600 struct batadv_hard_iface *primary_if = NULL;
564 struct batadv_priv *bat_priv; 601 struct batadv_priv *bat_priv;
565 602
603 if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) {
604 batadv_sysfs_add_meshif(net_dev);
605 return NOTIFY_DONE;
606 }
607
566 hard_iface = batadv_hardif_get_by_netdev(net_dev); 608 hard_iface = batadv_hardif_get_by_netdev(net_dev);
567 if (!hard_iface && event == NETDEV_REGISTER) 609 if (!hard_iface && event == NETDEV_REGISTER)
568 hard_iface = batadv_hardif_add_interface(net_dev); 610 hard_iface = batadv_hardif_add_interface(net_dev);
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 308437d52e22..49892881a7c5 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -29,13 +29,24 @@ enum batadv_hard_if_state {
29 BATADV_IF_I_WANT_YOU, 29 BATADV_IF_I_WANT_YOU,
30}; 30};
31 31
32/**
33 * enum batadv_hard_if_cleanup - cleanup modes for soft_iface after slave removal
34 * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface
35 * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was removed
36 */
37enum batadv_hard_if_cleanup {
38 BATADV_IF_CLEANUP_KEEP,
39 BATADV_IF_CLEANUP_AUTO,
40};
41
32extern struct notifier_block batadv_hard_if_notifier; 42extern struct notifier_block batadv_hard_if_notifier;
33 43
34struct batadv_hard_iface* 44struct batadv_hard_iface*
35batadv_hardif_get_by_netdev(const struct net_device *net_dev); 45batadv_hardif_get_by_netdev(const struct net_device *net_dev);
36int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, 46int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
37 const char *iface_name); 47 const char *iface_name);
38void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface); 48void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
49 enum batadv_hard_if_cleanup autodel);
39void batadv_hardif_remove_interfaces(void); 50void batadv_hardif_remove_interfaces(void);
40int batadv_hardif_min_mtu(struct net_device *soft_iface); 51int batadv_hardif_min_mtu(struct net_device *soft_iface);
41void batadv_update_min_mtu(struct net_device *soft_iface); 52void batadv_update_min_mtu(struct net_device *soft_iface);
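The cleanup argument matters at the call sites: the hardif removal path above passes BATADV_IF_CLEANUP_AUTO so the soft-interface is destroyed together with its last slave, while a caller whose soft-interface lifetime is managed elsewhere (plausibly the rtnl dellink path registered in main.c below — an assumption, that call site is outside this hunk) would pass BATADV_IF_CLEANUP_KEEP. A hypothetical sketch of the choice:

/* illustrative caller, not from the patch */
static void example_release_slave(struct batadv_hard_iface *hard_iface,
				  bool soft_iface_owned_elsewhere)
{
	/* keep the soft-interface alive if another subsystem (such as
	 * rtnl) is responsible for deleting it, otherwise let the last
	 * slave removal tear it down automatically
	 */
	if (soft_iface_owned_elsewhere)
		batadv_hardif_disable_interface(hard_iface,
						BATADV_IF_CLEANUP_KEEP);
	else
		batadv_hardif_disable_interface(hard_iface,
						BATADV_IF_CLEANUP_AUTO);
}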
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index fa563e497c48..3e30a0f1b908 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -35,6 +35,7 @@
35#include "vis.h" 35#include "vis.h"
36#include "hash.h" 36#include "hash.h"
37#include "bat_algo.h" 37#include "bat_algo.h"
38#include "network-coding.h"
38 39
39 40
40/* List manipulations on hardif_list have to be rtnl_lock()'ed, 41/* List manipulations on hardif_list have to be rtnl_lock()'ed,
@@ -70,6 +71,7 @@ static int __init batadv_init(void)
70 batadv_debugfs_init(); 71 batadv_debugfs_init();
71 72
72 register_netdevice_notifier(&batadv_hard_if_notifier); 73 register_netdevice_notifier(&batadv_hard_if_notifier);
74 rtnl_link_register(&batadv_link_ops);
73 75
74 pr_info("B.A.T.M.A.N. advanced %s (compatibility version %i) loaded\n", 76 pr_info("B.A.T.M.A.N. advanced %s (compatibility version %i) loaded\n",
75 BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION); 77 BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION);
@@ -80,6 +82,7 @@ static int __init batadv_init(void)
80static void __exit batadv_exit(void) 82static void __exit batadv_exit(void)
81{ 83{
82 batadv_debugfs_destroy(); 84 batadv_debugfs_destroy();
85 rtnl_link_unregister(&batadv_link_ops);
83 unregister_netdevice_notifier(&batadv_hard_if_notifier); 86 unregister_netdevice_notifier(&batadv_hard_if_notifier);
84 batadv_hardif_remove_interfaces(); 87 batadv_hardif_remove_interfaces();
85 88
@@ -135,6 +138,10 @@ int batadv_mesh_init(struct net_device *soft_iface)
135 if (ret < 0) 138 if (ret < 0)
136 goto err; 139 goto err;
137 140
141 ret = batadv_nc_init(bat_priv);
142 if (ret < 0)
143 goto err;
144
138 atomic_set(&bat_priv->gw.reselect, 0); 145 atomic_set(&bat_priv->gw.reselect, 0);
139 atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE); 146 atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE);
140 147
@@ -157,6 +164,7 @@ void batadv_mesh_free(struct net_device *soft_iface)
157 164
158 batadv_gw_node_purge(bat_priv); 165 batadv_gw_node_purge(bat_priv);
159 batadv_originator_free(bat_priv); 166 batadv_originator_free(bat_priv);
167 batadv_nc_free(bat_priv);
160 168
161 batadv_tt_free(bat_priv); 169 batadv_tt_free(bat_priv);
162 170
@@ -169,6 +177,12 @@ void batadv_mesh_free(struct net_device *soft_iface)
169 atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); 177 atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE);
170} 178}
171 179
180/**
181 * batadv_is_my_mac - check if the given mac address belongs to any of the real
182 * interfaces in the current mesh
183 * @bat_priv: the bat priv with all the soft interface information
184 * @addr: the address to check
185 */
172int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr) 186int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr)
173{ 187{
174 const struct batadv_hard_iface *hard_iface; 188 const struct batadv_hard_iface *hard_iface;
@@ -414,7 +428,7 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
414{ 428{
415 struct batadv_algo_ops *bat_algo_ops; 429 struct batadv_algo_ops *bat_algo_ops;
416 430
417 seq_printf(seq, "Available routing algorithms:\n"); 431 seq_puts(seq, "Available routing algorithms:\n");
418 432
419 hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { 433 hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
420 seq_printf(seq, "%s\n", bat_algo_ops->name); 434 seq_printf(seq, "%s\n", bat_algo_ops->name);
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index d40910dfc8ea..59a0d6af15c8 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -26,7 +26,7 @@
26#define BATADV_DRIVER_DEVICE "batman-adv" 26#define BATADV_DRIVER_DEVICE "batman-adv"
27 27
28#ifndef BATADV_SOURCE_VERSION 28#ifndef BATADV_SOURCE_VERSION
29#define BATADV_SOURCE_VERSION "2013.1.0" 29#define BATADV_SOURCE_VERSION "2013.2.0"
30#endif 30#endif
31 31
32/* B.A.T.M.A.N. parameters */ 32/* B.A.T.M.A.N. parameters */
@@ -105,6 +105,8 @@
105#define BATADV_RESET_PROTECTION_MS 30000 105#define BATADV_RESET_PROTECTION_MS 30000
106#define BATADV_EXPECTED_SEQNO_RANGE 65536 106#define BATADV_EXPECTED_SEQNO_RANGE 65536
107 107
108#define BATADV_NC_NODE_TIMEOUT 10000 /* Milliseconds */
109
108enum batadv_mesh_state { 110enum batadv_mesh_state {
109 BATADV_MESH_INACTIVE, 111 BATADV_MESH_INACTIVE,
110 BATADV_MESH_ACTIVE, 112 BATADV_MESH_ACTIVE,
@@ -150,6 +152,7 @@ enum batadv_uev_type {
150#include <linux/percpu.h> 152#include <linux/percpu.h>
151#include <linux/slab.h> 153#include <linux/slab.h>
152#include <net/sock.h> /* struct sock */ 154#include <net/sock.h> /* struct sock */
155#include <net/rtnetlink.h>
153#include <linux/jiffies.h> 156#include <linux/jiffies.h>
154#include <linux/seq_file.h> 157#include <linux/seq_file.h>
155#include "types.h" 158#include "types.h"
@@ -185,6 +188,7 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr);
185 * @BATADV_DBG_TT: translation table messages 188 * @BATADV_DBG_TT: translation table messages
186 * @BATADV_DBG_BLA: bridge loop avoidance messages 189 * @BATADV_DBG_BLA: bridge loop avoidance messages
187 * @BATADV_DBG_DAT: ARP snooping and DAT related messages 190 * @BATADV_DBG_DAT: ARP snooping and DAT related messages
191 * @BATADV_DBG_NC: network coding related messages
188 * @BATADV_DBG_ALL: the union of all the above log levels 192 * @BATADV_DBG_ALL: the union of all the above log levels
189 */ 193 */
190enum batadv_dbg_level { 194enum batadv_dbg_level {
@@ -193,7 +197,8 @@ enum batadv_dbg_level {
193 BATADV_DBG_TT = BIT(2), 197 BATADV_DBG_TT = BIT(2),
194 BATADV_DBG_BLA = BIT(3), 198 BATADV_DBG_BLA = BIT(3),
195 BATADV_DBG_DAT = BIT(4), 199 BATADV_DBG_DAT = BIT(4),
196 BATADV_DBG_ALL = 31, 200 BATADV_DBG_NC = BIT(5),
201 BATADV_DBG_ALL = 63,
197}; 202};
198 203
199#ifdef CONFIG_BATMAN_ADV_DEBUG 204#ifdef CONFIG_BATMAN_ADV_DEBUG
@@ -298,4 +303,10 @@ static inline uint64_t batadv_sum_counter(struct batadv_priv *bat_priv,
298 return sum; 303 return sum;
299} 304}
300 305
306/* Define a macro to reach the control buffer of the skb. The members of the
307 * control buffer are defined in struct batadv_skb_cb in types.h.
308 * The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h.
309 */
310#define BATADV_SKB_CB(__skb) ((struct batadv_skb_cb *)&((__skb)->cb[0]))
311
301#endif /* _NET_BATMAN_ADV_MAIN_H_ */ 312#endif /* _NET_BATMAN_ADV_MAIN_H_ */
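Usage of the macro mirrors TCP_SKB_CB(): cast the skb's control buffer to the private struct and read or write per-packet state. A minimal fragment in the style of the network coding code added below (assumes skb and bat_priv are in scope; struct batadv_skb_cb and its decoded field come from types.h):

/* mark the skb as the product of a successful decode */
BATADV_SKB_CB(skb)->decoded = true;

/* later, when the packet is coded again, count it as a recode */
if (BATADV_SKB_CB(skb)->decoded)
	batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE);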
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
new file mode 100644
index 000000000000..f7c54305a918
--- /dev/null
+++ b/net/batman-adv/network-coding.c
@@ -0,0 +1,1822 @@
1/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors:
2 *
3 * Martin Hundebøll, Jeppe Ledet-Pedersen
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#include <linux/debugfs.h>
21
22#include "main.h"
23#include "hash.h"
24#include "network-coding.h"
25#include "send.h"
26#include "originator.h"
27#include "hard-interface.h"
28#include "routing.h"
29
30static struct lock_class_key batadv_nc_coding_hash_lock_class_key;
31static struct lock_class_key batadv_nc_decoding_hash_lock_class_key;
32
33static void batadv_nc_worker(struct work_struct *work);
34static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
35 struct batadv_hard_iface *recv_if);
36
37/**
38 * batadv_nc_start_timer - initialise the nc periodic worker
39 * @bat_priv: the bat priv with all the soft interface information
40 */
41static void batadv_nc_start_timer(struct batadv_priv *bat_priv)
42{
43 queue_delayed_work(batadv_event_workqueue, &bat_priv->nc.work,
44 msecs_to_jiffies(10));
45}
46
47/**
48 * batadv_nc_init - initialise coding and decoding hash tables and start housekeeping
49 * @bat_priv: the bat priv with all the soft interface information
50 */
51int batadv_nc_init(struct batadv_priv *bat_priv)
52{
53 bat_priv->nc.timestamp_fwd_flush = jiffies;
54 bat_priv->nc.timestamp_sniffed_purge = jiffies;
55
56 if (bat_priv->nc.coding_hash || bat_priv->nc.decoding_hash)
57 return 0;
58
59 bat_priv->nc.coding_hash = batadv_hash_new(128);
60 if (!bat_priv->nc.coding_hash)
61 goto err;
62
63 batadv_hash_set_lock_class(bat_priv->nc.coding_hash,
64 &batadv_nc_coding_hash_lock_class_key);
65
66 bat_priv->nc.decoding_hash = batadv_hash_new(128);
67 if (!bat_priv->nc.decoding_hash)
68 goto err;
69
70 batadv_hash_set_lock_class(bat_priv->nc.decoding_hash,
71 &batadv_nc_decoding_hash_lock_class_key);
72
73 /* Register our packet type */
74 if (batadv_recv_handler_register(BATADV_CODED,
75 batadv_nc_recv_coded_packet) < 0)
76 goto err;
77
78 INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker);
79 batadv_nc_start_timer(bat_priv);
80
81 return 0;
82
83err:
84 return -ENOMEM;
85}
86
87/**
88 * batadv_nc_init_bat_priv - initialise the nc specific bat_priv variables
89 * @bat_priv: the bat priv with all the soft interface information
90 */
91void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv)
92{
93 atomic_set(&bat_priv->network_coding, 1);
94 bat_priv->nc.min_tq = 200;
95 bat_priv->nc.max_fwd_delay = 10;
96 bat_priv->nc.max_buffer_time = 200;
97}
98
99/**
100 * batadv_nc_init_orig - initialise the nc fields of an orig_node
101 * @orig_node: the orig_node which is going to be initialised
102 */
103void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
104{
105 INIT_LIST_HEAD(&orig_node->in_coding_list);
106 INIT_LIST_HEAD(&orig_node->out_coding_list);
107 spin_lock_init(&orig_node->in_coding_list_lock);
108 spin_lock_init(&orig_node->out_coding_list_lock);
109}
110
111/**
112 * batadv_nc_node_free_rcu - rcu callback to free an nc node and release
113 * its reference on the orig_node
114 * @rcu: rcu pointer of the nc node
115 */
116static void batadv_nc_node_free_rcu(struct rcu_head *rcu)
117{
118 struct batadv_nc_node *nc_node;
119
120 nc_node = container_of(rcu, struct batadv_nc_node, rcu);
121 batadv_orig_node_free_ref(nc_node->orig_node);
122 kfree(nc_node);
123}
124
125/**
126 * batadv_nc_node_free_ref - decrements the nc node refcounter and possibly
127 * frees it
128 * @nc_node: the nc node to free
129 */
130static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node)
131{
132 if (atomic_dec_and_test(&nc_node->refcount))
133 call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu);
134}
135
136/**
137 * batadv_nc_path_free_ref - decrements the nc path refcounter and possibly
138 * frees it
139 * @nc_path: the nc path to free
140 */
141static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path)
142{
143 if (atomic_dec_and_test(&nc_path->refcount))
144 kfree_rcu(nc_path, rcu);
145}
146
147/**
148 * batadv_nc_packet_free - frees nc packet
149 * @nc_packet: the nc packet to free
150 */
151static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet)
152{
153 if (nc_packet->skb)
154 kfree_skb(nc_packet->skb);
155
156 batadv_nc_path_free_ref(nc_packet->nc_path);
157 kfree(nc_packet);
158}
159
160/**
161 * batadv_nc_to_purge_nc_node - checks whether an nc node has to be purged
162 * @bat_priv: the bat priv with all the soft interface information
163 * @nc_node: the nc node to check
164 *
165 * Returns true if the entry has to be purged now, false otherwise
166 */
167static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv,
168 struct batadv_nc_node *nc_node)
169{
170 if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
171 return true;
172
173 return batadv_has_timed_out(nc_node->last_seen, BATADV_NC_NODE_TIMEOUT);
174}
175
176/**
177 * batadv_nc_to_purge_nc_path_coding - checks whether an nc path has timed out
178 * @bat_priv: the bat priv with all the soft interface information
179 * @nc_path: the nc path to check
180 *
181 * Returns true if the entry has to be purged now, false otherwise
182 */
183static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv,
184 struct batadv_nc_path *nc_path)
185{
186 if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
187 return true;
188
189 /* purge the path when no packet has been added for 10 times the
190 * max_fwd_delay time
191 */
192 return batadv_has_timed_out(nc_path->last_valid,
193 bat_priv->nc.max_fwd_delay * 10);
194}
195
196/**
197 * batadv_nc_to_purge_nc_path_decoding - checks whether an nc path has timed out
198 * @bat_priv: the bat priv with all the soft interface information
199 * @nc_path: the nc path to check
200 *
201 * Returns true if the entry has to be purged now, false otherwise
202 */
203static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv,
204 struct batadv_nc_path *nc_path)
205{
206 if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
207 return true;
208
209 /* purge the path when no packet has been added for 10 times the
210 * max_buffer time
211 */
212 return batadv_has_timed_out(nc_path->last_valid,
213 bat_priv->nc.max_buffer_time * 10);
214}
215
216/**
217 * batadv_nc_purge_orig_nc_nodes - go through list of nc nodes and purge stale
218 * entries
219 * @bat_priv: the bat priv with all the soft interface information
220 * @list: list of nc nodes
221 * @lock: nc node list lock
222 * @to_purge: function in charge to decide whether an entry has to be purged or
223 * not. This function takes the nc node as argument and has to return
224 * a boolean value: true if the entry has to be deleted, false
225 * otherwise
226 */
227static void
228batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv,
229 struct list_head *list,
230 spinlock_t *lock,
231 bool (*to_purge)(struct batadv_priv *,
232 struct batadv_nc_node *))
233{
234 struct batadv_nc_node *nc_node, *nc_node_tmp;
235
236 /* For each nc_node in list */
237 spin_lock_bh(lock);
238 list_for_each_entry_safe(nc_node, nc_node_tmp, list, list) {
239 /* if a helper function has been passed as parameter,
240 * ask it if the entry has to be purged or not
241 */
242 if (to_purge && !to_purge(bat_priv, nc_node))
243 continue;
244
245 batadv_dbg(BATADV_DBG_NC, bat_priv,
246 "Removing nc_node %pM -> %pM\n",
247 nc_node->addr, nc_node->orig_node->orig);
248 list_del_rcu(&nc_node->list);
249 batadv_nc_node_free_ref(nc_node);
250 }
251 spin_unlock_bh(lock);
252}
253
254/**
255 * batadv_nc_purge_orig - purges all nc node data attached to the given
256 * originator
257 * @bat_priv: the bat priv with all the soft interface information
258 * @orig_node: orig_node with the nc node entries to be purged
259 * @to_purge: function in charge to decide whether an entry has to be purged or
260 * not. This function takes the nc node as argument and has to return
261 * a boolean value: true if the entry has to be deleted, false
262 * otherwise
263 */
264void batadv_nc_purge_orig(struct batadv_priv *bat_priv,
265 struct batadv_orig_node *orig_node,
266 bool (*to_purge)(struct batadv_priv *,
267 struct batadv_nc_node *))
268{
269 /* Check incoming nc_nodes of this orig_node */
270 batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->in_coding_list,
271 &orig_node->in_coding_list_lock,
272 to_purge);
273
274 /* Check outgoing nc_nodes of this orig_node */
275 batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->out_coding_list,
276 &orig_node->out_coding_list_lock,
277 to_purge);
278}
279
280/**
281 * batadv_nc_purge_orig_hash - traverse entire originator hash to check if they
282 * have timed out nc nodes
283 * @bat_priv: the bat priv with all the soft interface information
284 */
285static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv)
286{
287 struct batadv_hashtable *hash = bat_priv->orig_hash;
288 struct hlist_head *head;
289 struct batadv_orig_node *orig_node;
290 uint32_t i;
291
292 if (!hash)
293 return;
294
295 /* For each orig_node */
296 for (i = 0; i < hash->size; i++) {
297 head = &hash->table[i];
298
299 rcu_read_lock();
300 hlist_for_each_entry_rcu(orig_node, head, hash_entry)
301 batadv_nc_purge_orig(bat_priv, orig_node,
302 batadv_nc_to_purge_nc_node);
303 rcu_read_unlock();
304 }
305}
306
307/**
308 * batadv_nc_purge_paths - traverse all nc paths in the hash and remove
309 * unused ones
310 * @bat_priv: the bat priv with all the soft interface information
311 * @hash: hash table containing the nc paths to check
312 * @to_purge: function in charge to decide whether an entry has to be purged or
313 * not. This function takes the nc node as argument and has to return
314 * a boolean value: true if the entry has to be deleted, false
315 * otherwise
316 */
317static void batadv_nc_purge_paths(struct batadv_priv *bat_priv,
318 struct batadv_hashtable *hash,
319 bool (*to_purge)(struct batadv_priv *,
320 struct batadv_nc_path *))
321{
322 struct hlist_head *head;
323 struct hlist_node *node_tmp;
324 struct batadv_nc_path *nc_path;
325 spinlock_t *lock; /* Protects lists in hash */
326 uint32_t i;
327
328 for (i = 0; i < hash->size; i++) {
329 head = &hash->table[i];
330 lock = &hash->list_locks[i];
331
332 /* For each nc_path in this bin */
333 spin_lock_bh(lock);
334 hlist_for_each_entry_safe(nc_path, node_tmp, head, hash_entry) {
335 /* if a helper function has been passed as parameter,
336 * ask it if the entry has to be purged or not
337 */
338 if (to_purge && !to_purge(bat_priv, nc_path))
339 continue;
340
341 /* purging a non-empty nc_path should never happen, but
342 * is observed under high CPU load. Delay the purging
343 * until next iteration to allow the packet_list to be
344 * emptied first.
345 */
346 if (unlikely(!list_empty(&nc_path->packet_list))) {
347 net_ratelimited_function(printk,
348 KERN_WARNING
349 "Skipping free of non-empty nc_path (%pM -> %pM)!\n",
350 nc_path->prev_hop,
351 nc_path->next_hop);
352 continue;
353 }
354
355 /* nc_path is unused, so remove it */
356 batadv_dbg(BATADV_DBG_NC, bat_priv,
357 "Remove nc_path %pM -> %pM\n",
358 nc_path->prev_hop, nc_path->next_hop);
359 hlist_del_rcu(&nc_path->hash_entry);
360 batadv_nc_path_free_ref(nc_path);
361 }
362 spin_unlock_bh(lock);
363 }
364}
365
366/**
367 * batadv_nc_hash_key_gen - computes the nc_path hash key
368 * @key: buffer to hold the final hash key
369 * @src: source ethernet mac address going into the hash key
370 * @dst: destination ethernet mac address going into the hash key
371 */
372static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src,
373 const char *dst)
374{
375 memcpy(key->prev_hop, src, sizeof(key->prev_hop));
376 memcpy(key->next_hop, dst, sizeof(key->next_hop));
377}
378
379/**
380 * batadv_nc_hash_choose - compute the hash value for an nc path
381 * @data: data to hash
382 * @size: size of the hash table
383 *
384 * Returns the selected index in the hash table for the given data.
385 */
386static uint32_t batadv_nc_hash_choose(const void *data, uint32_t size)
387{
388 const struct batadv_nc_path *nc_path = data;
389 uint32_t hash = 0;
390
391 hash = batadv_hash_bytes(hash, &nc_path->prev_hop,
392 sizeof(nc_path->prev_hop));
393 hash = batadv_hash_bytes(hash, &nc_path->next_hop,
394 sizeof(nc_path->next_hop));
395
396 hash += (hash << 3);
397 hash ^= (hash >> 11);
398 hash += (hash << 15);
399
400 return hash % size;
401}
402
403/**
404 * batadv_nc_hash_compare - comparing function used in the network coding hash
405 * tables
406 * @node: node in the local table
407 * @data2: second object to compare the node to
408 *
409 * Returns 1 if the two entries are the same, 0 otherwise
410 */
411static int batadv_nc_hash_compare(const struct hlist_node *node,
412 const void *data2)
413{
414 const struct batadv_nc_path *nc_path1, *nc_path2;
415
416 nc_path1 = container_of(node, struct batadv_nc_path, hash_entry);
417 nc_path2 = data2;
418
419 /* Return 1 if the two keys are identical */
420 if (memcmp(nc_path1->prev_hop, nc_path2->prev_hop,
421 sizeof(nc_path1->prev_hop)) != 0)
422 return 0;
423
424 if (memcmp(nc_path1->next_hop, nc_path2->next_hop,
425 sizeof(nc_path1->next_hop)) != 0)
426 return 0;
427
428 return 1;
429}
430
431/**
432 * batadv_nc_hash_find - search for an existing nc path and return it
433 * @hash: hash table containing the nc path
434 * @data: search key
435 *
436 * Returns the nc_path if found, NULL otherwise.
437 */
438static struct batadv_nc_path *
439batadv_nc_hash_find(struct batadv_hashtable *hash,
440 void *data)
441{
442 struct hlist_head *head;
443 struct batadv_nc_path *nc_path, *nc_path_tmp = NULL;
444 int index;
445
446 if (!hash)
447 return NULL;
448
449 index = batadv_nc_hash_choose(data, hash->size);
450 head = &hash->table[index];
451
452 rcu_read_lock();
453 hlist_for_each_entry_rcu(nc_path, head, hash_entry) {
454 if (!batadv_nc_hash_compare(&nc_path->hash_entry, data))
455 continue;
456
457 if (!atomic_inc_not_zero(&nc_path->refcount))
458 continue;
459
460 nc_path_tmp = nc_path;
461 break;
462 }
463 rcu_read_unlock();
464
465 return nc_path_tmp;
466}
467
468/**
469 * batadv_nc_send_packet - send non-coded packet and free nc_packet struct
470 * @nc_packet: the nc packet to send
471 */
472static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
473{
474 batadv_send_skb_packet(nc_packet->skb,
475 nc_packet->neigh_node->if_incoming,
476 nc_packet->nc_path->next_hop);
477 nc_packet->skb = NULL;
478 batadv_nc_packet_free(nc_packet);
479}
480
481/**
482 * batadv_nc_sniffed_purge - Checks timestamp of given sniffed nc_packet.
483 * @bat_priv: the bat priv with all the soft interface information
484 * @nc_path: the nc path the packet belongs to
485 * @nc_packet: the nc packet to be checked
486 *
487 * Checks whether the given sniffed (overheard) nc_packet has hit its buffering
488 * timeout. If so, the packet is no longer kept and the entry deleted from the
489 * queue. Has to be called with the appropriate locks.
490 *
491 * Returns false if the entry in the fifo queue has not yet timed out,
492 * true otherwise.
493 */
494static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv,
495 struct batadv_nc_path *nc_path,
496 struct batadv_nc_packet *nc_packet)
497{
498 unsigned long timeout = bat_priv->nc.max_buffer_time;
499 bool res = false;
500
501 /* Packets are added to tail, so the remaining packets did not time
502 * out and we can stop processing the current queue
503 */
504 if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE &&
505 !batadv_has_timed_out(nc_packet->timestamp, timeout))
506 goto out;
507
508 /* purge nc packet */
509 list_del(&nc_packet->list);
510 batadv_nc_packet_free(nc_packet);
511
512 res = true;
513
514out:
515 return res;
516}
517
518/**
519 * batadv_nc_fwd_flush - Checks the timestamp of the given nc packet.
520 * @bat_priv: the bat priv with all the soft interface information
521 * @nc_path: the nc path the packet belongs to
522 * @nc_packet: the nc packet to be checked
523 *
524 * Checks whether the given nc packet has hit its forward timeout. If so, the
525 * packet is no longer delayed, immediately sent and the entry deleted from the
526 * queue. Has to be called with the appropriate locks.
527 *
528 * Returns false if the entry in the fifo queue has not yet timed out,
529 * true otherwise.
530 */
531static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv,
532 struct batadv_nc_path *nc_path,
533 struct batadv_nc_packet *nc_packet)
534{
535 unsigned long timeout = bat_priv->nc.max_fwd_delay;
536
537 /* Packets are added to tail, so the remaining packets did not time
538 * out and we can stop processing the current queue
539 */
540 if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE &&
541 !batadv_has_timed_out(nc_packet->timestamp, timeout))
542 return false;
543
544 /* Send packet */
545 batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
546 batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
547 nc_packet->skb->len + ETH_HLEN);
548 list_del(&nc_packet->list);
549 batadv_nc_send_packet(nc_packet);
550
551 return true;
552}
553
554/**
555 * batadv_nc_process_nc_paths - traverse the given nc packet pool and free
556 * timed-out nc packets
557 * @bat_priv: the bat priv with all the soft interface information
558 * @hash: to be processed hash table
559 * @process_fn: Function called to process given nc packet. Should return true
560 * to encourage this function to proceed with the next packet.
561 * Otherwise the rest of the current queue is skipped.
562 */
563static void
564batadv_nc_process_nc_paths(struct batadv_priv *bat_priv,
565 struct batadv_hashtable *hash,
566 bool (*process_fn)(struct batadv_priv *,
567 struct batadv_nc_path *,
568 struct batadv_nc_packet *))
569{
570 struct hlist_head *head;
571 struct batadv_nc_packet *nc_packet, *nc_packet_tmp;
572 struct batadv_nc_path *nc_path;
573 bool ret;
574 int i;
575
576 if (!hash)
577 return;
578
579 /* Loop hash table bins */
580 for (i = 0; i < hash->size; i++) {
581 head = &hash->table[i];
582
583 /* Loop coding paths */
584 rcu_read_lock();
585 hlist_for_each_entry_rcu(nc_path, head, hash_entry) {
586 /* Loop packets */
587 spin_lock_bh(&nc_path->packet_list_lock);
588 list_for_each_entry_safe(nc_packet, nc_packet_tmp,
589 &nc_path->packet_list, list) {
590 ret = process_fn(bat_priv, nc_path, nc_packet);
591 if (!ret)
592 break;
593 }
594 spin_unlock_bh(&nc_path->packet_list_lock);
595 }
596 rcu_read_unlock();
597 }
598}
599
600/**
601 * batadv_nc_worker - periodic task for housekeeping related to network coding
602 * @work: kernel work struct
603 */
604static void batadv_nc_worker(struct work_struct *work)
605{
606 struct delayed_work *delayed_work;
607 struct batadv_priv_nc *priv_nc;
608 struct batadv_priv *bat_priv;
609 unsigned long timeout;
610
611 delayed_work = container_of(work, struct delayed_work, work);
612 priv_nc = container_of(delayed_work, struct batadv_priv_nc, work);
613 bat_priv = container_of(priv_nc, struct batadv_priv, nc);
614
615 batadv_nc_purge_orig_hash(bat_priv);
616 batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash,
617 batadv_nc_to_purge_nc_path_coding);
618 batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash,
619 batadv_nc_to_purge_nc_path_decoding);
620
621 timeout = bat_priv->nc.max_fwd_delay;
622
623 if (batadv_has_timed_out(bat_priv->nc.timestamp_fwd_flush, timeout)) {
624 batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.coding_hash,
625 batadv_nc_fwd_flush);
626 bat_priv->nc.timestamp_fwd_flush = jiffies;
627 }
628
629 if (batadv_has_timed_out(bat_priv->nc.timestamp_sniffed_purge,
630 bat_priv->nc.max_buffer_time)) {
631 batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.decoding_hash,
632 batadv_nc_sniffed_purge);
633 bat_priv->nc.timestamp_sniffed_purge = jiffies;
634 }
635
636 /* Schedule a new check */
637 batadv_nc_start_timer(bat_priv);
638}
639
640/**
641 * batadv_can_nc_with_orig - checks whether the given orig node is suitable for
642 * coding or not
643 * @bat_priv: the bat priv with all the soft interface information
644 * @orig_node: neighboring orig node which may be used as nc candidate
645 * @ogm_packet: incoming ogm packet also used for the checks
646 *
647 * Returns true if all of the following conditions hold:
648 * 1) The OGM has the most recent sequence number.
649 * 2) The TTL has been decremented by exactly one.
650 * 3) The OGM was received from the first hop from orig_node.
651 * 4) The TQ value of the OGM is above bat_priv->nc.min_tq.
652 */
653static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv,
654 struct batadv_orig_node *orig_node,
655 struct batadv_ogm_packet *ogm_packet)
656{
657 if (orig_node->last_real_seqno != ntohl(ogm_packet->seqno))
658 return false;
659 if (orig_node->last_ttl != ogm_packet->header.ttl + 1)
660 return false;
661 if (!batadv_compare_eth(ogm_packet->orig, ogm_packet->prev_sender))
662 return false;
663 if (ogm_packet->tq < bat_priv->nc.min_tq)
664 return false;
665
666 return true;
667}
668
669/**
670 * batadv_nc_find_nc_node - search for an existing nc node and return it
671 * @orig_node: orig node originating the ogm packet
672 * @orig_neigh_node: neighboring orig node from which we received the ogm packet
673 * (can be equal to orig_node)
674 * @in_coding: traverse incoming or outgoing network coding list
675 *
676 * Returns the nc_node if found, NULL otherwise.
677 */
678static struct batadv_nc_node
679*batadv_nc_find_nc_node(struct batadv_orig_node *orig_node,
680 struct batadv_orig_node *orig_neigh_node,
681 bool in_coding)
682{
683 struct batadv_nc_node *nc_node, *nc_node_out = NULL;
684 struct list_head *list;
685
686 if (in_coding)
687 list = &orig_neigh_node->in_coding_list;
688 else
689 list = &orig_neigh_node->out_coding_list;
690
691 /* Traverse list of nc_nodes to orig_node */
692 rcu_read_lock();
693 list_for_each_entry_rcu(nc_node, list, list) {
694 if (!batadv_compare_eth(nc_node->addr, orig_node->orig))
695 continue;
696
697 if (!atomic_inc_not_zero(&nc_node->refcount))
698 continue;
699
700 /* Found a match */
701 nc_node_out = nc_node;
702 break;
703 }
704 rcu_read_unlock();
705
706 return nc_node_out;
707}
708
709/**
710 * batadv_nc_get_nc_node - retrieves an nc node or creates the entry if it was
711 * not found
712 * @bat_priv: the bat priv with all the soft interface information
713 * @orig_node: orig node originating the ogm packet
714 * @orig_neigh_node: neighboring orig node from which we received the ogm packet
715 * (can be equal to orig_node)
716 * @in_coding: traverse incoming or outgoing network coding list
717 *
718 * Returns the nc_node if found or created, NULL in case of an error.
719 */
720static struct batadv_nc_node
721*batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
722 struct batadv_orig_node *orig_node,
723 struct batadv_orig_node *orig_neigh_node,
724 bool in_coding)
725{
726 struct batadv_nc_node *nc_node;
727 spinlock_t *lock; /* Used to lock list selected by "int in_coding" */
728 struct list_head *list;
729
730 /* Check if nc_node is already added */
731 nc_node = batadv_nc_find_nc_node(orig_node, orig_neigh_node, in_coding);
732
733 /* Node found */
734 if (nc_node)
735 return nc_node;
736
737 nc_node = kzalloc(sizeof(*nc_node), GFP_ATOMIC);
738 if (!nc_node)
739 return NULL;
740
741 if (!atomic_inc_not_zero(&orig_neigh_node->refcount))
742 goto free;
743
744 /* Initialize nc_node */
745 INIT_LIST_HEAD(&nc_node->list);
746 memcpy(nc_node->addr, orig_node->orig, ETH_ALEN);
747 nc_node->orig_node = orig_neigh_node;
748 atomic_set(&nc_node->refcount, 2);
749
750 /* Select the incoming or outgoing coding list and its lock */
751 if (in_coding) {
752 lock = &orig_neigh_node->in_coding_list_lock;
753 list = &orig_neigh_node->in_coding_list;
754 } else {
755 lock = &orig_neigh_node->out_coding_list_lock;
756 list = &orig_neigh_node->out_coding_list;
757 }
758
759 batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_node %pM -> %pM\n",
760 nc_node->addr, nc_node->orig_node->orig);
761
762 /* Add nc_node to orig_node */
763 spin_lock_bh(lock);
764 list_add_tail_rcu(&nc_node->list, list);
765 spin_unlock_bh(lock);
766
767 return nc_node;
768
769free:
770 kfree(nc_node);
771 return NULL;
772}
773
774/**
775 * batadv_nc_update_nc_node - updates stored incoming and outgoing nc node structs
776 * (best called on incoming OGMs)
777 * @bat_priv: the bat priv with all the soft interface information
778 * @orig_node: orig node originating the ogm packet
779 * @orig_neigh_node: neighboring orig node from which we received the ogm packet
780 * (can be equal to orig_node)
781 * @ogm_packet: incoming ogm packet
782 * @is_single_hop_neigh: orig_node is a single hop neighbor
783 */
784void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
785 struct batadv_orig_node *orig_node,
786 struct batadv_orig_node *orig_neigh_node,
787 struct batadv_ogm_packet *ogm_packet,
788 int is_single_hop_neigh)
789{
790 struct batadv_nc_node *in_nc_node = NULL, *out_nc_node = NULL;
791
792 /* Check if network coding is enabled */
793 if (!atomic_read(&bat_priv->network_coding))
794 goto out;
795
796 /* accept ogms from 'good' neighbors and single hop neighbors */
797 if (!batadv_can_nc_with_orig(bat_priv, orig_node, ogm_packet) &&
798 !is_single_hop_neigh)
799 goto out;
800
801 /* Add orig_node as in_nc_node on hop */
802 in_nc_node = batadv_nc_get_nc_node(bat_priv, orig_node,
803 orig_neigh_node, true);
804 if (!in_nc_node)
805 goto out;
806
807 in_nc_node->last_seen = jiffies;
808
809 /* Add hop as out_nc_node on orig_node */
810 out_nc_node = batadv_nc_get_nc_node(bat_priv, orig_neigh_node,
811 orig_node, false);
812 if (!out_nc_node)
813 goto out;
814
815 out_nc_node->last_seen = jiffies;
816
817out:
818 if (in_nc_node)
819 batadv_nc_node_free_ref(in_nc_node);
820 if (out_nc_node)
821 batadv_nc_node_free_ref(out_nc_node);
822}
823
824/**
825 * batadv_nc_get_path - get existing nc_path or allocate a new one
826 * @bat_priv: the bat priv with all the soft interface information
827 * @hash: hash table containing the nc path
828 * @src: ethernet source address - first half of the nc path search key
829 * @dst: ethernet destination address - second half of the nc path search key
830 *
831 * Returns pointer to nc_path if the path was found or created, returns NULL
832 * on error.
833 */
834static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
835 struct batadv_hashtable *hash,
836 uint8_t *src,
837 uint8_t *dst)
838{
839 int hash_added;
840 struct batadv_nc_path *nc_path, nc_path_key;
841
842 batadv_nc_hash_key_gen(&nc_path_key, src, dst);
843
844 /* Search for existing nc_path */
845 nc_path = batadv_nc_hash_find(hash, (void *)&nc_path_key);
846
847 if (nc_path) {
848 /* Set timestamp to delay removal of nc_path */
849 nc_path->last_valid = jiffies;
850 return nc_path;
851 }
852
853 /* No existing nc_path was found; create a new one */
854 nc_path = kzalloc(sizeof(*nc_path), GFP_ATOMIC);
855
856 if (!nc_path)
857 return NULL;
858
859 /* Initialize nc_path */
860 INIT_LIST_HEAD(&nc_path->packet_list);
861 spin_lock_init(&nc_path->packet_list_lock);
862 atomic_set(&nc_path->refcount, 2);
863 nc_path->last_valid = jiffies;
864 memcpy(nc_path->next_hop, dst, ETH_ALEN);
865 memcpy(nc_path->prev_hop, src, ETH_ALEN);
866
867 batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_path %pM -> %pM\n",
868 nc_path->prev_hop,
869 nc_path->next_hop);
870
871 /* Add nc_path to hash table */
872 hash_added = batadv_hash_add(hash, batadv_nc_hash_compare,
873 batadv_nc_hash_choose, &nc_path_key,
874 &nc_path->hash_entry);
875
876 if (hash_added < 0) {
877 kfree(nc_path);
878 return NULL;
879 }
880
881 return nc_path;
882}
883
884/**
885 * batadv_nc_random_weight_tq - scale the receiver's TQ-value to avoid unfair
886 * selection of a receiver with slightly lower TQ than the other
887 * @tq: to be weighted tq value
888 */
889static uint8_t batadv_nc_random_weight_tq(uint8_t tq)
890{
891 uint8_t rand_val; uint32_t rand_tq;
892
893 get_random_bytes(&rand_val, sizeof(rand_val));
894
895 /* randomize the estimated packet loss (max TQ - estimated TQ) */
896 rand_tq = rand_val * (BATADV_TQ_MAX_VALUE - tq);
897
898 /* normalize the randomized packet loss */
899 rand_tq /= BATADV_TQ_MAX_VALUE;
900
901 /* convert to (randomized) estimated tq again */
902 return BATADV_TQ_MAX_VALUE - rand_tq;
903}
904
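A worked example of the weighting with illustrative values (editorial; assumes rand_tq is wide enough to hold the intermediate product, per the widened declaration above):

/* tq = 200, get_random_bytes() yields rand_val = 128:
 *   rand_tq = 128 * (255 - 200) = 7040   randomized packet loss
 *   7040 / 255 = 27                      normalized to the TQ range
 *   255 - 27 = 228                       randomized estimated tq
 * A large rand_val inflates the estimated loss, so a receiver with a
 * slightly lower real TQ can still win the comparison in
 * batadv_nc_code_packets().
 */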
905/**
906 * batadv_nc_memxor - XOR destination with source
907 * @dst: byte array to XOR into
908 * @src: byte array to XOR from
909 * @len: length of destination array
910 */
911static void batadv_nc_memxor(char *dst, const char *src, unsigned int len)
912{
913 unsigned int i;
914
915 for (i = 0; i < len; ++i)
916 dst[i] ^= src[i];
917}
918
919/**
920 * batadv_nc_code_packets - code a received unicast_packet with an nc packet
921 * into a coded_packet and send it
922 * @bat_priv: the bat priv with all the soft interface information
923 * @skb: data skb to forward
924 * @ethhdr: pointer to the ethernet header inside the skb
925 * @nc_packet: structure containing the packet the skb can be coded with
926 * @neigh_node: next hop to forward packet to
927 *
928 * Returns true if both packets are consumed, false otherwise.
929 */
930static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
931 struct sk_buff *skb,
932 struct ethhdr *ethhdr,
933 struct batadv_nc_packet *nc_packet,
934 struct batadv_neigh_node *neigh_node)
935{
936 uint8_t tq_weighted_neigh, tq_weighted_coding;
937 struct sk_buff *skb_dest, *skb_src;
938 struct batadv_unicast_packet *packet1;
939 struct batadv_unicast_packet *packet2;
940 struct batadv_coded_packet *coded_packet;
941 struct batadv_neigh_node *neigh_tmp, *router_neigh;
942 struct batadv_neigh_node *router_coding = NULL;
943 uint8_t *first_source, *first_dest, *second_source, *second_dest;
944 __be32 packet_id1, packet_id2;
945 size_t count;
946 bool res = false;
947 int coding_len;
948 int unicast_size = sizeof(*packet1);
949 int coded_size = sizeof(*coded_packet);
950 int header_add = coded_size - unicast_size;
951
952 router_neigh = batadv_orig_node_get_router(neigh_node->orig_node);
953 if (!router_neigh)
954 goto out;
955
956 neigh_tmp = nc_packet->neigh_node;
957 router_coding = batadv_orig_node_get_router(neigh_tmp->orig_node);
958 if (!router_coding)
959 goto out;
960
961 tq_weighted_neigh = batadv_nc_random_weight_tq(router_neigh->tq_avg);
962 tq_weighted_coding = batadv_nc_random_weight_tq(router_coding->tq_avg);
963
964 /* Select one destination for the MAC-header dst-field based on
965 * weighted TQ-values.
966 */
967 if (tq_weighted_neigh >= tq_weighted_coding) {
968 /* Destination from nc_packet is selected for MAC-header */
969 first_dest = nc_packet->nc_path->next_hop;
970 first_source = nc_packet->nc_path->prev_hop;
971 second_dest = neigh_node->addr;
972 second_source = ethhdr->h_source;
973 packet1 = (struct batadv_unicast_packet *)nc_packet->skb->data;
974 packet2 = (struct batadv_unicast_packet *)skb->data;
975 packet_id1 = nc_packet->packet_id;
976 packet_id2 = batadv_skb_crc32(skb,
977 skb->data + sizeof(*packet2));
978 } else {
979 /* Destination for skb is selected for MAC-header */
980 first_dest = neigh_node->addr;
981 first_source = ethhdr->h_source;
982 second_dest = nc_packet->nc_path->next_hop;
983 second_source = nc_packet->nc_path->prev_hop;
984 packet1 = (struct batadv_unicast_packet *)skb->data;
985 packet2 = (struct batadv_unicast_packet *)nc_packet->skb->data;
986 packet_id1 = batadv_skb_crc32(skb,
987 skb->data + sizeof(*packet1));
988 packet_id2 = nc_packet->packet_id;
989 }
990
991 /* Instead of zero padding the smallest data buffer, we
992 * code into the largest.
993 */
994 if (skb->len <= nc_packet->skb->len) {
995 skb_dest = nc_packet->skb;
996 skb_src = skb;
997 } else {
998 skb_dest = skb;
999 skb_src = nc_packet->skb;
1000 }
1001
1002 /* coding_len is used when decoding the shorter packet */
1003 coding_len = skb_src->len - unicast_size;
1004
1005 if (skb_linearize(skb_dest) < 0 || skb_linearize(skb_src) < 0)
1006 goto out;
1007
1008 skb_push(skb_dest, header_add);
1009
1010 coded_packet = (struct batadv_coded_packet *)skb_dest->data;
1011 skb_reset_mac_header(skb_dest);
1012
1013 coded_packet->header.packet_type = BATADV_CODED;
1014 coded_packet->header.version = BATADV_COMPAT_VERSION;
1015 coded_packet->header.ttl = packet1->header.ttl;
1016
1017 /* Info about first unicast packet */
1018 memcpy(coded_packet->first_source, first_source, ETH_ALEN);
1019 memcpy(coded_packet->first_orig_dest, packet1->dest, ETH_ALEN);
1020 coded_packet->first_crc = packet_id1;
1021 coded_packet->first_ttvn = packet1->ttvn;
1022
1023 /* Info about second unicast packet */
1024 memcpy(coded_packet->second_dest, second_dest, ETH_ALEN);
1025 memcpy(coded_packet->second_source, second_source, ETH_ALEN);
1026 memcpy(coded_packet->second_orig_dest, packet2->dest, ETH_ALEN);
1027 coded_packet->second_crc = packet_id2;
1028 coded_packet->second_ttl = packet2->header.ttl;
1029 coded_packet->second_ttvn = packet2->ttvn;
1030 coded_packet->coded_len = htons(coding_len);
1031
1032 /* This is where the magic happens: Code skb_src into skb_dest */
1033 batadv_nc_memxor(skb_dest->data + coded_size,
1034 skb_src->data + unicast_size, coding_len);
1035
1036 /* Update counters accordingly */
1037 if (BATADV_SKB_CB(skb_src)->decoded &&
1038 BATADV_SKB_CB(skb_dest)->decoded) {
1039 /* Both packets are recoded */
1040 count = skb_src->len + ETH_HLEN;
1041 count += skb_dest->len + ETH_HLEN;
1042 batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE, 2);
1043 batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, count);
1044 } else if (!BATADV_SKB_CB(skb_src)->decoded &&
1045 !BATADV_SKB_CB(skb_dest)->decoded) {
1046 /* Both packets are newly coded */
1047 count = skb_src->len + ETH_HLEN;
1048 count += skb_dest->len + ETH_HLEN;
1049 batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE, 2);
1050 batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, count);
1051 } else if (BATADV_SKB_CB(skb_src)->decoded &&
1052 !BATADV_SKB_CB(skb_dest)->decoded) {
1053 /* skb_src recoded and skb_dest is newly coded */
1054 batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE);
1055 batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES,
1056 skb_src->len + ETH_HLEN);
1057 batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE);
1058 batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES,
1059 skb_dest->len + ETH_HLEN);
1060 } else if (!BATADV_SKB_CB(skb_src)->decoded &&
1061 BATADV_SKB_CB(skb_dest)->decoded) {
1062 /* skb_src is newly coded and skb_dest is recoded */
1063 batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE);
1064 batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES,
1065 skb_src->len + ETH_HLEN);
1066 batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE);
1067 batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES,
1068 skb_dest->len + ETH_HLEN);
1069 }
1070
1071 /* skb_src is now coded into skb_dest, so free it */
1072 kfree_skb(skb_src);
1073
1074 /* avoid duplicate free of skb from nc_packet */
1075 nc_packet->skb = NULL;
1076 batadv_nc_packet_free(nc_packet);
1077
1078 /* Send the coded packet and return true */
1079 batadv_send_skb_packet(skb_dest, neigh_node->if_incoming, first_dest);
1080 res = true;
1081out:
1082 if (router_neigh)
1083 batadv_neigh_node_free_ref(router_neigh);
1084 if (router_coding)
1085 batadv_neigh_node_free_ref(router_coding);
1086 return res;
1087}
1088
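The XOR pass above is the whole coding step: the shorter payload is folded into the longer one, and coded_len tells the receivers how many bytes carry both packets. A minimal user-space sketch of the sender side — the buffer names and the nc_memxor() helper are made up for the example; the kernel operates on skb data via batadv_nc_memxor():

#include <stdio.h>
#include <string.h>

/* user-space stand-in for batadv_nc_memxor() */
static void nc_memxor(unsigned char *dst, const unsigned char *src, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++)
		dst[i] ^= src[i];
}

int main(void)
{
	/* hypothetical payloads behind the two unicast headers */
	unsigned char skb_dest[] = "payload-of-the-longer-unicast-packet";
	unsigned char skb_src[] = "short-payload";
	size_t coding_len = sizeof(skb_src);

	/* code the shorter packet into the larger one, as above; both
	 * packets now travel in the single transmission of skb_dest
	 */
	nc_memxor(skb_dest, skb_src, coding_len);
	printf("coded_len carried in the header: %zu\n", coding_len);
	return 0;
}
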
1089/**
1090 * batadv_nc_skb_coding_possible - check if the skb may be (re)coded toward dst
1091 * @skb: data skb to forward
1092 * @dst: destination mac address of the other skb to code with
1093 * @src: source mac address of skb
1094 *
1095 * Whenever we network code a packet we have to check whether we received it in
1096 * a network coded form. If so, we may not be able to use it for coding because
1097 * some neighbors may also have received (overheard) the packet in the network
1098 * coded form without being able to decode it. It is hard to know which of the
1099 * neighboring nodes was able to decode the packet, therefore we can only
1100 * re-code the packet if the source of the previous encoded packet is involved.
1101 * Since the source encoded the packet we can be certain it has all necessary
1102 * decode information.
1103 *
1104 * Returns true if coding of a decoded packet is allowed.
1105 */
1106static bool batadv_nc_skb_coding_possible(struct sk_buff *skb,
1107 uint8_t *dst, uint8_t *src)
1108{
1109 if (BATADV_SKB_CB(skb)->decoded && !batadv_compare_eth(dst, src))
1110 return false;
1111 else
1112 return true;
1113}
1114
1115/**
1116 * batadv_nc_path_search - Find the coding path matching in_nc_node and
1117 * out_nc_node to retrieve a buffered packet that can be used for coding.
1118 * @bat_priv: the bat priv with all the soft interface information
1119 * @in_nc_node: pointer to skb next hop's neighbor nc node
1120 * @out_nc_node: pointer to skb source's neighbor nc node
1121 * @skb: data skb to forward
1122 * @eth_dst: next hop mac address of skb
1123 *
1124 * Returns a buffered nc_packet if a coding opportunity is found, NULL otherwise.
1125 */
1126static struct batadv_nc_packet *
1127batadv_nc_path_search(struct batadv_priv *bat_priv,
1128 struct batadv_nc_node *in_nc_node,
1129 struct batadv_nc_node *out_nc_node,
1130 struct sk_buff *skb,
1131 uint8_t *eth_dst)
1132{
1133 struct batadv_nc_path *nc_path, nc_path_key;
1134 struct batadv_nc_packet *nc_packet_out = NULL;
1135 struct batadv_nc_packet *nc_packet, *nc_packet_tmp;
1136 struct batadv_hashtable *hash = bat_priv->nc.coding_hash;
1137 int idx;
1138
1139 if (!hash)
1140 return NULL;
1141
1142	/* Create a temporary nc_path to use as hash key */
1143 batadv_nc_hash_key_gen(&nc_path_key, in_nc_node->addr,
1144 out_nc_node->addr);
1145 idx = batadv_nc_hash_choose(&nc_path_key, hash->size);
1146
1147 /* Check for coding opportunities in this nc_path */
1148 rcu_read_lock();
1149 hlist_for_each_entry_rcu(nc_path, &hash->table[idx], hash_entry) {
1150 if (!batadv_compare_eth(nc_path->prev_hop, in_nc_node->addr))
1151 continue;
1152
1153 if (!batadv_compare_eth(nc_path->next_hop, out_nc_node->addr))
1154 continue;
1155
1156 spin_lock_bh(&nc_path->packet_list_lock);
1157 if (list_empty(&nc_path->packet_list)) {
1158 spin_unlock_bh(&nc_path->packet_list_lock);
1159 continue;
1160 }
1161
1162 list_for_each_entry_safe(nc_packet, nc_packet_tmp,
1163 &nc_path->packet_list, list) {
1164 if (!batadv_nc_skb_coding_possible(nc_packet->skb,
1165 eth_dst,
1166 in_nc_node->addr))
1167 continue;
1168
1169 /* Coding opportunity is found! */
1170 list_del(&nc_packet->list);
1171 nc_packet_out = nc_packet;
1172 break;
1173 }
1174
1175 spin_unlock_bh(&nc_path->packet_list_lock);
1176 break;
1177 }
1178 rcu_read_unlock();
1179
1180 return nc_packet_out;
1181}
1182
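Every nc_path is keyed by its (prev_hop, next_hop) pair. A toy sketch of that keying — the sum-based bucket chooser is a stand-in of my own; the kernel mixes the key bytes with its own hash function instead:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

struct nc_path_key {
	uint8_t prev_hop[ETH_ALEN];
	uint8_t next_hop[ETH_ALEN];
};

/* only the two hop addresses matter for the key, as in
 * batadv_nc_hash_key_gen()
 */
static void nc_hash_key_gen(struct nc_path_key *key, const uint8_t *src,
			    const uint8_t *dst)
{
	memset(key, 0, sizeof(*key));
	memcpy(key->prev_hop, src, ETH_ALEN);
	memcpy(key->next_hop, dst, ETH_ALEN);
}

/* toy bucket chooser; not the kernel's real hash */
static unsigned int nc_hash_choose(const struct nc_path_key *key,
				   unsigned int size)
{
	const uint8_t *bytes = (const uint8_t *)key;
	unsigned int i, sum = 0;

	for (i = 0; i < sizeof(*key); i++)
		sum += bytes[i];

	return sum % size;
}

int main(void)
{
	const uint8_t in[ETH_ALEN] = { 0x02, 0, 0, 0, 0, 0x01 };
	const uint8_t out[ETH_ALEN] = { 0x02, 0, 0, 0, 0, 0x02 };
	struct nc_path_key key;

	nc_hash_key_gen(&key, in, out);
	printf("bucket: %u\n", nc_hash_choose(&key, 128));
	return 0;
}
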
1183/**
1184 * batadv_nc_skb_src_search - Loops through the list of neighboring nodes of the
1185 * skb's sender (may be equal to the originator).
1186 * @bat_priv: the bat priv with all the soft interface information
1187 * @skb: data skb to forward
1188 * @eth_dst: next hop mac address of skb
1189 * @eth_src: source mac address of skb
1190 * @in_nc_node: pointer to skb next hop's neighbor nc node
1191 *
1192 * Returns an nc packet if a suitable coding packet was found, NULL otherwise.
1193 */
1194static struct batadv_nc_packet *
1195batadv_nc_skb_src_search(struct batadv_priv *bat_priv,
1196 struct sk_buff *skb,
1197 uint8_t *eth_dst,
1198 uint8_t *eth_src,
1199 struct batadv_nc_node *in_nc_node)
1200{
1201 struct batadv_orig_node *orig_node;
1202 struct batadv_nc_node *out_nc_node;
1203 struct batadv_nc_packet *nc_packet = NULL;
1204
1205 orig_node = batadv_orig_hash_find(bat_priv, eth_src);
1206 if (!orig_node)
1207 return NULL;
1208
1209 rcu_read_lock();
1210 list_for_each_entry_rcu(out_nc_node,
1211 &orig_node->out_coding_list, list) {
1212 /* Check if the skb is decoded and if recoding is possible */
1213 if (!batadv_nc_skb_coding_possible(skb,
1214 out_nc_node->addr, eth_src))
1215 continue;
1216
1217 /* Search for an opportunity in this nc_path */
1218 nc_packet = batadv_nc_path_search(bat_priv, in_nc_node,
1219 out_nc_node, skb, eth_dst);
1220 if (nc_packet)
1221 break;
1222 }
1223 rcu_read_unlock();
1224
1225 batadv_orig_node_free_ref(orig_node);
1226 return nc_packet;
1227}
1228
1229/**
1230 * batadv_nc_skb_store_before_coding - set the ethernet src and dst of the
1231 * unicast skb before it is stored for use in later decoding
1232 * @bat_priv: the bat priv with all the soft interface information
1233 * @skb: data skb to store
1234 * @eth_dst_new: new destination mac address of skb
1235 */
1236static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
1237 struct sk_buff *skb,
1238 uint8_t *eth_dst_new)
1239{
1240 struct ethhdr *ethhdr;
1241
1242 /* Copy skb header to change the mac header */
1243 skb = pskb_copy(skb, GFP_ATOMIC);
1244 if (!skb)
1245 return;
1246
1247 /* Set the mac header as if we actually sent the packet uncoded */
1248 ethhdr = (struct ethhdr *)skb_mac_header(skb);
1249 memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN);
1250 memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN);
1251
1252 /* Set data pointer to MAC header to mimic packets from our tx path */
1253 skb_push(skb, ETH_HLEN);
1254
1255 /* Add the packet to the decoding packet pool */
1256 batadv_nc_skb_store_for_decoding(bat_priv, skb);
1257
1258 /* batadv_nc_skb_store_for_decoding() clones the skb, so we must free
1259 * our ref
1260 */
1261 kfree_skb(skb);
1262}
1263
1264/**
1265 * batadv_nc_skb_dst_search - Loops through list of neighboring nodes to dst.
1266 * @skb: data skb to forward
1267 * @neigh_node: next hop to forward packet to
1268 * @ethhdr: pointer to the ethernet header inside the skb
1269 *
1270 * Loops through list of neighboring nodes the next hop has a good connection to
1271 * (receives OGMs with a sufficient quality). We need to find a neighbor of our
1272 * next hop that potentially sent a packet which our next hop also received
1273 * (overheard) and has stored for later decoding.
1274 *
1275 * Returns true if the skb was consumed (encoded packet sent) or false otherwise
1276 */
1277static bool batadv_nc_skb_dst_search(struct sk_buff *skb,
1278 struct batadv_neigh_node *neigh_node,
1279 struct ethhdr *ethhdr)
1280{
1281 struct net_device *netdev = neigh_node->if_incoming->soft_iface;
1282 struct batadv_priv *bat_priv = netdev_priv(netdev);
1283 struct batadv_orig_node *orig_node = neigh_node->orig_node;
1284 struct batadv_nc_node *nc_node;
1285 struct batadv_nc_packet *nc_packet = NULL;
1286
1287 rcu_read_lock();
1288 list_for_each_entry_rcu(nc_node, &orig_node->in_coding_list, list) {
1289 /* Search for coding opportunity with this in_nc_node */
1290 nc_packet = batadv_nc_skb_src_search(bat_priv, skb,
1291 neigh_node->addr,
1292 ethhdr->h_source, nc_node);
1293
1294 /* Opportunity was found, so stop searching */
1295 if (nc_packet)
1296 break;
1297 }
1298 rcu_read_unlock();
1299
1300 if (!nc_packet)
1301 return false;
1302
1303 /* Save packets for later decoding */
1304 batadv_nc_skb_store_before_coding(bat_priv, skb,
1305 neigh_node->addr);
1306 batadv_nc_skb_store_before_coding(bat_priv, nc_packet->skb,
1307 nc_packet->neigh_node->addr);
1308
1309 /* Code and send packets */
1310 if (batadv_nc_code_packets(bat_priv, skb, ethhdr, nc_packet,
1311 neigh_node))
1312 return true;
1313
1314	/* Out of memory? Coding failed, so send the buffered packet uncoded
1315	 * to avoid a memory leak. The skb passed as argument will be dealt with
1316 * by the calling function.
1317 */
1318 batadv_nc_send_packet(nc_packet);
1319 return false;
1320}
1321
1322/**
1323 * batadv_nc_skb_add_to_path - buffer skb for later encoding / decoding
1324 * @skb: skb to add to path
1325 * @nc_path: path to add skb to
1326 * @neigh_node: next hop to forward packet to
1327 * @packet_id: checksum to identify packet
1328 *
1329 * Returns true if the packet was buffered or false in case of an error.
1330 */
1331static bool batadv_nc_skb_add_to_path(struct sk_buff *skb,
1332 struct batadv_nc_path *nc_path,
1333 struct batadv_neigh_node *neigh_node,
1334 __be32 packet_id)
1335{
1336 struct batadv_nc_packet *nc_packet;
1337
1338 nc_packet = kzalloc(sizeof(*nc_packet), GFP_ATOMIC);
1339 if (!nc_packet)
1340 return false;
1341
1342 /* Initialize nc_packet */
1343 nc_packet->timestamp = jiffies;
1344 nc_packet->packet_id = packet_id;
1345 nc_packet->skb = skb;
1346 nc_packet->neigh_node = neigh_node;
1347 nc_packet->nc_path = nc_path;
1348
1349 /* Add coding packet to list */
1350 spin_lock_bh(&nc_path->packet_list_lock);
1351 list_add_tail(&nc_packet->list, &nc_path->packet_list);
1352 spin_unlock_bh(&nc_path->packet_list_lock);
1353
1354 return true;
1355}
1356
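The buffering above is just a timestamped, id-tagged tail insert under the path's lock. A user-space analogue with hypothetical names — a pthread mutex stands in for the kernel spinlock and a singly linked tail queue for the list_head:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

struct nc_packet {
	time_t timestamp;
	unsigned int packet_id;
	struct nc_packet *next;
};

struct nc_path {
	pthread_mutex_t lock;
	struct nc_packet *head, *tail;
};

static int nc_add_to_path(struct nc_path *path, unsigned int packet_id)
{
	struct nc_packet *pkt = calloc(1, sizeof(*pkt));

	if (!pkt)
		return 0;

	pkt->timestamp = time(NULL);
	pkt->packet_id = packet_id;

	/* append under the lock, mirroring packet_list_lock above */
	pthread_mutex_lock(&path->lock);
	if (path->tail)
		path->tail->next = pkt;
	else
		path->head = pkt;
	path->tail = pkt;
	pthread_mutex_unlock(&path->lock);
	return 1;
}

int main(void)
{
	struct nc_path path = { .lock = PTHREAD_MUTEX_INITIALIZER };

	printf("buffered: %d\n", nc_add_to_path(&path, 0xdeadbeef));
	free(path.head);
	return 0;
}
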
1357/**
1358 * batadv_nc_skb_forward - try to code a packet or add it to the coding packet
1359 * buffer
1360 * @skb: data skb to forward
1361 * @neigh_node: next hop to forward packet to
1362 * @ethhdr: pointer to the ethernet header inside the skb
1363 *
1364 * Returns true if the skb was consumed (encoded packet sent) or false otherwise
1365 */
1366bool batadv_nc_skb_forward(struct sk_buff *skb,
1367 struct batadv_neigh_node *neigh_node,
1368 struct ethhdr *ethhdr)
1369{
1370 const struct net_device *netdev = neigh_node->if_incoming->soft_iface;
1371 struct batadv_priv *bat_priv = netdev_priv(netdev);
1372 struct batadv_unicast_packet *packet;
1373 struct batadv_nc_path *nc_path;
1374 __be32 packet_id;
1375 u8 *payload;
1376
1377 /* Check if network coding is enabled */
1378 if (!atomic_read(&bat_priv->network_coding))
1379 goto out;
1380
1381 /* We only handle unicast packets */
1382 payload = skb_network_header(skb);
1383 packet = (struct batadv_unicast_packet *)payload;
1384 if (packet->header.packet_type != BATADV_UNICAST)
1385 goto out;
1386
1387 /* Try to find a coding opportunity and send the skb if one is found */
1388 if (batadv_nc_skb_dst_search(skb, neigh_node, ethhdr))
1389 return true;
1390
1391 /* Find or create a nc_path for this src-dst pair */
1392 nc_path = batadv_nc_get_path(bat_priv,
1393 bat_priv->nc.coding_hash,
1394 ethhdr->h_source,
1395 neigh_node->addr);
1396
1397 if (!nc_path)
1398 goto out;
1399
1400 /* Add skb to nc_path */
1401 packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet));
1402 if (!batadv_nc_skb_add_to_path(skb, nc_path, neigh_node, packet_id))
1403 goto free_nc_path;
1404
1405 /* Packet is consumed */
1406 return true;
1407
1408free_nc_path:
1409 batadv_nc_path_free_ref(nc_path);
1410out:
1411 /* Packet is not consumed */
1412 return false;
1413}
1414
1415/**
1416 * batadv_nc_skb_store_for_decoding - save a clone of the skb which can be used
1417 * when decoding coded packets
1418 * @bat_priv: the bat priv with all the soft interface information
1419 * @skb: data skb to store
1420 */
1421void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
1422 struct sk_buff *skb)
1423{
1424 struct batadv_unicast_packet *packet;
1425 struct batadv_nc_path *nc_path;
1426 struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
1427 __be32 packet_id;
1428 u8 *payload;
1429
1430 /* Check if network coding is enabled */
1431 if (!atomic_read(&bat_priv->network_coding))
1432 goto out;
1433
1434 /* Check for supported packet type */
1435 payload = skb_network_header(skb);
1436 packet = (struct batadv_unicast_packet *)payload;
1437 if (packet->header.packet_type != BATADV_UNICAST)
1438 goto out;
1439
1440	/* Find an existing nc_path or create a new one */
1441 nc_path = batadv_nc_get_path(bat_priv,
1442 bat_priv->nc.decoding_hash,
1443 ethhdr->h_source,
1444 ethhdr->h_dest);
1445
1446 if (!nc_path)
1447 goto out;
1448
1449 /* Clone skb and adjust skb->data to point at batman header */
1450 skb = skb_clone(skb, GFP_ATOMIC);
1451 if (unlikely(!skb))
1452 goto free_nc_path;
1453
1454 if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
1455 goto free_skb;
1456
1457 if (unlikely(!skb_pull_rcsum(skb, ETH_HLEN)))
1458 goto free_skb;
1459
1460 /* Add skb to nc_path */
1461 packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet));
1462 if (!batadv_nc_skb_add_to_path(skb, nc_path, NULL, packet_id))
1463 goto free_skb;
1464
1465 batadv_inc_counter(bat_priv, BATADV_CNT_NC_BUFFER);
1466 return;
1467
1468free_skb:
1469 kfree_skb(skb);
1470free_nc_path:
1471 batadv_nc_path_free_ref(nc_path);
1472out:
1473 return;
1474}
1475
1476/**
1477 * batadv_nc_skb_store_sniffed_unicast - check if a received unicast packet
1478 * should be saved in the decoding buffer and, if so, store it there
1479 * @bat_priv: the bat priv with all the soft interface information
1480 * @skb: unicast skb to store
1481 */
1482void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
1483 struct sk_buff *skb)
1484{
1485 struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
1486
1487 if (batadv_is_my_mac(bat_priv, ethhdr->h_dest))
1488 return;
1489
1490 /* Set data pointer to MAC header to mimic packets from our tx path */
1491 skb_push(skb, ETH_HLEN);
1492
1493 batadv_nc_skb_store_for_decoding(bat_priv, skb);
1494}
1495
1496/**
1497 * batadv_nc_skb_decode_packet - decode given skb using the decode data stored
1498 * in nc_packet
1499 * @bat_priv: the bat priv with all the soft interface information
1500 * @skb: unicast skb to decode
1501 * @nc_packet: decode data needed to decode the skb
1502 *
1503 * Returns pointer to decoded unicast packet if the packet was decoded or NULL
1504 * in case of an error.
1505 */
1506static struct batadv_unicast_packet *
1507batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
1508 struct batadv_nc_packet *nc_packet)
1509{
1510 const int h_size = sizeof(struct batadv_unicast_packet);
1511 const int h_diff = sizeof(struct batadv_coded_packet) - h_size;
1512 struct batadv_unicast_packet *unicast_packet;
1513 struct batadv_coded_packet coded_packet_tmp;
1514 struct ethhdr *ethhdr, ethhdr_tmp;
1515 uint8_t *orig_dest, ttl, ttvn;
1516 unsigned int coding_len;
1517
1518 /* Save headers temporarily */
1519 memcpy(&coded_packet_tmp, skb->data, sizeof(coded_packet_tmp));
1520 memcpy(&ethhdr_tmp, skb_mac_header(skb), sizeof(ethhdr_tmp));
1521
1522 if (skb_cow(skb, 0) < 0)
1523 return NULL;
1524
1525 if (unlikely(!skb_pull_rcsum(skb, h_diff)))
1526 return NULL;
1527
1528	/* Data points to the batman header, so set the mac header ETH_HLEN
1529	 * bytes before it and the network header to data
1530 */
1531 skb_set_mac_header(skb, -ETH_HLEN);
1532 skb_reset_network_header(skb);
1533
1534 /* Reconstruct original mac header */
1535 ethhdr = (struct ethhdr *)skb_mac_header(skb);
1536 memcpy(ethhdr, &ethhdr_tmp, sizeof(*ethhdr));
1537
1538 /* Select the correct unicast header information based on the location
1539 * of our mac address in the coded_packet header
1540 */
1541 if (batadv_is_my_mac(bat_priv, coded_packet_tmp.second_dest)) {
1542 /* If we are the second destination the packet was overheard,
1543 * so the Ethernet address must be copied to h_dest and
1544 * pkt_type changed from PACKET_OTHERHOST to PACKET_HOST
1545 */
1546 memcpy(ethhdr->h_dest, coded_packet_tmp.second_dest, ETH_ALEN);
1547 skb->pkt_type = PACKET_HOST;
1548
1549 orig_dest = coded_packet_tmp.second_orig_dest;
1550 ttl = coded_packet_tmp.second_ttl;
1551 ttvn = coded_packet_tmp.second_ttvn;
1552 } else {
1553 orig_dest = coded_packet_tmp.first_orig_dest;
1554 ttl = coded_packet_tmp.header.ttl;
1555 ttvn = coded_packet_tmp.first_ttvn;
1556 }
1557
1558 coding_len = ntohs(coded_packet_tmp.coded_len);
1559
1560 if (coding_len > skb->len)
1561 return NULL;
1562
1563 /* Here the magic is reversed:
1564 * extract the missing packet from the received coded packet
1565 */
1566 batadv_nc_memxor(skb->data + h_size,
1567 nc_packet->skb->data + h_size,
1568 coding_len);
1569
1570 /* Resize decoded skb if decoded with larger packet */
1571 if (nc_packet->skb->len > coding_len + h_size)
1572 pskb_trim_rcsum(skb, coding_len + h_size);
1573
1574 /* Create decoded unicast packet */
1575 unicast_packet = (struct batadv_unicast_packet *)skb->data;
1576 unicast_packet->header.packet_type = BATADV_UNICAST;
1577 unicast_packet->header.version = BATADV_COMPAT_VERSION;
1578 unicast_packet->header.ttl = ttl;
1579 memcpy(unicast_packet->dest, orig_dest, ETH_ALEN);
1580 unicast_packet->ttvn = ttvn;
1581
1582 batadv_nc_packet_free(nc_packet);
1583 return unicast_packet;
1584}
1585
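Decoding mirrors the sender: XOR the buffered copy back out over the first coded_len bytes, then trim. A user-space sketch with hypothetical payloads — the kernel does the same on skb data and trims with pskb_trim_rcsum():

#include <stdio.h>
#include <string.h>

static void nc_memxor(unsigned char *dst, const unsigned char *src, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++)
		dst[i] ^= src[i];
}

int main(void)
{
	unsigned char ours[] = "packet-we-buffered-before-sending";
	unsigned char missing[] = "the-overheard-packet";
	size_t coding_len = sizeof(missing);
	unsigned char coded[sizeof(ours)];

	/* what the relay transmitted: ours XOR missing over coded_len bytes */
	memcpy(coded, ours, sizeof(ours));
	nc_memxor(coded, missing, coding_len);

	/* receiver: XOR the buffered copy back out to recover the missing
	 * packet; the first coded_len bytes are all that is valid, which
	 * is why the kernel trims the larger buffer afterwards
	 */
	nc_memxor(coded, ours, coding_len);
	printf("recovered: %s\n", (char *)coded);
	return 0;
}
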
1586/**
1587 * batadv_nc_find_decoding_packet - search through buffered decoding data to
1588 * find the data needed to decode the coded packet
1589 * @bat_priv: the bat priv with all the soft interface information
1590 * @ethhdr: pointer to the ethernet header inside the coded packet
1591 * @coded: coded packet we try to find decode data for
1592 *
1593 * Returns pointer to nc packet if the needed data was found or NULL otherwise.
1594 */
1595static struct batadv_nc_packet *
1596batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv,
1597 struct ethhdr *ethhdr,
1598 struct batadv_coded_packet *coded)
1599{
1600 struct batadv_hashtable *hash = bat_priv->nc.decoding_hash;
1601 struct batadv_nc_packet *tmp_nc_packet, *nc_packet = NULL;
1602 struct batadv_nc_path *nc_path, nc_path_key;
1603 uint8_t *dest, *source;
1604 __be32 packet_id;
1605 int index;
1606
1607 if (!hash)
1608 return NULL;
1609
1610 /* Select the correct packet id based on the location of our mac-addr */
1611 dest = ethhdr->h_source;
1612 if (!batadv_is_my_mac(bat_priv, coded->second_dest)) {
1613 source = coded->second_source;
1614 packet_id = coded->second_crc;
1615 } else {
1616 source = coded->first_source;
1617 packet_id = coded->first_crc;
1618 }
1619
1620 batadv_nc_hash_key_gen(&nc_path_key, source, dest);
1621 index = batadv_nc_hash_choose(&nc_path_key, hash->size);
1622
1623 /* Search for matching coding path */
1624 rcu_read_lock();
1625 hlist_for_each_entry_rcu(nc_path, &hash->table[index], hash_entry) {
1626 /* Find matching nc_packet */
1627 spin_lock_bh(&nc_path->packet_list_lock);
1628 list_for_each_entry(tmp_nc_packet,
1629 &nc_path->packet_list, list) {
1630 if (packet_id == tmp_nc_packet->packet_id) {
1631 list_del(&tmp_nc_packet->list);
1632
1633 nc_packet = tmp_nc_packet;
1634 break;
1635 }
1636 }
1637 spin_unlock_bh(&nc_path->packet_list_lock);
1638
1639 if (nc_packet)
1640 break;
1641 }
1642 rcu_read_unlock();
1643
1644 if (!nc_packet)
1645 batadv_dbg(BATADV_DBG_NC, bat_priv,
1646			   "No decoding packet found for %u\n", ntohl(packet_id));
1647
1648 return nc_packet;
1649}
1650
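The packet_id matched above was computed at buffering time by batadv_skb_crc32() over the payload behind the unicast header and stored big-endian (__be32). A self-contained sketch of a matching checksum; that it builds on the kernel's crc32c() with the Castagnoli polynomial and a zero seed is my reading of batadv_skb_crc32(), not something shown here:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* bitwise CRC32C (Castagnoli), reflected polynomial 0x82F63B78 */
static uint32_t crc32c(uint32_t crc, const unsigned char *data, size_t len)
{
	size_t i;
	int bit;

	for (i = 0; i < len; i++) {
		crc ^= data[i];
		for (bit = 0; bit < 8; bit++)
			crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
	}
	return crc;
}

int main(void)
{
	const unsigned char payload[] = "unicast payload after the header";
	uint32_t crc = crc32c(0, payload, sizeof(payload) - 1);

	/* the id is stored as __be32, i.e. htonl(crc) */
	printf("packet_id (host order): 0x%08x\n", crc);
	printf("packet_id (big endian): 0x%08x\n", htonl(crc));
	return 0;
}
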
1651/**
1652 * batadv_nc_recv_coded_packet - try to decode coded packet and enqueue the
1653 * resulting unicast packet
1654 * @skb: incoming coded packet
1655 * @recv_if: pointer to interface this packet was received on
1656 */
1657static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
1658 struct batadv_hard_iface *recv_if)
1659{
1660 struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
1661 struct batadv_unicast_packet *unicast_packet;
1662 struct batadv_coded_packet *coded_packet;
1663 struct batadv_nc_packet *nc_packet;
1664 struct ethhdr *ethhdr;
1665 int hdr_size = sizeof(*coded_packet);
1666
1667 /* Check if network coding is enabled */
1668 if (!atomic_read(&bat_priv->network_coding))
1669 return NET_RX_DROP;
1670
1671 /* Make sure we can access (and remove) header */
1672 if (unlikely(!pskb_may_pull(skb, hdr_size)))
1673 return NET_RX_DROP;
1674
1675 coded_packet = (struct batadv_coded_packet *)skb->data;
1676 ethhdr = (struct ethhdr *)skb_mac_header(skb);
1677
1678 /* Verify frame is destined for us */
1679 if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) &&
1680 !batadv_is_my_mac(bat_priv, coded_packet->second_dest))
1681 return NET_RX_DROP;
1682
1683 /* Update stat counter */
1684 if (batadv_is_my_mac(bat_priv, coded_packet->second_dest))
1685 batadv_inc_counter(bat_priv, BATADV_CNT_NC_SNIFFED);
1686
1687 nc_packet = batadv_nc_find_decoding_packet(bat_priv, ethhdr,
1688 coded_packet);
1689 if (!nc_packet) {
1690 batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED);
1691 return NET_RX_DROP;
1692 }
1693
1694	/* Make skbs linear, because decoding accesses the entire buffer */
1695 if (skb_linearize(skb) < 0)
1696 goto free_nc_packet;
1697
1698 if (skb_linearize(nc_packet->skb) < 0)
1699 goto free_nc_packet;
1700
1701 /* Decode the packet */
1702 unicast_packet = batadv_nc_skb_decode_packet(bat_priv, skb, nc_packet);
1703 if (!unicast_packet) {
1704 batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED);
1705 goto free_nc_packet;
1706 }
1707
1708 /* Mark packet as decoded to do correct recoding when forwarding */
1709 BATADV_SKB_CB(skb)->decoded = true;
1710 batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE);
1711 batadv_add_counter(bat_priv, BATADV_CNT_NC_DECODE_BYTES,
1712 skb->len + ETH_HLEN);
1713 return batadv_recv_unicast_packet(skb, recv_if);
1714
1715free_nc_packet:
1716 batadv_nc_packet_free(nc_packet);
1717 return NET_RX_DROP;
1718}
1719
1720/**
1721 * batadv_nc_free - clean up network coding memory
1722 * @bat_priv: the bat priv with all the soft interface information
1723 */
1724void batadv_nc_free(struct batadv_priv *bat_priv)
1725{
1726 batadv_recv_handler_unregister(BATADV_CODED);
1727 cancel_delayed_work_sync(&bat_priv->nc.work);
1728
1729 batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL);
1730 batadv_hash_destroy(bat_priv->nc.coding_hash);
1731 batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash, NULL);
1732 batadv_hash_destroy(bat_priv->nc.decoding_hash);
1733}
1734
1735/**
1736 * batadv_nc_nodes_seq_print_text - print the nc node information
1737 * @seq: seq file to print on
1738 * @offset: not used
1739 */
1740int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset)
1741{
1742 struct net_device *net_dev = (struct net_device *)seq->private;
1743 struct batadv_priv *bat_priv = netdev_priv(net_dev);
1744 struct batadv_hashtable *hash = bat_priv->orig_hash;
1745 struct batadv_hard_iface *primary_if;
1746 struct hlist_head *head;
1747 struct batadv_orig_node *orig_node;
1748 struct batadv_nc_node *nc_node;
1749 int i;
1750
1751 primary_if = batadv_seq_print_text_primary_if_get(seq);
1752 if (!primary_if)
1753 goto out;
1754
1755 /* Traverse list of originators */
1756 for (i = 0; i < hash->size; i++) {
1757 head = &hash->table[i];
1758
1759 /* For each orig_node in this bin */
1760 rcu_read_lock();
1761 hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
1762 seq_printf(seq, "Node: %pM\n", orig_node->orig);
1763
1764 seq_puts(seq, " Ingoing: ");
1765 /* For each in_nc_node to this orig_node */
1766 list_for_each_entry_rcu(nc_node,
1767 &orig_node->in_coding_list,
1768 list)
1769 seq_printf(seq, "%pM ",
1770 nc_node->addr);
1771 seq_puts(seq, "\n");
1772
1773 seq_puts(seq, " Outgoing: ");
1774			/* For each out_nc_node from this orig_node */
1775 list_for_each_entry_rcu(nc_node,
1776 &orig_node->out_coding_list,
1777 list)
1778 seq_printf(seq, "%pM ",
1779 nc_node->addr);
1780 seq_puts(seq, "\n\n");
1781 }
1782 rcu_read_unlock();
1783 }
1784
1785out:
1786 if (primary_if)
1787 batadv_hardif_free_ref(primary_if);
1788 return 0;
1789}
1790
1791/**
1792 * batadv_nc_init_debugfs - create nc folder and related files in debugfs
1793 * @bat_priv: the bat priv with all the soft interface information
1794 */
1795int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
1796{
1797 struct dentry *nc_dir, *file;
1798
1799 nc_dir = debugfs_create_dir("nc", bat_priv->debug_dir);
1800 if (!nc_dir)
1801 goto out;
1802
1803 file = debugfs_create_u8("min_tq", S_IRUGO | S_IWUSR, nc_dir,
1804 &bat_priv->nc.min_tq);
1805 if (!file)
1806 goto out;
1807
1808 file = debugfs_create_u32("max_fwd_delay", S_IRUGO | S_IWUSR, nc_dir,
1809 &bat_priv->nc.max_fwd_delay);
1810 if (!file)
1811 goto out;
1812
1813 file = debugfs_create_u32("max_buffer_time", S_IRUGO | S_IWUSR, nc_dir,
1814 &bat_priv->nc.max_buffer_time);
1815 if (!file)
1816 goto out;
1817
1818 return 0;
1819
1820out:
1821 return -ENOMEM;
1822}
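
The three files created above expose the coding knobs at runtime. A small sketch that reads one of them back; the path is an assumption (a mesh interface named bat0 and debugfs mounted at /sys/kernel/debug):

#include <stdio.h>

int main(void)
{
	/* hypothetical path; depends on interface name and debugfs mount */
	const char *path = "/sys/kernel/debug/batman_adv/bat0/nc/min_tq";
	unsigned int min_tq;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%u", &min_tq) == 1)
		printf("minimum TQ for coding neighbors: %u\n", min_tq);
	fclose(f);
	return 0;
}
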
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
new file mode 100644
index 000000000000..4fa6d0caddbd
--- /dev/null
+++ b/net/batman-adv/network-coding.h
@@ -0,0 +1,123 @@
1/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors:
2 *
3 * Martin Hundebøll, Jeppe Ledet-Pedersen
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#ifndef _NET_BATMAN_ADV_NETWORK_CODING_H_
21#define _NET_BATMAN_ADV_NETWORK_CODING_H_
22
23#ifdef CONFIG_BATMAN_ADV_NC
24
25int batadv_nc_init(struct batadv_priv *bat_priv);
26void batadv_nc_free(struct batadv_priv *bat_priv);
27void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
28 struct batadv_orig_node *orig_node,
29 struct batadv_orig_node *orig_neigh_node,
30 struct batadv_ogm_packet *ogm_packet,
31 int is_single_hop_neigh);
32void batadv_nc_purge_orig(struct batadv_priv *bat_priv,
33 struct batadv_orig_node *orig_node,
34 bool (*to_purge)(struct batadv_priv *,
35 struct batadv_nc_node *));
36void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv);
37void batadv_nc_init_orig(struct batadv_orig_node *orig_node);
38bool batadv_nc_skb_forward(struct sk_buff *skb,
39 struct batadv_neigh_node *neigh_node,
40 struct ethhdr *ethhdr);
41void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
42 struct sk_buff *skb);
43void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
44 struct sk_buff *skb);
45int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset);
46int batadv_nc_init_debugfs(struct batadv_priv *bat_priv);
47
48#else /* ifdef CONFIG_BATMAN_ADV_NC */
49
50static inline int batadv_nc_init(struct batadv_priv *bat_priv)
51{
52 return 0;
53}
54
55static inline void batadv_nc_free(struct batadv_priv *bat_priv)
56{
57 return;
58}
59
60static inline void
61batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
62 struct batadv_orig_node *orig_node,
63 struct batadv_orig_node *orig_neigh_node,
64 struct batadv_ogm_packet *ogm_packet,
65 int is_single_hop_neigh)
66{
67 return;
68}
69
70static inline void
71batadv_nc_purge_orig(struct batadv_priv *bat_priv,
72 struct batadv_orig_node *orig_node,
73 bool (*to_purge)(struct batadv_priv *,
74 struct batadv_nc_node *))
75{
76 return;
77}
78
79static inline void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv)
80{
81 return;
82}
83
84static inline void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
85{
86 return;
87}
88
89static inline bool batadv_nc_skb_forward(struct sk_buff *skb,
90 struct batadv_neigh_node *neigh_node,
91 struct ethhdr *ethhdr)
92{
93 return false;
94}
95
96static inline void
97batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
98 struct sk_buff *skb)
99{
100 return;
101}
102
103static inline void
104batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
105 struct sk_buff *skb)
106{
107 return;
108}
109
110static inline int batadv_nc_nodes_seq_print_text(struct seq_file *seq,
111 void *offset)
112{
113 return 0;
114}
115
116static inline int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
117{
118 return 0;
119}
120
121#endif /* ifdef CONFIG_BATMAN_ADV_NC */
122
123#endif /* _NET_BATMAN_ADV_NETWORK_CODING_H_ */
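
The #else branch above is the usual kernel stub pattern: with CONFIG_BATMAN_ADV_NC disabled, every hook compiles down to an empty inline, so call sites such as batadv_send_skb_packet() stay free of #ifdefs. A generic sketch of the same idea, with CONFIG_FEATURE as a made-up stand-in symbol:

#include <stdio.h>

/* toggle this the way Kconfig toggles CONFIG_BATMAN_ADV_NC */
#define CONFIG_FEATURE 1

#if CONFIG_FEATURE
static void feature_hook(int value)
{
	printf("hook ran with %d\n", value);
}
#else
/* empty inline: the call below costs nothing when the feature is off */
static inline void feature_hook(int value)
{
	(void)value;
}
#endif

int main(void)
{
	feature_hook(42); /* unconditional call site, no #ifdef needed */
	return 0;
}
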
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 96fb80b724dc..2f3452546636 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -28,6 +28,7 @@
28#include "unicast.h" 28#include "unicast.h"
29#include "soft-interface.h" 29#include "soft-interface.h"
30#include "bridge_loop_avoidance.h" 30#include "bridge_loop_avoidance.h"
31#include "network-coding.h"
31 32
32/* hash class keys */ 33/* hash class keys */
33static struct lock_class_key batadv_orig_hash_lock_class_key; 34static struct lock_class_key batadv_orig_hash_lock_class_key;
@@ -142,6 +143,9 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
142 143
143 spin_unlock_bh(&orig_node->neigh_list_lock); 144 spin_unlock_bh(&orig_node->neigh_list_lock);
144 145
146 /* Free nc_nodes */
147 batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL);
148
145 batadv_frag_list_free(&orig_node->frag_list); 149 batadv_frag_list_free(&orig_node->frag_list);
146 batadv_tt_global_del_orig(orig_node->bat_priv, orig_node, 150 batadv_tt_global_del_orig(orig_node->bat_priv, orig_node,
147 "originator timed out"); 151 "originator timed out");
@@ -219,6 +223,8 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
219 spin_lock_init(&orig_node->neigh_list_lock); 223 spin_lock_init(&orig_node->neigh_list_lock);
220 spin_lock_init(&orig_node->tt_buff_lock); 224 spin_lock_init(&orig_node->tt_buff_lock);
221 225
226 batadv_nc_init_orig(orig_node);
227
222 /* extra reference for return */ 228 /* extra reference for return */
223 atomic_set(&orig_node->refcount, 2); 229 atomic_set(&orig_node->refcount, 2);
224 230
@@ -459,7 +465,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
459 neigh_node_tmp->tq_avg); 465 neigh_node_tmp->tq_avg);
460 } 466 }
461 467
462 seq_printf(seq, "\n"); 468 seq_puts(seq, "\n");
463 batman_count++; 469 batman_count++;
464 470
465next: 471next:
@@ -469,7 +475,7 @@ next:
469 } 475 }
470 476
471 if (batman_count == 0) 477 if (batman_count == 0)
472 seq_printf(seq, "No batman nodes in range ...\n"); 478 seq_puts(seq, "No batman nodes in range ...\n");
473 479
474out: 480out:
475 if (primary_if) 481 if (primary_if)
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index ed0aa89bbf8b..a51ccfc39da4 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -30,6 +30,7 @@ enum batadv_packettype {
30 BATADV_TT_QUERY = 0x07, 30 BATADV_TT_QUERY = 0x07,
31 BATADV_ROAM_ADV = 0x08, 31 BATADV_ROAM_ADV = 0x08,
32 BATADV_UNICAST_4ADDR = 0x09, 32 BATADV_UNICAST_4ADDR = 0x09,
33 BATADV_CODED = 0x0a,
33}; 34};
34 35
35/** 36/**
@@ -278,4 +279,36 @@ struct batadv_tt_change {
278 uint8_t addr[ETH_ALEN]; 279 uint8_t addr[ETH_ALEN];
279} __packed; 280} __packed;
280 281
282/**
283 * struct batadv_coded_packet - network coded packet
284 * @header: common batman packet header and ttl of first included packet
285 *
286 * @first_source: original source of first included packet
287 * @first_orig_dest: original destination of first included packet
288 * @first_crc: checksum of first included packet
289 * @first_ttvn: tt-version number of first included packet
290 * @second_ttl: ttl of second packet
291 * @second_dest: second receiver of this coded packet
292 * @second_source: original source of second included packet
293 * @second_orig_dest: original destination of second included packet
294 * @second_crc: checksum of second included packet
295 * @second_ttvn: tt version number of second included packet
296 * @coded_len: length of network coded part of the payload
297 */
298struct batadv_coded_packet {
299 struct batadv_header header;
300 uint8_t first_ttvn;
301 /* uint8_t first_dest[ETH_ALEN]; - saved in mac header destination */
302 uint8_t first_source[ETH_ALEN];
303 uint8_t first_orig_dest[ETH_ALEN];
304 __be32 first_crc;
305 uint8_t second_ttl;
306 uint8_t second_ttvn;
307 uint8_t second_dest[ETH_ALEN];
308 uint8_t second_source[ETH_ALEN];
309 uint8_t second_orig_dest[ETH_ALEN];
310 __be32 second_crc;
311 __be16 coded_len;
312};
313
281#endif /* _NET_BATMAN_ADV_PACKET_H_ */ 314#endif /* _NET_BATMAN_ADV_PACKET_H_ */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 319f2906c71a..2f1f88923df8 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -29,6 +29,7 @@
29#include "unicast.h" 29#include "unicast.h"
30#include "bridge_loop_avoidance.h" 30#include "bridge_loop_avoidance.h"
31#include "distributed-arp-table.h" 31#include "distributed-arp-table.h"
32#include "network-coding.h"
32 33
33static int batadv_route_unicast_packet(struct sk_buff *skb, 34static int batadv_route_unicast_packet(struct sk_buff *skb,
34 struct batadv_hard_iface *recv_if); 35 struct batadv_hard_iface *recv_if);
@@ -548,6 +549,17 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig,
548 return router; 549 return router;
549} 550}
550 551
552/**
553 * batadv_check_unicast_packet - Check for malformed unicast packets
554 * @bat_priv: the bat priv with all the soft interface information
555 * @skb: packet to check
556 * @hdr_size: size of header to pull
557 *
558 * Check for short header and bad addresses in given packet. Returns negative
559 * value when check fails and 0 otherwise. The negative value depends on the
560 * reason: -ENODATA for bad header, -EBADR for broadcast destination or source,
561 * and -EREMOTE for non-local (other host) destination.
562 */
551static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, 563static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
552 struct sk_buff *skb, int hdr_size) 564 struct sk_buff *skb, int hdr_size)
553{ 565{
@@ -555,21 +567,21 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
555 567
556 /* drop packet if it has not necessary minimum size */ 568 /* drop packet if it has not necessary minimum size */
557 if (unlikely(!pskb_may_pull(skb, hdr_size))) 569 if (unlikely(!pskb_may_pull(skb, hdr_size)))
558 return -1; 570 return -ENODATA;
559 571
560 ethhdr = (struct ethhdr *)skb_mac_header(skb); 572 ethhdr = (struct ethhdr *)skb_mac_header(skb);
561 573
562 /* packet with unicast indication but broadcast recipient */ 574 /* packet with unicast indication but broadcast recipient */
563 if (is_broadcast_ether_addr(ethhdr->h_dest)) 575 if (is_broadcast_ether_addr(ethhdr->h_dest))
564 return -1; 576 return -EBADR;
565 577
566 /* packet with broadcast sender address */ 578 /* packet with broadcast sender address */
567 if (is_broadcast_ether_addr(ethhdr->h_source)) 579 if (is_broadcast_ether_addr(ethhdr->h_source))
568 return -1; 580 return -EBADR;
569 581
570 /* not for me */ 582 /* not for me */
571 if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) 583 if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest))
572 return -1; 584 return -EREMOTE;
573 585
574 return 0; 586 return 0;
575} 587}
@@ -852,15 +864,18 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
852 /* decrement ttl */ 864 /* decrement ttl */
853 unicast_packet->header.ttl--; 865 unicast_packet->header.ttl--;
854 866
855 /* Update stats counter */ 867 /* network code packet if possible */
856 batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); 868 if (batadv_nc_skb_forward(skb, neigh_node, ethhdr)) {
857 batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, 869 ret = NET_RX_SUCCESS;
858 skb->len + ETH_HLEN); 870 } else if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) {
859
860 /* route it */
861 if (batadv_send_skb_to_orig(skb, orig_node, recv_if))
862 ret = NET_RX_SUCCESS; 871 ret = NET_RX_SUCCESS;
863 872
873 /* Update stats counter */
874 batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
875 batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
876 skb->len + ETH_HLEN);
877 }
878
864out: 879out:
865 if (neigh_node) 880 if (neigh_node)
866 batadv_neigh_node_free_ref(neigh_node); 881 batadv_neigh_node_free_ref(neigh_node);
@@ -1035,7 +1050,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
1035 struct batadv_unicast_4addr_packet *unicast_4addr_packet; 1050 struct batadv_unicast_4addr_packet *unicast_4addr_packet;
1036 uint8_t *orig_addr; 1051 uint8_t *orig_addr;
1037 struct batadv_orig_node *orig_node = NULL; 1052 struct batadv_orig_node *orig_node = NULL;
1038 int hdr_size = sizeof(*unicast_packet); 1053 int check, hdr_size = sizeof(*unicast_packet);
1039 bool is4addr; 1054 bool is4addr;
1040 1055
1041 unicast_packet = (struct batadv_unicast_packet *)skb->data; 1056 unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -1046,7 +1061,16 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
1046 if (is4addr) 1061 if (is4addr)
1047 hdr_size = sizeof(*unicast_4addr_packet); 1062 hdr_size = sizeof(*unicast_4addr_packet);
1048 1063
1049 if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) 1064 /* function returns -EREMOTE for promiscuous packets */
1065 check = batadv_check_unicast_packet(bat_priv, skb, hdr_size);
1066
1067 /* Even though the packet is not for us, we might save it to use for
1068 * decoding a later received coded packet
1069 */
1070 if (check == -EREMOTE)
1071 batadv_nc_skb_store_sniffed_unicast(bat_priv, skb);
1072
1073 if (check < 0)
1050 return NET_RX_DROP; 1074 return NET_RX_DROP;
1051 1075
1052 if (!batadv_check_unicast_ttvn(bat_priv, skb)) 1076 if (!batadv_check_unicast_ttvn(bat_priv, skb))
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index a67cffde37ae..263cfd1ccee7 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -27,6 +27,7 @@
27#include "vis.h" 27#include "vis.h"
28#include "gateway_common.h" 28#include "gateway_common.h"
29#include "originator.h" 29#include "originator.h"
30#include "network-coding.h"
30 31
31#include <linux/if_ether.h> 32#include <linux/if_ether.h>
32 33
@@ -39,6 +40,7 @@ int batadv_send_skb_packet(struct sk_buff *skb,
39 struct batadv_hard_iface *hard_iface, 40 struct batadv_hard_iface *hard_iface,
40 const uint8_t *dst_addr) 41 const uint8_t *dst_addr)
41{ 42{
43 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
42 struct ethhdr *ethhdr; 44 struct ethhdr *ethhdr;
43 45
44 if (hard_iface->if_status != BATADV_IF_ACTIVE) 46 if (hard_iface->if_status != BATADV_IF_ACTIVE)
@@ -70,6 +72,9 @@ int batadv_send_skb_packet(struct sk_buff *skb,
70 72
71 skb->dev = hard_iface->net_dev; 73 skb->dev = hard_iface->net_dev;
72 74
75 /* Save a clone of the skb to use when decoding coded packets */
76 batadv_nc_skb_store_for_decoding(bat_priv, skb);
77
73 /* dev_queue_xmit() returns a negative result on error. However on 78 /* dev_queue_xmit() returns a negative result on error. However on
74 * congestion and traffic shaping, it drops and returns NET_XMIT_DROP 79 * congestion and traffic shaping, it drops and returns NET_XMIT_DROP
75 * (which is > 0). This will not be treated as an error. 80 * (which is > 0). This will not be treated as an error.
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 2711e870f557..403b8c46085e 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -37,6 +37,7 @@
37#include <linux/if_ether.h> 37#include <linux/if_ether.h>
38#include "unicast.h" 38#include "unicast.h"
39#include "bridge_loop_avoidance.h" 39#include "bridge_loop_avoidance.h"
40#include "network-coding.h"
40 41
41 42
42static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); 43static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd);
@@ -401,55 +402,6 @@ static void batadv_set_lockdep_class(struct net_device *dev)
401} 402}
402 403
403/** 404/**
404 * batadv_softif_init - Late stage initialization of soft interface
405 * @dev: registered network device to modify
406 *
407 * Returns error code on failures
408 */
409static int batadv_softif_init(struct net_device *dev)
410{
411 batadv_set_lockdep_class(dev);
412
413 return 0;
414}
415
416static const struct net_device_ops batadv_netdev_ops = {
417 .ndo_init = batadv_softif_init,
418 .ndo_open = batadv_interface_open,
419 .ndo_stop = batadv_interface_release,
420 .ndo_get_stats = batadv_interface_stats,
421 .ndo_set_mac_address = batadv_interface_set_mac_addr,
422 .ndo_change_mtu = batadv_interface_change_mtu,
423 .ndo_start_xmit = batadv_interface_tx,
424 .ndo_validate_addr = eth_validate_addr
425};
426
427static void batadv_interface_setup(struct net_device *dev)
428{
429 struct batadv_priv *priv = netdev_priv(dev);
430
431 ether_setup(dev);
432
433 dev->netdev_ops = &batadv_netdev_ops;
434 dev->destructor = free_netdev;
435 dev->tx_queue_len = 0;
436
437 /* can't call min_mtu, because the needed variables
438 * have not been initialized yet
439 */
440 dev->mtu = ETH_DATA_LEN;
441 /* reserve more space in the skbuff for our header */
442 dev->hard_header_len = BATADV_HEADER_LEN;
443
444 /* generate random address */
445 eth_hw_addr_random(dev);
446
447 SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops);
448
449 memset(priv, 0, sizeof(*priv));
450}
451
452/**
453 * batadv_softif_destroy_finish - cleans up the remains of a softif 405 * batadv_softif_destroy_finish - cleans up the remains of a softif
454 * @work: work queue item 406 * @work: work queue item
455 * 407 *
@@ -465,7 +417,6 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
465 cleanup_work); 417 cleanup_work);
466 soft_iface = bat_priv->soft_iface; 418 soft_iface = bat_priv->soft_iface;
467 419
468 batadv_debugfs_del_meshif(soft_iface);
469 batadv_sysfs_del_meshif(soft_iface); 420 batadv_sysfs_del_meshif(soft_iface);
470 421
471 rtnl_lock(); 422 rtnl_lock();
@@ -473,21 +424,22 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
473 rtnl_unlock(); 424 rtnl_unlock();
474} 425}
475 426
476struct net_device *batadv_softif_create(const char *name) 427/**
428 * batadv_softif_init_late - late stage initialization of soft interface
429 * @dev: registered network device to modify
430 *
431 * Returns error code on failures
432 */
433static int batadv_softif_init_late(struct net_device *dev)
477{ 434{
478 struct net_device *soft_iface;
479 struct batadv_priv *bat_priv; 435 struct batadv_priv *bat_priv;
480 int ret; 436 int ret;
481 size_t cnt_len = sizeof(uint64_t) * BATADV_CNT_NUM; 437 size_t cnt_len = sizeof(uint64_t) * BATADV_CNT_NUM;
482 438
483 soft_iface = alloc_netdev(sizeof(*bat_priv), name, 439 batadv_set_lockdep_class(dev);
484 batadv_interface_setup);
485
486 if (!soft_iface)
487 goto out;
488 440
489 bat_priv = netdev_priv(soft_iface); 441 bat_priv = netdev_priv(dev);
490 bat_priv->soft_iface = soft_iface; 442 bat_priv->soft_iface = dev;
491 INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish); 443 INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish);
492 444
493 /* batadv_interface_stats() needs to be available as soon as 445 /* batadv_interface_stats() needs to be available as soon as
@@ -495,14 +447,7 @@ struct net_device *batadv_softif_create(const char *name)
495 */ 447 */
496 bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t)); 448 bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t));
497 if (!bat_priv->bat_counters) 449 if (!bat_priv->bat_counters)
498 goto free_soft_iface; 450 return -ENOMEM;
499
500 ret = register_netdevice(soft_iface);
501 if (ret < 0) {
502 pr_err("Unable to register the batman interface '%s': %i\n",
503 name, ret);
504 goto free_bat_counters;
505 }
506 451
507 atomic_set(&bat_priv->aggregated_ogms, 1); 452 atomic_set(&bat_priv->aggregated_ogms, 1);
508 atomic_set(&bat_priv->bonding, 0); 453 atomic_set(&bat_priv->bonding, 0);
@@ -540,49 +485,189 @@ struct net_device *batadv_softif_create(const char *name)
540 bat_priv->primary_if = NULL; 485 bat_priv->primary_if = NULL;
541 bat_priv->num_ifaces = 0; 486 bat_priv->num_ifaces = 0;
542 487
543 ret = batadv_algo_select(bat_priv, batadv_routing_algo); 488 batadv_nc_init_bat_priv(bat_priv);
544 if (ret < 0)
545 goto unreg_soft_iface;
546 489
547 ret = batadv_sysfs_add_meshif(soft_iface); 490 ret = batadv_algo_select(bat_priv, batadv_routing_algo);
548 if (ret < 0) 491 if (ret < 0)
549 goto unreg_soft_iface; 492 goto free_bat_counters;
550 493
551 ret = batadv_debugfs_add_meshif(soft_iface); 494 ret = batadv_debugfs_add_meshif(dev);
552 if (ret < 0) 495 if (ret < 0)
553 goto unreg_sysfs; 496 goto free_bat_counters;
554 497
555 ret = batadv_mesh_init(soft_iface); 498 ret = batadv_mesh_init(dev);
556 if (ret < 0) 499 if (ret < 0)
557 goto unreg_debugfs; 500 goto unreg_debugfs;
558 501
559 return soft_iface; 502 return 0;
560 503
561unreg_debugfs: 504unreg_debugfs:
562 batadv_debugfs_del_meshif(soft_iface); 505 batadv_debugfs_del_meshif(dev);
563unreg_sysfs:
564 batadv_sysfs_del_meshif(soft_iface);
565unreg_soft_iface:
566 free_percpu(bat_priv->bat_counters);
567 unregister_netdevice(soft_iface);
568 return NULL;
569
570free_bat_counters: 506free_bat_counters:
571 free_percpu(bat_priv->bat_counters); 507 free_percpu(bat_priv->bat_counters);
572free_soft_iface: 508
573 free_netdev(soft_iface); 509 return ret;
510}
511
512/**
513 * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface
514 * @dev: batadv_soft_interface used as master interface
515 * @slave_dev: net_device which should become the slave interface
516 *
517 * Return 0 if successful or error otherwise.
518 */
519static int batadv_softif_slave_add(struct net_device *dev,
520 struct net_device *slave_dev)
521{
522 struct batadv_hard_iface *hard_iface;
523 int ret = -EINVAL;
524
525 hard_iface = batadv_hardif_get_by_netdev(slave_dev);
526 if (!hard_iface || hard_iface->soft_iface != NULL)
527 goto out;
528
529 ret = batadv_hardif_enable_interface(hard_iface, dev->name);
530
574out: 531out:
575 return NULL; 532 if (hard_iface)
533 batadv_hardif_free_ref(hard_iface);
534 return ret;
576} 535}
577 536
578void batadv_softif_destroy(struct net_device *soft_iface) 537/**
538 * batadv_softif_slave_del - Delete a slave iface from a batadv_soft_interface
539 * @dev: batadv_soft_interface used as master interface
540 * @slave_dev: net_device which should be removed from the master interface
541 *
542 * Return 0 if successful or error otherwise.
543 */
544static int batadv_softif_slave_del(struct net_device *dev,
545 struct net_device *slave_dev)
546{
547 struct batadv_hard_iface *hard_iface;
548 int ret = -EINVAL;
549
550 hard_iface = batadv_hardif_get_by_netdev(slave_dev);
551
552 if (!hard_iface || hard_iface->soft_iface != dev)
553 goto out;
554
555 batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_KEEP);
556 ret = 0;
557
558out:
559 if (hard_iface)
560 batadv_hardif_free_ref(hard_iface);
561 return ret;
562}
563
564static const struct net_device_ops batadv_netdev_ops = {
565 .ndo_init = batadv_softif_init_late,
566 .ndo_open = batadv_interface_open,
567 .ndo_stop = batadv_interface_release,
568 .ndo_get_stats = batadv_interface_stats,
569 .ndo_set_mac_address = batadv_interface_set_mac_addr,
570 .ndo_change_mtu = batadv_interface_change_mtu,
571 .ndo_start_xmit = batadv_interface_tx,
572 .ndo_validate_addr = eth_validate_addr,
573 .ndo_add_slave = batadv_softif_slave_add,
574 .ndo_del_slave = batadv_softif_slave_del,
575};
576
577/**
578 * batadv_softif_free - Deconstructor of batadv_soft_interface
579 * @dev: Device to cleanup and remove
580 */
581static void batadv_softif_free(struct net_device *dev)
582{
583 batadv_debugfs_del_meshif(dev);
584 batadv_mesh_free(dev);
585 free_netdev(dev);
586}
587
588/**
589 * batadv_softif_init_early - early stage initialization of soft interface
590 * @dev: registered network device to modify
591 */
592static void batadv_softif_init_early(struct net_device *dev)
593{
594 struct batadv_priv *priv = netdev_priv(dev);
595
596 ether_setup(dev);
597
598 dev->netdev_ops = &batadv_netdev_ops;
599 dev->destructor = batadv_softif_free;
600 dev->tx_queue_len = 0;
601
602 /* can't call min_mtu, because the needed variables
603 * have not been initialized yet
604 */
605 dev->mtu = ETH_DATA_LEN;
606 /* reserve more space in the skbuff for our header */
607 dev->hard_header_len = BATADV_HEADER_LEN;
608
609 /* generate random address */
610 eth_hw_addr_random(dev);
611
612 SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops);
613
614 memset(priv, 0, sizeof(*priv));
615}
616
617struct net_device *batadv_softif_create(const char *name)
618{
619 struct net_device *soft_iface;
620 int ret;
621
622 soft_iface = alloc_netdev(sizeof(struct batadv_priv), name,
623 batadv_softif_init_early);
624 if (!soft_iface)
625 return NULL;
626
627 soft_iface->rtnl_link_ops = &batadv_link_ops;
628
629 ret = register_netdevice(soft_iface);
630 if (ret < 0) {
631 pr_err("Unable to register the batman interface '%s': %i\n",
632 name, ret);
633 free_netdev(soft_iface);
634 return NULL;
635 }
636
637 return soft_iface;
638}
639
640/**
641 * batadv_softif_destroy_sysfs - deletion of batadv_soft_interface via sysfs
642 * @soft_iface: the to-be-removed batman-adv interface
643 */
644void batadv_softif_destroy_sysfs(struct net_device *soft_iface)
579{ 645{
580 struct batadv_priv *bat_priv = netdev_priv(soft_iface); 646 struct batadv_priv *bat_priv = netdev_priv(soft_iface);
581 647
582 batadv_mesh_free(soft_iface);
583 queue_work(batadv_event_workqueue, &bat_priv->cleanup_work); 648 queue_work(batadv_event_workqueue, &bat_priv->cleanup_work);
584} 649}
585 650
651/**
652 * batadv_softif_destroy_netlink - deletion of batadv_soft_interface via netlink
653 * @soft_iface: the to-be-removed batman-adv interface
654 * @head: list pointer
655 */
656static void batadv_softif_destroy_netlink(struct net_device *soft_iface,
657 struct list_head *head)
658{
659 struct batadv_hard_iface *hard_iface;
660
661 list_for_each_entry(hard_iface, &batadv_hardif_list, list) {
662 if (hard_iface->soft_iface == soft_iface)
663 batadv_hardif_disable_interface(hard_iface,
664 BATADV_IF_CLEANUP_KEEP);
665 }
666
667 batadv_sysfs_del_meshif(soft_iface);
668 unregister_netdevice_queue(soft_iface, head);
669}
670
586int batadv_softif_is_valid(const struct net_device *net_dev) 671int batadv_softif_is_valid(const struct net_device *net_dev)
587{ 672{
588 if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx) 673 if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx)
@@ -591,6 +676,13 @@ int batadv_softif_is_valid(const struct net_device *net_dev)
591 return 0; 676 return 0;
592} 677}
593 678
679struct rtnl_link_ops batadv_link_ops __read_mostly = {
680 .kind = "batadv",
681 .priv_size = sizeof(struct batadv_priv),
682 .setup = batadv_softif_init_early,
683 .dellink = batadv_softif_destroy_netlink,
684};
685
594/* ethtool */ 686/* ethtool */
595static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) 687static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
596{ 688{
@@ -662,6 +754,17 @@ static const struct {
662 { "dat_put_rx" }, 754 { "dat_put_rx" },
663 { "dat_cached_reply_tx" }, 755 { "dat_cached_reply_tx" },
664#endif 756#endif
757#ifdef CONFIG_BATMAN_ADV_NC
758 { "nc_code" },
759 { "nc_code_bytes" },
760 { "nc_recode" },
761 { "nc_recode_bytes" },
762 { "nc_buffer" },
763 { "nc_decode" },
764 { "nc_decode_bytes" },
765 { "nc_decode_failed" },
766 { "nc_sniffed" },
767#endif
665}; 768};
666 769
667static void batadv_get_strings(struct net_device *dev, uint32_t stringset, 770static void batadv_get_strings(struct net_device *dev, uint32_t stringset,
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 43182e5e603a..2f2472c2ea0d 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -25,7 +25,8 @@ void batadv_interface_rx(struct net_device *soft_iface,
25 struct sk_buff *skb, struct batadv_hard_iface *recv_if, 25 struct sk_buff *skb, struct batadv_hard_iface *recv_if,
26 int hdr_size, struct batadv_orig_node *orig_node); 26 int hdr_size, struct batadv_orig_node *orig_node);
27struct net_device *batadv_softif_create(const char *name); 27struct net_device *batadv_softif_create(const char *name);
28void batadv_softif_destroy(struct net_device *soft_iface); 28void batadv_softif_destroy_sysfs(struct net_device *soft_iface);
29int batadv_softif_is_valid(const struct net_device *net_dev); 29int batadv_softif_is_valid(const struct net_device *net_dev);
30extern struct rtnl_link_ops batadv_link_ops;
30 31
31#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */ 32#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index afbba319d73a..15a22efa9a67 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -442,6 +442,9 @@ static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth,
442#ifdef CONFIG_BATMAN_ADV_DEBUG 442#ifdef CONFIG_BATMAN_ADV_DEBUG
443BATADV_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, BATADV_DBG_ALL, NULL); 443BATADV_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, BATADV_DBG_ALL, NULL);
444#endif 444#endif
445#ifdef CONFIG_BATMAN_ADV_NC
446BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR, NULL);
447#endif
445 448
446static struct batadv_attribute *batadv_mesh_attrs[] = { 449static struct batadv_attribute *batadv_mesh_attrs[] = {
447 &batadv_attr_aggregated_ogms, 450 &batadv_attr_aggregated_ogms,
@@ -464,6 +467,9 @@ static struct batadv_attribute *batadv_mesh_attrs[] = {
464#ifdef CONFIG_BATMAN_ADV_DEBUG 467#ifdef CONFIG_BATMAN_ADV_DEBUG
465 &batadv_attr_log_level, 468 &batadv_attr_log_level,
466#endif 469#endif
470#ifdef CONFIG_BATMAN_ADV_NC
471 &batadv_attr_network_coding,
472#endif
467 NULL, 473 NULL,
468}; 474};
469 475
@@ -582,13 +588,15 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
582 } 588 }
583 589
584 if (status_tmp == BATADV_IF_NOT_IN_USE) { 590 if (status_tmp == BATADV_IF_NOT_IN_USE) {
585 batadv_hardif_disable_interface(hard_iface); 591 batadv_hardif_disable_interface(hard_iface,
592 BATADV_IF_CLEANUP_AUTO);
586 goto unlock; 593 goto unlock;
587 } 594 }
588 595
589 /* if the interface already is in use */ 596 /* if the interface already is in use */
590 if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) 597 if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
591 batadv_hardif_disable_interface(hard_iface); 598 batadv_hardif_disable_interface(hard_iface,
599 BATADV_IF_CLEANUP_AUTO);
592 600
593 ret = batadv_hardif_enable_interface(hard_iface, buff); 601 ret = batadv_hardif_enable_interface(hard_iface, buff);
594 602
@@ -688,15 +696,10 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,
688 enum batadv_uev_action action, const char *data) 696 enum batadv_uev_action action, const char *data)
689{ 697{
690 int ret = -ENOMEM; 698 int ret = -ENOMEM;
691 struct batadv_hard_iface *primary_if;
692 struct kobject *bat_kobj; 699 struct kobject *bat_kobj;
693 char *uevent_env[4] = { NULL, NULL, NULL, NULL }; 700 char *uevent_env[4] = { NULL, NULL, NULL, NULL };
694 701
695 primary_if = batadv_primary_if_get_selected(bat_priv); 702 bat_kobj = &bat_priv->soft_iface->dev.kobj;
696 if (!primary_if)
697 goto out;
698
699 bat_kobj = &primary_if->soft_iface->dev.kobj;
700 703
701 uevent_env[0] = kmalloc(strlen(BATADV_UEV_TYPE_VAR) + 704 uevent_env[0] = kmalloc(strlen(BATADV_UEV_TYPE_VAR) +
702 strlen(batadv_uev_type_str[type]) + 1, 705 strlen(batadv_uev_type_str[type]) + 1,
@@ -732,9 +735,6 @@ out:
732 kfree(uevent_env[1]); 735 kfree(uevent_env[1]);
733 kfree(uevent_env[2]); 736 kfree(uevent_env[2]);
734 737
735 if (primary_if)
736 batadv_hardif_free_ref(primary_if);
737
738 if (ret) 738 if (ret)
739 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 739 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
740 "Impossible to send uevent for (%s,%s,%s) event (err: %d)\n", 740 "Impossible to send uevent for (%s,%s,%s) event (err: %d)\n",
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 7abee19567e9..5e89deeb9542 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -385,25 +385,19 @@ static void batadv_tt_prepare_packet_buff(struct batadv_priv *bat_priv,
385 int *packet_buff_len, 385 int *packet_buff_len,
386 int min_packet_len) 386 int min_packet_len)
387{ 387{
388 struct batadv_hard_iface *primary_if;
389 int req_len; 388 int req_len;
390 389
391 primary_if = batadv_primary_if_get_selected(bat_priv);
392
393 req_len = min_packet_len; 390 req_len = min_packet_len;
394 req_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes)); 391 req_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes));
395 392
396 /* if we have too many changes for one packet don't send any 393 /* if we have too many changes for one packet don't send any
397 * and wait for the tt table request which will be fragmented 394 * and wait for the tt table request which will be fragmented
398 */ 395 */
399 if ((!primary_if) || (req_len > primary_if->soft_iface->mtu)) 396 if (req_len > bat_priv->soft_iface->mtu)
400 req_len = min_packet_len; 397 req_len = min_packet_len;
401 398
402 batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len, 399 batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len,
403 min_packet_len, req_len); 400 min_packet_len, req_len);
404
405 if (primary_if)
406 batadv_hardif_free_ref(primary_if);
407} 401}
408 402
409static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv, 403static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv,
@@ -908,7 +902,7 @@ out_remove:
908 /* remove address from local hash if present */ 902 /* remove address from local hash if present */
909 local_flags = batadv_tt_local_remove(bat_priv, tt_addr, 903 local_flags = batadv_tt_local_remove(bat_priv, tt_addr,
910 "global tt received", 904 "global tt received",
911 !!(flags & BATADV_TT_CLIENT_ROAM)); 905 flags & BATADV_TT_CLIENT_ROAM);
912 tt_global_entry->common.flags |= local_flags & BATADV_TT_CLIENT_WIFI; 906 tt_global_entry->common.flags |= local_flags & BATADV_TT_CLIENT_WIFI;
913 907
914 if (!(flags & BATADV_TT_CLIENT_ROAM)) 908 if (!(flags & BATADV_TT_CLIENT_ROAM))
@@ -1580,7 +1574,7 @@ static int batadv_tt_global_valid(const void *entry_ptr,
1580static struct sk_buff * 1574static struct sk_buff *
1581batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, 1575batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
1582 struct batadv_hashtable *hash, 1576 struct batadv_hashtable *hash,
1583 struct batadv_hard_iface *primary_if, 1577 struct batadv_priv *bat_priv,
1584 int (*valid_cb)(const void *, const void *), 1578 int (*valid_cb)(const void *, const void *),
1585 void *cb_data) 1579 void *cb_data)
1586{ 1580{
@@ -1594,8 +1588,8 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
1594 uint32_t i; 1588 uint32_t i;
1595 size_t len; 1589 size_t len;
1596 1590
1597 if (tt_query_size + tt_len > primary_if->soft_iface->mtu) { 1591 if (tt_query_size + tt_len > bat_priv->soft_iface->mtu) {
1598 tt_len = primary_if->soft_iface->mtu - tt_query_size; 1592 tt_len = bat_priv->soft_iface->mtu - tt_query_size;
1599 tt_len -= tt_len % sizeof(struct batadv_tt_change); 1593 tt_len -= tt_len % sizeof(struct batadv_tt_change);
1600 } 1594 }
1601 tt_tot = tt_len / sizeof(struct batadv_tt_change); 1595 tt_tot = tt_len / sizeof(struct batadv_tt_change);
@@ -1715,7 +1709,6 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
1715{ 1709{
1716 struct batadv_orig_node *req_dst_orig_node; 1710 struct batadv_orig_node *req_dst_orig_node;
1717 struct batadv_orig_node *res_dst_orig_node = NULL; 1711 struct batadv_orig_node *res_dst_orig_node = NULL;
1718 struct batadv_hard_iface *primary_if = NULL;
1719 uint8_t orig_ttvn, req_ttvn, ttvn; 1712 uint8_t orig_ttvn, req_ttvn, ttvn;
1720 int ret = false; 1713 int ret = false;
1721 unsigned char *tt_buff; 1714 unsigned char *tt_buff;
@@ -1740,10 +1733,6 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
1740 if (!res_dst_orig_node) 1733 if (!res_dst_orig_node)
1741 goto out; 1734 goto out;
1742 1735
1743 primary_if = batadv_primary_if_get_selected(bat_priv);
1744 if (!primary_if)
1745 goto out;
1746
1747 orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); 1736 orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn);
1748 req_ttvn = tt_request->ttvn; 1737 req_ttvn = tt_request->ttvn;
1749 1738
@@ -1791,7 +1780,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
1791 1780
1792 skb = batadv_tt_response_fill_table(tt_len, ttvn, 1781 skb = batadv_tt_response_fill_table(tt_len, ttvn,
1793 bat_priv->tt.global_hash, 1782 bat_priv->tt.global_hash,
1794 primary_if, 1783 bat_priv,
1795 batadv_tt_global_valid, 1784 batadv_tt_global_valid,
1796 req_dst_orig_node); 1785 req_dst_orig_node);
1797 if (!skb) 1786 if (!skb)
@@ -1828,8 +1817,6 @@ out:
1828 batadv_orig_node_free_ref(res_dst_orig_node); 1817 batadv_orig_node_free_ref(res_dst_orig_node);
1829 if (req_dst_orig_node) 1818 if (req_dst_orig_node)
1830 batadv_orig_node_free_ref(req_dst_orig_node); 1819 batadv_orig_node_free_ref(req_dst_orig_node);
1831 if (primary_if)
1832 batadv_hardif_free_ref(primary_if);
1833 if (!ret) 1820 if (!ret)
1834 kfree_skb(skb); 1821 kfree_skb(skb);
1835 return ret; 1822 return ret;
@@ -1907,7 +1894,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
1907 1894
1908 skb = batadv_tt_response_fill_table(tt_len, ttvn, 1895 skb = batadv_tt_response_fill_table(tt_len, ttvn,
1909 bat_priv->tt.local_hash, 1896 bat_priv->tt.local_hash,
1910 primary_if, 1897 bat_priv,
1911 batadv_tt_local_valid_entry, 1898 batadv_tt_local_valid_entry,
1912 NULL); 1899 NULL);
1913 if (!skb) 1900 if (!skb)
@@ -2528,7 +2515,7 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
2528 if (!tt_global_entry) 2515 if (!tt_global_entry)
2529 goto out; 2516 goto out;
2530 2517
2531 ret = !!(tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM); 2518 ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM;
2532 batadv_tt_global_entry_free_ref(tt_global_entry); 2519 batadv_tt_global_entry_free_ref(tt_global_entry);
2533out: 2520out:
2534 return ret; 2521 return ret;
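
For context: dropping the !! double negation in the two hunks above is safe because the values now land in bool variables or bool parameters, and C99 conversion to _Bool already normalizes any non-zero value to true; !! is only needed while the destination stays a plain integer. The same rationale applies to the unicast.c change further down. A standalone sketch:

    #include <assert.h>
    #include <stdbool.h>

    int main(void)
    {
            unsigned int flags = 0x40;
            bool roam = flags & 0x40;  /* implicitly normalized to true */
            int raw = flags & 0x40;    /* stays 0x40; would need !! */

            assert(roam == true && raw == 0x40);
            return 0;
    }
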
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 4cd87a0b5b80..aba8364c3689 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -128,6 +128,10 @@ struct batadv_hard_iface {
128 * @bond_list: list of bonding candidates 128 * @bond_list: list of bonding candidates
129 * @refcount: number of contexts the object is used 129 * @refcount: number of contexts the object is used
130 * @rcu: struct used for freeing in an RCU-safe manner 130 * @rcu: struct used for freeing in an RCU-safe manner
131 * @in_coding_list: list of nodes this orig can hear
132 * @out_coding_list: list of nodes that can hear this orig
133 * @in_coding_list_lock: protects in_coding_list
134 * @out_coding_list_lock: protects out_coding_list
131 */ 135 */
132struct batadv_orig_node { 136struct batadv_orig_node {
133 uint8_t orig[ETH_ALEN]; 137 uint8_t orig[ETH_ALEN];
@@ -171,6 +175,12 @@ struct batadv_orig_node {
171 struct list_head bond_list; 175 struct list_head bond_list;
172 atomic_t refcount; 176 atomic_t refcount;
173 struct rcu_head rcu; 177 struct rcu_head rcu;
178#ifdef CONFIG_BATMAN_ADV_NC
179 struct list_head in_coding_list;
180 struct list_head out_coding_list;
181 spinlock_t in_coding_list_lock; /* Protects in_coding_list */
182 spinlock_t out_coding_list_lock; /* Protects out_coding_list */
183#endif
174}; 184};
175 185
176/** 186/**
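
For context: the new list heads and their spinlocks are not usefully initialized by kzalloc alone (list heads need self-pointers, locks need init under lock debugging), so they require explicit setup when an originator is created. A hedged sketch of the expected initialization inside the originator constructor (placement is an assumption; the real initializer lives in the network-coding code this series adds):

    #ifdef CONFIG_BATMAN_ADV_NC
            INIT_LIST_HEAD(&orig_node->in_coding_list);
            INIT_LIST_HEAD(&orig_node->out_coding_list);
            spin_lock_init(&orig_node->in_coding_list_lock);
            spin_lock_init(&orig_node->out_coding_list_lock);
    #endif
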
@@ -265,6 +275,17 @@ struct batadv_bcast_duplist_entry {
265 * @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter 275 * @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter
266 * @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic packet 276 * @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic packet
267 * counter 277 * counter
278 * @BATADV_CNT_NC_CODE: transmitted nc-combined traffic packet counter
279 * @BATADV_CNT_NC_CODE_BYTES: transmitted nc-combined traffic bytes counter
280 * @BATADV_CNT_NC_RECODE: transmitted nc-recombined traffic packet counter
281 * @BATADV_CNT_NC_RECODE_BYTES: transmitted nc-recombined traffic bytes counter
282 * @BATADV_CNT_NC_BUFFER: counter for packets buffered for later nc decoding
283 * @BATADV_CNT_NC_DECODE: received and nc-decoded traffic packet counter
284 * @BATADV_CNT_NC_DECODE_BYTES: received and nc-decoded traffic bytes counter
285 * @BATADV_CNT_NC_DECODE_FAILED: received and decode-failed traffic packet
286 * counter
287 * @BATADV_CNT_NC_SNIFFED: counter for nc-decoded packets received in promisc
288 * mode.
268 * @BATADV_CNT_NUM: number of traffic counters 289 * @BATADV_CNT_NUM: number of traffic counters
269 */ 290 */
270enum batadv_counters { 291enum batadv_counters {
@@ -292,6 +313,17 @@ enum batadv_counters {
292 BATADV_CNT_DAT_PUT_RX, 313 BATADV_CNT_DAT_PUT_RX,
293 BATADV_CNT_DAT_CACHED_REPLY_TX, 314 BATADV_CNT_DAT_CACHED_REPLY_TX,
294#endif 315#endif
316#ifdef CONFIG_BATMAN_ADV_NC
317 BATADV_CNT_NC_CODE,
318 BATADV_CNT_NC_CODE_BYTES,
319 BATADV_CNT_NC_RECODE,
320 BATADV_CNT_NC_RECODE_BYTES,
321 BATADV_CNT_NC_BUFFER,
322 BATADV_CNT_NC_DECODE,
323 BATADV_CNT_NC_DECODE_BYTES,
324 BATADV_CNT_NC_DECODE_FAILED,
325 BATADV_CNT_NC_SNIFFED,
326#endif
295 BATADV_CNT_NUM, 327 BATADV_CNT_NUM,
296}; 328};
297 329
@@ -428,6 +460,35 @@ struct batadv_priv_dat {
428#endif 460#endif
429 461
430/** 462/**
463 * struct batadv_priv_nc - per mesh interface network coding private data
464 * @work: work queue callback item for cleanup
465 * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
466 * @min_tq: only consider neighbors for encoding if neigh_tq > min_tq
467 * @max_fwd_delay: maximum packet forward delay to allow coding of packets
 468 * @max_buffer_time: buffer time for sniffed packets used for decoding
469 * @timestamp_fwd_flush: timestamp of last forward packet queue flush
470 * @timestamp_sniffed_purge: timestamp of last sniffed packet queue purge
471 * @coding_hash: Hash table used to buffer skbs while waiting for another
472 * incoming skb to code it with. Skbs are added to the buffer just before being
473 * forwarded in routing.c
474 * @decoding_hash: Hash table used to buffer skbs that might be needed to decode
475 * a received coded skb. The buffer is used for 1) skbs arriving on the
476 * soft-interface; 2) skbs overheard on the hard-interface; and 3) skbs
477 * forwarded by batman-adv.
478 */
479struct batadv_priv_nc {
480 struct delayed_work work;
481 struct dentry *debug_dir;
482 u8 min_tq;
483 u32 max_fwd_delay;
484 u32 max_buffer_time;
485 unsigned long timestamp_fwd_flush;
486 unsigned long timestamp_sniffed_purge;
487 struct batadv_hashtable *coding_hash;
488 struct batadv_hashtable *decoding_hash;
489};
490
491/**
431 * struct batadv_priv - per mesh interface data 492 * struct batadv_priv - per mesh interface data
432 * @mesh_state: current status of the mesh (inactive/active/deactivating) 493 * @mesh_state: current status of the mesh (inactive/active/deactivating)
433 * @soft_iface: net device which holds this struct as private data 494 * @soft_iface: net device which holds this struct as private data
@@ -470,6 +531,8 @@ struct batadv_priv_dat {
470 * @tt: translation table data 531 * @tt: translation table data
471 * @vis: vis data 532 * @vis: vis data
472 * @dat: distributed arp table data 533 * @dat: distributed arp table data
534 * @network_coding: bool indicating whether network coding is enabled
 535 * @nc: network coding data
473 */ 536 */
474struct batadv_priv { 537struct batadv_priv {
475 atomic_t mesh_state; 538 atomic_t mesh_state;
@@ -522,6 +585,10 @@ struct batadv_priv {
522#ifdef CONFIG_BATMAN_ADV_DAT 585#ifdef CONFIG_BATMAN_ADV_DAT
523 struct batadv_priv_dat dat; 586 struct batadv_priv_dat dat;
524#endif 587#endif
588#ifdef CONFIG_BATMAN_ADV_NC
589 atomic_t network_coding;
590 struct batadv_priv_nc nc;
591#endif /* CONFIG_BATMAN_ADV_NC */
525}; 592};
526 593
527/** 594/**
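
For context: a sketch of how the new fields are expected to be seeded when the mesh interface is set up. All numeric values below are illustrative placeholders, not taken from this patch; the tunables themselves are documented in the batadv_priv_nc comment above:

    /* illustrative defaults only; placement and values are assumptions */
    static void example_nc_init(struct batadv_priv *bat_priv)
    {
            atomic_set(&bat_priv->network_coding, 1);
            bat_priv->nc.min_tq = 200;
            bat_priv->nc.max_fwd_delay = 10;
            bat_priv->nc.max_buffer_time = 200;
            bat_priv->nc.timestamp_fwd_flush = jiffies;
            bat_priv->nc.timestamp_sniffed_purge = jiffies;
    }
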
@@ -702,6 +769,75 @@ struct batadv_tt_roam_node {
702}; 769};
703 770
704/** 771/**
772 * struct batadv_nc_node - network coding node
773 * @list: next and prev pointer for the list handling
774 * @addr: the node's mac address
775 * @refcount: number of contexts the object is used by
776 * @rcu: struct used for freeing in an RCU-safe manner
777 * @orig_node: pointer to corresponding orig node struct
778 * @last_seen: timestamp of last ogm received from this node
779 */
780struct batadv_nc_node {
781 struct list_head list;
782 uint8_t addr[ETH_ALEN];
783 atomic_t refcount;
784 struct rcu_head rcu;
785 struct batadv_orig_node *orig_node;
786 unsigned long last_seen;
787};
788
789/**
790 * struct batadv_nc_path - network coding path
791 * @hash_entry: next and prev pointer for the list handling
792 * @rcu: struct used for freeing in an RCU-safe manner
793 * @refcount: number of contexts the object is used by
794 * @packet_list: list of buffered packets for this path
795 * @packet_list_lock: access lock for packet list
796 * @next_hop: next hop (destination) of path
797 * @prev_hop: previous hop (source) of path
798 * @last_valid: timestamp for last validation of path
799 */
800struct batadv_nc_path {
801 struct hlist_node hash_entry;
802 struct rcu_head rcu;
803 atomic_t refcount;
804 struct list_head packet_list;
805 spinlock_t packet_list_lock; /* Protects packet_list */
806 uint8_t next_hop[ETH_ALEN];
807 uint8_t prev_hop[ETH_ALEN];
808 unsigned long last_valid;
809};
810
811/**
812 * struct batadv_nc_packet - network coding packet used when coding and
813 * decoding packets
814 * @list: next and prev pointer for the list handling
815 * @packet_id: crc32 checksum of skb data
816 * @timestamp: field containing the info when the packet was added to path
817 * @neigh_node: pointer to original next hop neighbor of skb
818 * @skb: skb which can be encoded or used for decoding
819 * @nc_path: pointer to path this nc packet is attached to
820 */
821struct batadv_nc_packet {
822 struct list_head list;
823 __be32 packet_id;
824 unsigned long timestamp;
825 struct batadv_neigh_node *neigh_node;
826 struct sk_buff *skb;
827 struct batadv_nc_path *nc_path;
828};
829
830/**
831 * batadv_skb_cb - control buffer structure used to store private data relevant
832 * to batman-adv in the skb->cb buffer in skbs.
833 * @decoded: Marks a skb as decoded, which is checked when searching for coding
834 * opportunities in network-coding.c
835 */
836struct batadv_skb_cb {
837 bool decoded;
838};
839
840/**
705 * struct batadv_forw_packet - structure for bcast packets to be sent/forwarded 841 * struct batadv_forw_packet - structure for bcast packets to be sent/forwarded
706 * @list: list node for batadv_socket_client::queue_list 842 * @list: list node for batadv_socket_client::queue_list
707 * @send_time: execution time for delayed_work (packet sending) 843 * @send_time: execution time for delayed_work (packet sending)
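
For context: batadv_skb_cb is overlaid on the 48-byte skb->cb scratch area, following the usual control-buffer cast pattern. A minimal sketch of testing the decoded flag (the accessor macro below is illustrative; batman-adv defines its own equivalent):

    /* illustrative accessor over skb->cb, mirroring the usual pattern */
    #define EXAMPLE_SKB_CB(skb) ((struct batadv_skb_cb *)&((skb)->cb[0]))

    /* true once nc decoding succeeded, so coding-opportunity searches
     * in network-coding.c can skip this skb
     */
    static bool example_already_decoded(struct sk_buff *skb)
    {
            return EXAMPLE_SKB_CB(skb)->decoded;
    }
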
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index 50e079f00be6..0bb3b5982f94 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -122,7 +122,7 @@ batadv_frag_search_packet(struct list_head *head,
122{ 122{
123 struct batadv_frag_packet_list_entry *tfp; 123 struct batadv_frag_packet_list_entry *tfp;
124 struct batadv_unicast_frag_packet *tmp_up = NULL; 124 struct batadv_unicast_frag_packet *tmp_up = NULL;
125 int is_head_tmp, is_head; 125 bool is_head_tmp, is_head;
126 uint16_t search_seqno; 126 uint16_t search_seqno;
127 127
128 if (up->flags & BATADV_UNI_FRAG_HEAD) 128 if (up->flags & BATADV_UNI_FRAG_HEAD)
@@ -130,7 +130,7 @@ batadv_frag_search_packet(struct list_head *head,
130 else 130 else
131 search_seqno = ntohs(up->seqno)-1; 131 search_seqno = ntohs(up->seqno)-1;
132 132
133 is_head = !!(up->flags & BATADV_UNI_FRAG_HEAD); 133 is_head = up->flags & BATADV_UNI_FRAG_HEAD;
134 134
135 list_for_each_entry(tfp, head, list) { 135 list_for_each_entry(tfp, head, list) {
136 if (!tfp->skb) 136 if (!tfp->skb)
@@ -142,7 +142,7 @@ batadv_frag_search_packet(struct list_head *head,
142 tmp_up = (struct batadv_unicast_frag_packet *)tfp->skb->data; 142 tmp_up = (struct batadv_unicast_frag_packet *)tfp->skb->data;
143 143
144 if (tfp->seqno == search_seqno) { 144 if (tfp->seqno == search_seqno) {
145 is_head_tmp = !!(tmp_up->flags & BATADV_UNI_FRAG_HEAD); 145 is_head_tmp = tmp_up->flags & BATADV_UNI_FRAG_HEAD;
146 if (is_head_tmp != is_head) 146 if (is_head_tmp != is_head)
147 return tfp; 147 return tfp;
148 else 148 else
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index 6a1e646be96d..1625e5793a89 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -149,7 +149,7 @@ static void batadv_vis_data_read_prim_sec(struct seq_file *seq,
149 149
150 hlist_for_each_entry(entry, if_list, list) { 150 hlist_for_each_entry(entry, if_list, list) {
151 if (entry->primary) 151 if (entry->primary)
152 seq_printf(seq, "PRIMARY, "); 152 seq_puts(seq, "PRIMARY, ");
153 else 153 else
154 seq_printf(seq, "SEC %pM, ", entry->addr); 154 seq_printf(seq, "SEC %pM, ", entry->addr);
155 } 155 }
@@ -207,7 +207,7 @@ static void batadv_vis_data_read_entries(struct seq_file *seq,
207 if (batadv_compare_eth(entry->addr, packet->vis_orig)) 207 if (batadv_compare_eth(entry->addr, packet->vis_orig))
208 batadv_vis_data_read_prim_sec(seq, list); 208 batadv_vis_data_read_prim_sec(seq, list);
209 209
210 seq_printf(seq, "\n"); 210 seq_puts(seq, "\n");
211 } 211 }
212} 212}
213 213
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index eb0f4b16ff09..17f33a62f6db 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -397,13 +397,12 @@ static int a2mp_getampassoc_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
397 if (ctrl) { 397 if (ctrl) {
398 u8 *assoc; 398 u8 *assoc;
399 399
400 assoc = kzalloc(assoc_len, GFP_KERNEL); 400 assoc = kmemdup(rsp->amp_assoc, assoc_len, GFP_KERNEL);
401 if (!assoc) { 401 if (!assoc) {
402 amp_ctrl_put(ctrl); 402 amp_ctrl_put(ctrl);
403 return -ENOMEM; 403 return -ENOMEM;
404 } 404 }
405 405
406 memcpy(assoc, rsp->amp_assoc, assoc_len);
407 ctrl->assoc = assoc; 406 ctrl->assoc = assoc;
408 ctrl->assoc_len = assoc_len; 407 ctrl->assoc_len = assoc_len;
409 ctrl->assoc_rem_len = assoc_len; 408 ctrl->assoc_rem_len = assoc_len;
@@ -472,13 +471,12 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
472 size_t assoc_len = le16_to_cpu(hdr->len) - sizeof(*req); 471 size_t assoc_len = le16_to_cpu(hdr->len) - sizeof(*req);
473 u8 *assoc; 472 u8 *assoc;
474 473
475 assoc = kzalloc(assoc_len, GFP_KERNEL); 474 assoc = kmemdup(req->amp_assoc, assoc_len, GFP_KERNEL);
476 if (!assoc) { 475 if (!assoc) {
477 amp_ctrl_put(ctrl); 476 amp_ctrl_put(ctrl);
478 return -ENOMEM; 477 return -ENOMEM;
479 } 478 }
480 479
481 memcpy(assoc, req->amp_assoc, assoc_len);
482 ctrl->assoc = assoc; 480 ctrl->assoc = assoc;
483 ctrl->assoc_len = assoc_len; 481 ctrl->assoc_len = assoc_len;
484 ctrl->assoc_rem_len = assoc_len; 482 ctrl->assoc_rem_len = assoc_len;
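
For context: both hunks above collapse a kzalloc()+memcpy() pair into kmemdup(), which allocates and copies in one call; the zero-fill done by kzalloc was wasted work since memcpy overwrote the whole buffer anyway. Roughly equivalent expansion:

    /* before: allocate zeroed memory, then copy over all of it */
    assoc = kzalloc(assoc_len, GFP_KERNEL);
    if (assoc)
            memcpy(assoc, rsp->amp_assoc, assoc_len);

    /* after: identical allocation and failure semantics, one call */
    assoc = kmemdup(rsp->amp_assoc, assoc_len, GFP_KERNEL);
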
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 0d1b08cc76e1..e5338f787d68 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -92,23 +92,14 @@ int bt_sock_register(int proto, const struct net_proto_family *ops)
92} 92}
93EXPORT_SYMBOL(bt_sock_register); 93EXPORT_SYMBOL(bt_sock_register);
94 94
95int bt_sock_unregister(int proto) 95void bt_sock_unregister(int proto)
96{ 96{
97 int err = 0;
98
99 if (proto < 0 || proto >= BT_MAX_PROTO) 97 if (proto < 0 || proto >= BT_MAX_PROTO)
100 return -EINVAL; 98 return;
101 99
102 write_lock(&bt_proto_lock); 100 write_lock(&bt_proto_lock);
103 101 bt_proto[proto] = NULL;
104 if (!bt_proto[proto])
105 err = -ENOENT;
106 else
107 bt_proto[proto] = NULL;
108
109 write_unlock(&bt_proto_lock); 102 write_unlock(&bt_proto_lock);
110
111 return err;
112} 103}
113EXPORT_SYMBOL(bt_sock_unregister); 104EXPORT_SYMBOL(bt_sock_unregister);
114 105
@@ -422,7 +413,8 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock,
422 return bt_accept_poll(sk); 413 return bt_accept_poll(sk);
423 414
424 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 415 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
425 mask |= POLLERR; 416 mask |= POLLERR |
417 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
426 418
427 if (sk->sk_shutdown & RCV_SHUTDOWN) 419 if (sk->sk_shutdown & RCV_SHUTDOWN)
428 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 420 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
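
For context: with this change, a socket that has SOCK_SELECT_ERR_QUEUE set gets POLLPRI in addition to POLLERR when its error queue is non-empty, so userspace can block until error-queue data arrives instead of busy-polling. A hedged userspace sketch, assuming the SO_SELECT_ERR_QUEUE socket option introduced alongside this flag (error handling elided):

    #include <poll.h>
    #include <sys/socket.h>

    static int wait_for_errqueue(int fd)
    {
            int one = 1;
            struct pollfd pfd = { .fd = fd, .events = POLLPRI };

            /* opt in: error-queue readiness now wakes poll() as POLLPRI */
            setsockopt(fd, SOL_SOCKET, SO_SELECT_ERR_QUEUE, &one, sizeof(one));
            if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLPRI))
                    return 1;  /* drain with recvmsg(fd, ..., MSG_ERRQUEUE) */
            return 0;
    }
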
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index e58c8b32589c..4b488ec26105 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -136,7 +136,7 @@ static u16 bnep_net_eth_proto(struct sk_buff *skb)
136 struct ethhdr *eh = (void *) skb->data; 136 struct ethhdr *eh = (void *) skb->data;
137 u16 proto = ntohs(eh->h_proto); 137 u16 proto = ntohs(eh->h_proto);
138 138
139 if (proto >= 1536) 139 if (proto >= ETH_P_802_3_MIN)
140 return proto; 140 return proto;
141 141
142 if (get_unaligned((__be16 *) skb->data) == htons(0xFFFF)) 142 if (get_unaligned((__be16 *) skb->data) == htons(0xFFFF))
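
For context: ETH_P_802_3_MIN (0x0600, i.e. 1536) is the if_ether.h name for the boundary between the two meanings of the 16-bit field after the MAC addresses: values at or above it are EtherTypes, values below are 802.3 length fields. A tiny sketch of the same test with the named constant:

    #include <linux/if_ether.h>
    #include <linux/types.h>

    /* values >= ETH_P_802_3_MIN name a protocol; smaller ones are lengths */
    static bool is_ethertype(u16 proto)
    {
            return proto >= ETH_P_802_3_MIN;
    }
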
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index e7154a58465f..5b1c04e28821 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -253,8 +253,6 @@ error:
253void __exit bnep_sock_cleanup(void) 253void __exit bnep_sock_cleanup(void)
254{ 254{
255 bt_procfs_cleanup(&init_net, "bnep"); 255 bt_procfs_cleanup(&init_net, "bnep");
256 if (bt_sock_unregister(BTPROTO_BNEP) < 0) 256 bt_sock_unregister(BTPROTO_BNEP);
257 BT_ERR("Can't unregister BNEP socket");
258
259 proto_unregister(&bnep_proto); 257 proto_unregister(&bnep_proto);
260} 258}
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index 1c57482112b6..58d9edebab4b 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -264,8 +264,6 @@ error:
264void cmtp_cleanup_sockets(void) 264void cmtp_cleanup_sockets(void)
265{ 265{
266 bt_procfs_cleanup(&init_net, "cmtp"); 266 bt_procfs_cleanup(&init_net, "cmtp");
267 if (bt_sock_unregister(BTPROTO_CMTP) < 0) 267 bt_sock_unregister(BTPROTO_CMTP);
268 BT_ERR("Can't unregister CMTP socket");
269
270 proto_unregister(&cmtp_proto); 268 proto_unregister(&cmtp_proto);
271} 269}
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 4925a02ae7e4..b9f90169940b 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -117,7 +117,7 @@ static void hci_acl_create_connection_cancel(struct hci_conn *conn)
117 hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp); 117 hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp);
118} 118}
119 119
120void hci_acl_disconn(struct hci_conn *conn, __u8 reason) 120void hci_disconnect(struct hci_conn *conn, __u8 reason)
121{ 121{
122 struct hci_cp_disconnect cp; 122 struct hci_cp_disconnect cp;
123 123
@@ -253,7 +253,7 @@ static void hci_conn_disconnect(struct hci_conn *conn)
253 hci_amp_disconn(conn, reason); 253 hci_amp_disconn(conn, reason);
254 break; 254 break;
255 default: 255 default:
256 hci_acl_disconn(conn, reason); 256 hci_disconnect(conn, reason);
257 break; 257 break;
258 } 258 }
259} 259}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 60793e7b768b..cfcad5423f1c 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -57,36 +57,9 @@ static void hci_notify(struct hci_dev *hdev, int event)
57 57
58/* ---- HCI requests ---- */ 58/* ---- HCI requests ---- */
59 59
60void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result) 60static void hci_req_sync_complete(struct hci_dev *hdev, u8 result)
61{ 61{
62 BT_DBG("%s command 0x%4.4x result 0x%2.2x", hdev->name, cmd, result); 62 BT_DBG("%s result 0x%2.2x", hdev->name, result);
63
64 /* If this is the init phase check if the completed command matches
65 * the last init command, and if not just return.
66 */
67 if (test_bit(HCI_INIT, &hdev->flags) && hdev->init_last_cmd != cmd) {
68 struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data;
69 u16 opcode = __le16_to_cpu(sent->opcode);
70 struct sk_buff *skb;
71
72 /* Some CSR based controllers generate a spontaneous
73 * reset complete event during init and any pending
74 * command will never be completed. In such a case we
75 * need to resend whatever was the last sent
76 * command.
77 */
78
79 if (cmd != HCI_OP_RESET || opcode == HCI_OP_RESET)
80 return;
81
82 skb = skb_clone(hdev->sent_cmd, GFP_ATOMIC);
83 if (skb) {
84 skb_queue_head(&hdev->cmd_q, skb);
85 queue_work(hdev->workqueue, &hdev->cmd_work);
86 }
87
88 return;
89 }
90 63
91 if (hdev->req_status == HCI_REQ_PEND) { 64 if (hdev->req_status == HCI_REQ_PEND) {
92 hdev->req_result = result; 65 hdev->req_result = result;
@@ -107,21 +80,41 @@ static void hci_req_cancel(struct hci_dev *hdev, int err)
107} 80}
108 81
109/* Execute request and wait for completion. */ 82/* Execute request and wait for completion. */
110static int __hci_request(struct hci_dev *hdev, 83static int __hci_req_sync(struct hci_dev *hdev,
111 void (*req)(struct hci_dev *hdev, unsigned long opt), 84 void (*func)(struct hci_request *req,
112 unsigned long opt, __u32 timeout) 85 unsigned long opt),
86 unsigned long opt, __u32 timeout)
113{ 87{
88 struct hci_request req;
114 DECLARE_WAITQUEUE(wait, current); 89 DECLARE_WAITQUEUE(wait, current);
115 int err = 0; 90 int err = 0;
116 91
117 BT_DBG("%s start", hdev->name); 92 BT_DBG("%s start", hdev->name);
118 93
94 hci_req_init(&req, hdev);
95
119 hdev->req_status = HCI_REQ_PEND; 96 hdev->req_status = HCI_REQ_PEND;
120 97
98 func(&req, opt);
99
100 err = hci_req_run(&req, hci_req_sync_complete);
101 if (err < 0) {
102 hdev->req_status = 0;
103
104 /* ENODATA means the HCI request command queue is empty.
105 * This can happen when a request with conditionals doesn't
106 * trigger any commands to be sent. This is normal behavior
107 * and should not trigger an error return.
108 */
109 if (err == -ENODATA)
110 return 0;
111
112 return err;
113 }
114
121 add_wait_queue(&hdev->req_wait_q, &wait); 115 add_wait_queue(&hdev->req_wait_q, &wait);
122 set_current_state(TASK_INTERRUPTIBLE); 116 set_current_state(TASK_INTERRUPTIBLE);
123 117
124 req(hdev, opt);
125 schedule_timeout(timeout); 118 schedule_timeout(timeout);
126 119
127 remove_wait_queue(&hdev->req_wait_q, &wait); 120 remove_wait_queue(&hdev->req_wait_q, &wait);
@@ -150,9 +143,10 @@ static int __hci_request(struct hci_dev *hdev,
150 return err; 143 return err;
151} 144}
152 145
153static int hci_request(struct hci_dev *hdev, 146static int hci_req_sync(struct hci_dev *hdev,
154 void (*req)(struct hci_dev *hdev, unsigned long opt), 147 void (*req)(struct hci_request *req,
155 unsigned long opt, __u32 timeout) 148 unsigned long opt),
149 unsigned long opt, __u32 timeout)
156{ 150{
157 int ret; 151 int ret;
158 152
@@ -161,75 +155,86 @@ static int hci_request(struct hci_dev *hdev,
161 155
162 /* Serialize all requests */ 156 /* Serialize all requests */
163 hci_req_lock(hdev); 157 hci_req_lock(hdev);
164 ret = __hci_request(hdev, req, opt, timeout); 158 ret = __hci_req_sync(hdev, req, opt, timeout);
165 hci_req_unlock(hdev); 159 hci_req_unlock(hdev);
166 160
167 return ret; 161 return ret;
168} 162}
169 163
170static void hci_reset_req(struct hci_dev *hdev, unsigned long opt) 164static void hci_reset_req(struct hci_request *req, unsigned long opt)
171{ 165{
172 BT_DBG("%s %ld", hdev->name, opt); 166 BT_DBG("%s %ld", req->hdev->name, opt);
173 167
174 /* Reset device */ 168 /* Reset device */
175 set_bit(HCI_RESET, &hdev->flags); 169 set_bit(HCI_RESET, &req->hdev->flags);
176 hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); 170 hci_req_add(req, HCI_OP_RESET, 0, NULL);
177} 171}
178 172
179static void bredr_init(struct hci_dev *hdev) 173static void bredr_init(struct hci_request *req)
180{ 174{
181 hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; 175 req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED;
182 176
183 /* Read Local Supported Features */ 177 /* Read Local Supported Features */
184 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); 178 hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL);
185 179
186 /* Read Local Version */ 180 /* Read Local Version */
187 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL); 181 hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL);
182
183 /* Read BD Address */
184 hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL);
188} 185}
189 186
190static void amp_init(struct hci_dev *hdev) 187static void amp_init(struct hci_request *req)
191{ 188{
192 hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; 189 req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED;
193 190
194 /* Read Local Version */ 191 /* Read Local Version */
195 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL); 192 hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL);
196 193
197 /* Read Local AMP Info */ 194 /* Read Local AMP Info */
198 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); 195 hci_req_add(req, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL);
199 196
200 /* Read Data Blk size */ 197 /* Read Data Blk size */
201 hci_send_cmd(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL); 198 hci_req_add(req, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL);
202} 199}
203 200
204static void hci_init_req(struct hci_dev *hdev, unsigned long opt) 201static void hci_init1_req(struct hci_request *req, unsigned long opt)
205{ 202{
203 struct hci_dev *hdev = req->hdev;
204 struct hci_request init_req;
206 struct sk_buff *skb; 205 struct sk_buff *skb;
207 206
208 BT_DBG("%s %ld", hdev->name, opt); 207 BT_DBG("%s %ld", hdev->name, opt);
209 208
210 /* Driver initialization */ 209 /* Driver initialization */
211 210
211 hci_req_init(&init_req, hdev);
212
212 /* Special commands */ 213 /* Special commands */
213 while ((skb = skb_dequeue(&hdev->driver_init))) { 214 while ((skb = skb_dequeue(&hdev->driver_init))) {
214 bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; 215 bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
215 skb->dev = (void *) hdev; 216 skb->dev = (void *) hdev;
216 217
217 skb_queue_tail(&hdev->cmd_q, skb); 218 if (skb_queue_empty(&init_req.cmd_q))
218 queue_work(hdev->workqueue, &hdev->cmd_work); 219 bt_cb(skb)->req.start = true;
220
221 skb_queue_tail(&init_req.cmd_q, skb);
219 } 222 }
220 skb_queue_purge(&hdev->driver_init); 223 skb_queue_purge(&hdev->driver_init);
221 224
225 hci_req_run(&init_req, NULL);
226
222 /* Reset */ 227 /* Reset */
223 if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) 228 if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks))
224 hci_reset_req(hdev, 0); 229 hci_reset_req(req, 0);
225 230
226 switch (hdev->dev_type) { 231 switch (hdev->dev_type) {
227 case HCI_BREDR: 232 case HCI_BREDR:
228 bredr_init(hdev); 233 bredr_init(req);
229 break; 234 break;
230 235
231 case HCI_AMP: 236 case HCI_AMP:
232 amp_init(hdev); 237 amp_init(req);
233 break; 238 break;
234 239
235 default: 240 default:
@@ -238,44 +243,327 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
238 } 243 }
239} 244}
240 245
241static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) 246static void bredr_setup(struct hci_request *req)
247{
248 struct hci_cp_delete_stored_link_key cp;
249 __le16 param;
250 __u8 flt_type;
251
252 /* Read Buffer Size (ACL mtu, max pkt, etc.) */
253 hci_req_add(req, HCI_OP_READ_BUFFER_SIZE, 0, NULL);
254
255 /* Read Class of Device */
256 hci_req_add(req, HCI_OP_READ_CLASS_OF_DEV, 0, NULL);
257
258 /* Read Local Name */
259 hci_req_add(req, HCI_OP_READ_LOCAL_NAME, 0, NULL);
260
261 /* Read Voice Setting */
262 hci_req_add(req, HCI_OP_READ_VOICE_SETTING, 0, NULL);
263
264 /* Clear Event Filters */
265 flt_type = HCI_FLT_CLEAR_ALL;
266 hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &flt_type);
267
268 /* Connection accept timeout ~20 secs */
269 param = __constant_cpu_to_le16(0x7d00);
270 hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
271
272 bacpy(&cp.bdaddr, BDADDR_ANY);
273 cp.delete_all = 0x01;
274 hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp);
275
276 /* Read page scan parameters */
277 if (req->hdev->hci_ver > BLUETOOTH_VER_1_1) {
278 hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL);
279 hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL);
280 }
281}
282
283static void le_setup(struct hci_request *req)
284{
285 /* Read LE Buffer Size */
286 hci_req_add(req, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL);
287
288 /* Read LE Local Supported Features */
289 hci_req_add(req, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL);
290
291 /* Read LE Advertising Channel TX Power */
292 hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL);
293
294 /* Read LE White List Size */
295 hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL);
296
297 /* Read LE Supported States */
298 hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL);
299}
300
301static u8 hci_get_inquiry_mode(struct hci_dev *hdev)
302{
303 if (lmp_ext_inq_capable(hdev))
304 return 0x02;
305
306 if (lmp_inq_rssi_capable(hdev))
307 return 0x01;
308
309 if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 &&
310 hdev->lmp_subver == 0x0757)
311 return 0x01;
312
313 if (hdev->manufacturer == 15) {
314 if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963)
315 return 0x01;
316 if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963)
317 return 0x01;
318 if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965)
319 return 0x01;
320 }
321
322 if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 &&
323 hdev->lmp_subver == 0x1805)
324 return 0x01;
325
326 return 0x00;
327}
328
329static void hci_setup_inquiry_mode(struct hci_request *req)
330{
331 u8 mode;
332
333 mode = hci_get_inquiry_mode(req->hdev);
334
335 hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode);
336}
337
338static void hci_setup_event_mask(struct hci_request *req)
339{
340 struct hci_dev *hdev = req->hdev;
341
342 /* The second byte is 0xff instead of 0x9f (two reserved bits
343 * disabled) since a Broadcom 1.2 dongle doesn't respond to the
344 * command otherwise.
345 */
346 u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 };
347
 348 /* CSR 1.1 dongles do not accept any bitfield so don't try to set
349 * any event mask for pre 1.2 devices.
350 */
351 if (hdev->hci_ver < BLUETOOTH_VER_1_2)
352 return;
353
354 if (lmp_bredr_capable(hdev)) {
355 events[4] |= 0x01; /* Flow Specification Complete */
356 events[4] |= 0x02; /* Inquiry Result with RSSI */
357 events[4] |= 0x04; /* Read Remote Extended Features Complete */
358 events[5] |= 0x08; /* Synchronous Connection Complete */
359 events[5] |= 0x10; /* Synchronous Connection Changed */
360 }
361
362 if (lmp_inq_rssi_capable(hdev))
363 events[4] |= 0x02; /* Inquiry Result with RSSI */
364
365 if (lmp_sniffsubr_capable(hdev))
366 events[5] |= 0x20; /* Sniff Subrating */
367
368 if (lmp_pause_enc_capable(hdev))
369 events[5] |= 0x80; /* Encryption Key Refresh Complete */
370
371 if (lmp_ext_inq_capable(hdev))
372 events[5] |= 0x40; /* Extended Inquiry Result */
373
374 if (lmp_no_flush_capable(hdev))
375 events[7] |= 0x01; /* Enhanced Flush Complete */
376
377 if (lmp_lsto_capable(hdev))
378 events[6] |= 0x80; /* Link Supervision Timeout Changed */
379
380 if (lmp_ssp_capable(hdev)) {
381 events[6] |= 0x01; /* IO Capability Request */
382 events[6] |= 0x02; /* IO Capability Response */
383 events[6] |= 0x04; /* User Confirmation Request */
384 events[6] |= 0x08; /* User Passkey Request */
385 events[6] |= 0x10; /* Remote OOB Data Request */
386 events[6] |= 0x20; /* Simple Pairing Complete */
387 events[7] |= 0x04; /* User Passkey Notification */
388 events[7] |= 0x08; /* Keypress Notification */
389 events[7] |= 0x10; /* Remote Host Supported
390 * Features Notification
391 */
392 }
393
394 if (lmp_le_capable(hdev))
395 events[7] |= 0x20; /* LE Meta-Event */
396
397 hci_req_add(req, HCI_OP_SET_EVENT_MASK, sizeof(events), events);
398
399 if (lmp_le_capable(hdev)) {
400 memset(events, 0, sizeof(events));
401 events[0] = 0x1f;
402 hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK,
403 sizeof(events), events);
404 }
405}
406
407static void hci_init2_req(struct hci_request *req, unsigned long opt)
408{
409 struct hci_dev *hdev = req->hdev;
410
411 if (lmp_bredr_capable(hdev))
412 bredr_setup(req);
413
414 if (lmp_le_capable(hdev))
415 le_setup(req);
416
417 hci_setup_event_mask(req);
418
419 if (hdev->hci_ver > BLUETOOTH_VER_1_1)
420 hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL);
421
422 if (lmp_ssp_capable(hdev)) {
423 if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) {
424 u8 mode = 0x01;
425 hci_req_add(req, HCI_OP_WRITE_SSP_MODE,
426 sizeof(mode), &mode);
427 } else {
428 struct hci_cp_write_eir cp;
429
430 memset(hdev->eir, 0, sizeof(hdev->eir));
431 memset(&cp, 0, sizeof(cp));
432
433 hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
434 }
435 }
436
437 if (lmp_inq_rssi_capable(hdev))
438 hci_setup_inquiry_mode(req);
439
440 if (lmp_inq_tx_pwr_capable(hdev))
441 hci_req_add(req, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL);
442
443 if (lmp_ext_feat_capable(hdev)) {
444 struct hci_cp_read_local_ext_features cp;
445
446 cp.page = 0x01;
447 hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES,
448 sizeof(cp), &cp);
449 }
450
451 if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) {
452 u8 enable = 1;
453 hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable),
454 &enable);
455 }
456}
457
458static void hci_setup_link_policy(struct hci_request *req)
459{
460 struct hci_dev *hdev = req->hdev;
461 struct hci_cp_write_def_link_policy cp;
462 u16 link_policy = 0;
463
464 if (lmp_rswitch_capable(hdev))
465 link_policy |= HCI_LP_RSWITCH;
466 if (lmp_hold_capable(hdev))
467 link_policy |= HCI_LP_HOLD;
468 if (lmp_sniff_capable(hdev))
469 link_policy |= HCI_LP_SNIFF;
470 if (lmp_park_capable(hdev))
471 link_policy |= HCI_LP_PARK;
472
473 cp.policy = cpu_to_le16(link_policy);
474 hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp);
475}
476
477static void hci_set_le_support(struct hci_request *req)
478{
479 struct hci_dev *hdev = req->hdev;
480 struct hci_cp_write_le_host_supported cp;
481
482 memset(&cp, 0, sizeof(cp));
483
484 if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
485 cp.le = 0x01;
486 cp.simul = lmp_le_br_capable(hdev);
487 }
488
489 if (cp.le != lmp_host_le_capable(hdev))
490 hci_req_add(req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp),
491 &cp);
492}
493
494static void hci_init3_req(struct hci_request *req, unsigned long opt)
495{
496 struct hci_dev *hdev = req->hdev;
497
498 if (hdev->commands[5] & 0x10)
499 hci_setup_link_policy(req);
500
501 if (lmp_le_capable(hdev)) {
502 hci_set_le_support(req);
503 hci_update_ad(req);
504 }
505}
506
507static int __hci_init(struct hci_dev *hdev)
508{
509 int err;
510
511 err = __hci_req_sync(hdev, hci_init1_req, 0, HCI_INIT_TIMEOUT);
512 if (err < 0)
513 return err;
514
 515 /* HCI_BREDR covers single-mode LE, single-mode BR/EDR and dual-mode
516 * BR/EDR/LE type controllers. AMP controllers only need the
517 * first stage init.
518 */
519 if (hdev->dev_type != HCI_BREDR)
520 return 0;
521
522 err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT);
523 if (err < 0)
524 return err;
525
526 return __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT);
527}
528
529static void hci_scan_req(struct hci_request *req, unsigned long opt)
242{ 530{
243 __u8 scan = opt; 531 __u8 scan = opt;
244 532
245 BT_DBG("%s %x", hdev->name, scan); 533 BT_DBG("%s %x", req->hdev->name, scan);
246 534
247 /* Inquiry and Page scans */ 535 /* Inquiry and Page scans */
248 hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); 536 hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
249} 537}
250 538
251static void hci_auth_req(struct hci_dev *hdev, unsigned long opt) 539static void hci_auth_req(struct hci_request *req, unsigned long opt)
252{ 540{
253 __u8 auth = opt; 541 __u8 auth = opt;
254 542
255 BT_DBG("%s %x", hdev->name, auth); 543 BT_DBG("%s %x", req->hdev->name, auth);
256 544
257 /* Authentication */ 545 /* Authentication */
258 hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); 546 hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth);
259} 547}
260 548
261static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt) 549static void hci_encrypt_req(struct hci_request *req, unsigned long opt)
262{ 550{
263 __u8 encrypt = opt; 551 __u8 encrypt = opt;
264 552
265 BT_DBG("%s %x", hdev->name, encrypt); 553 BT_DBG("%s %x", req->hdev->name, encrypt);
266 554
267 /* Encryption */ 555 /* Encryption */
268 hci_send_cmd(hdev, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); 556 hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt);
269} 557}
270 558
271static void hci_linkpol_req(struct hci_dev *hdev, unsigned long opt) 559static void hci_linkpol_req(struct hci_request *req, unsigned long opt)
272{ 560{
273 __le16 policy = cpu_to_le16(opt); 561 __le16 policy = cpu_to_le16(opt);
274 562
275 BT_DBG("%s %x", hdev->name, policy); 563 BT_DBG("%s %x", req->hdev->name, policy);
276 564
277 /* Default link policy */ 565 /* Default link policy */
278 hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); 566 hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy);
279} 567}
280 568
281/* Get HCI device by index. 569/* Get HCI device by index.
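
For context: the block above replaces the old synchronous per-command pattern with batched HCI requests. hci_req_init() sets up a private command queue, each hci_req_add() appends one command (marking the first skb with req.start), and hci_req_run() splices the batch onto hdev->cmd_q with a completion callback attached to the last skb. A minimal sketch of driving the asynchronous API directly (function names and callback body are illustrative, not part of this patch):

    static void example_complete(struct hci_dev *hdev, u8 status)
    {
            BT_DBG("%s status 0x%2.2x", hdev->name, status);
    }

    static int example_scan_on(struct hci_dev *hdev)
    {
            struct hci_request req;
            __u8 scan = SCAN_PAGE;

            hci_req_init(&req, hdev);
            hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
            /* further hci_req_add() calls batch into the same request */
            return hci_req_run(&req, example_complete);
    }
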
@@ -512,9 +800,10 @@ static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf)
512 return copied; 800 return copied;
513} 801}
514 802
515static void hci_inq_req(struct hci_dev *hdev, unsigned long opt) 803static void hci_inq_req(struct hci_request *req, unsigned long opt)
516{ 804{
517 struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt; 805 struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt;
806 struct hci_dev *hdev = req->hdev;
518 struct hci_cp_inquiry cp; 807 struct hci_cp_inquiry cp;
519 808
520 BT_DBG("%s", hdev->name); 809 BT_DBG("%s", hdev->name);
@@ -526,7 +815,7 @@ static void hci_inq_req(struct hci_dev *hdev, unsigned long opt)
526 memcpy(&cp.lap, &ir->lap, 3); 815 memcpy(&cp.lap, &ir->lap, 3);
527 cp.length = ir->length; 816 cp.length = ir->length;
528 cp.num_rsp = ir->num_rsp; 817 cp.num_rsp = ir->num_rsp;
529 hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp); 818 hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp);
530} 819}
531 820
532int hci_inquiry(void __user *arg) 821int hci_inquiry(void __user *arg)
@@ -556,7 +845,8 @@ int hci_inquiry(void __user *arg)
556 timeo = ir.length * msecs_to_jiffies(2000); 845 timeo = ir.length * msecs_to_jiffies(2000);
557 846
558 if (do_inquiry) { 847 if (do_inquiry) {
559 err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo); 848 err = hci_req_sync(hdev, hci_inq_req, (unsigned long) &ir,
849 timeo);
560 if (err < 0) 850 if (err < 0)
561 goto done; 851 goto done;
562 } 852 }
@@ -654,39 +944,29 @@ static u8 create_ad(struct hci_dev *hdev, u8 *ptr)
654 return ad_len; 944 return ad_len;
655} 945}
656 946
657int hci_update_ad(struct hci_dev *hdev) 947void hci_update_ad(struct hci_request *req)
658{ 948{
949 struct hci_dev *hdev = req->hdev;
659 struct hci_cp_le_set_adv_data cp; 950 struct hci_cp_le_set_adv_data cp;
660 u8 len; 951 u8 len;
661 int err;
662 952
663 hci_dev_lock(hdev); 953 if (!lmp_le_capable(hdev))
664 954 return;
665 if (!lmp_le_capable(hdev)) {
666 err = -EINVAL;
667 goto unlock;
668 }
669 955
670 memset(&cp, 0, sizeof(cp)); 956 memset(&cp, 0, sizeof(cp));
671 957
672 len = create_ad(hdev, cp.data); 958 len = create_ad(hdev, cp.data);
673 959
674 if (hdev->adv_data_len == len && 960 if (hdev->adv_data_len == len &&
675 memcmp(cp.data, hdev->adv_data, len) == 0) { 961 memcmp(cp.data, hdev->adv_data, len) == 0)
676 err = 0; 962 return;
677 goto unlock;
678 }
679 963
680 memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); 964 memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
681 hdev->adv_data_len = len; 965 hdev->adv_data_len = len;
682 966
683 cp.length = len; 967 cp.length = len;
684 err = hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp);
685 968
686unlock: 969 hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp);
687 hci_dev_unlock(hdev);
688
689 return err;
690} 970}
691 971
692/* ---- HCI ioctl helpers ---- */ 972/* ---- HCI ioctl helpers ---- */
@@ -735,10 +1015,7 @@ int hci_dev_open(__u16 dev)
735 if (!test_bit(HCI_RAW, &hdev->flags)) { 1015 if (!test_bit(HCI_RAW, &hdev->flags)) {
736 atomic_set(&hdev->cmd_cnt, 1); 1016 atomic_set(&hdev->cmd_cnt, 1);
737 set_bit(HCI_INIT, &hdev->flags); 1017 set_bit(HCI_INIT, &hdev->flags);
738 hdev->init_last_cmd = 0; 1018 ret = __hci_init(hdev);
739
740 ret = __hci_request(hdev, hci_init_req, 0, HCI_INIT_TIMEOUT);
741
742 clear_bit(HCI_INIT, &hdev->flags); 1019 clear_bit(HCI_INIT, &hdev->flags);
743 } 1020 }
744 1021
@@ -746,7 +1023,6 @@ int hci_dev_open(__u16 dev)
746 hci_dev_hold(hdev); 1023 hci_dev_hold(hdev);
747 set_bit(HCI_UP, &hdev->flags); 1024 set_bit(HCI_UP, &hdev->flags);
748 hci_notify(hdev, HCI_DEV_UP); 1025 hci_notify(hdev, HCI_DEV_UP);
749 hci_update_ad(hdev);
750 if (!test_bit(HCI_SETUP, &hdev->dev_flags) && 1026 if (!test_bit(HCI_SETUP, &hdev->dev_flags) &&
751 mgmt_valid_hdev(hdev)) { 1027 mgmt_valid_hdev(hdev)) {
752 hci_dev_lock(hdev); 1028 hci_dev_lock(hdev);
@@ -828,7 +1104,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
828 if (!test_bit(HCI_RAW, &hdev->flags) && 1104 if (!test_bit(HCI_RAW, &hdev->flags) &&
829 test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { 1105 test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) {
830 set_bit(HCI_INIT, &hdev->flags); 1106 set_bit(HCI_INIT, &hdev->flags);
831 __hci_request(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); 1107 __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT);
832 clear_bit(HCI_INIT, &hdev->flags); 1108 clear_bit(HCI_INIT, &hdev->flags);
833 } 1109 }
834 1110
@@ -851,6 +1127,10 @@ static int hci_dev_do_close(struct hci_dev *hdev)
851 * and no tasks are scheduled. */ 1127 * and no tasks are scheduled. */
852 hdev->close(hdev); 1128 hdev->close(hdev);
853 1129
1130 /* Clear flags */
1131 hdev->flags = 0;
1132 hdev->dev_flags &= ~HCI_PERSISTENT_MASK;
1133
854 if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags) && 1134 if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags) &&
855 mgmt_valid_hdev(hdev)) { 1135 mgmt_valid_hdev(hdev)) {
856 hci_dev_lock(hdev); 1136 hci_dev_lock(hdev);
@@ -858,9 +1138,6 @@ static int hci_dev_do_close(struct hci_dev *hdev)
858 hci_dev_unlock(hdev); 1138 hci_dev_unlock(hdev);
859 } 1139 }
860 1140
861 /* Clear flags */
862 hdev->flags = 0;
863
864 /* Controller radio is available but is currently powered down */ 1141 /* Controller radio is available but is currently powered down */
865 hdev->amp_status = 0; 1142 hdev->amp_status = 0;
866 1143
@@ -921,7 +1198,7 @@ int hci_dev_reset(__u16 dev)
921 hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; 1198 hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0;
922 1199
923 if (!test_bit(HCI_RAW, &hdev->flags)) 1200 if (!test_bit(HCI_RAW, &hdev->flags))
924 ret = __hci_request(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); 1201 ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT);
925 1202
926done: 1203done:
927 hci_req_unlock(hdev); 1204 hci_req_unlock(hdev);
@@ -960,8 +1237,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
960 1237
961 switch (cmd) { 1238 switch (cmd) {
962 case HCISETAUTH: 1239 case HCISETAUTH:
963 err = hci_request(hdev, hci_auth_req, dr.dev_opt, 1240 err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
964 HCI_INIT_TIMEOUT); 1241 HCI_INIT_TIMEOUT);
965 break; 1242 break;
966 1243
967 case HCISETENCRYPT: 1244 case HCISETENCRYPT:
@@ -972,24 +1249,24 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
972 1249
973 if (!test_bit(HCI_AUTH, &hdev->flags)) { 1250 if (!test_bit(HCI_AUTH, &hdev->flags)) {
974 /* Auth must be enabled first */ 1251 /* Auth must be enabled first */
975 err = hci_request(hdev, hci_auth_req, dr.dev_opt, 1252 err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
976 HCI_INIT_TIMEOUT); 1253 HCI_INIT_TIMEOUT);
977 if (err) 1254 if (err)
978 break; 1255 break;
979 } 1256 }
980 1257
981 err = hci_request(hdev, hci_encrypt_req, dr.dev_opt, 1258 err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt,
982 HCI_INIT_TIMEOUT); 1259 HCI_INIT_TIMEOUT);
983 break; 1260 break;
984 1261
985 case HCISETSCAN: 1262 case HCISETSCAN:
986 err = hci_request(hdev, hci_scan_req, dr.dev_opt, 1263 err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt,
987 HCI_INIT_TIMEOUT); 1264 HCI_INIT_TIMEOUT);
988 break; 1265 break;
989 1266
990 case HCISETLINKPOL: 1267 case HCISETLINKPOL:
991 err = hci_request(hdev, hci_linkpol_req, dr.dev_opt, 1268 err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt,
992 HCI_INIT_TIMEOUT); 1269 HCI_INIT_TIMEOUT);
993 break; 1270 break;
994 1271
995 case HCISETLINKMODE: 1272 case HCISETLINKMODE:
@@ -1566,7 +1843,7 @@ int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
1566 return mgmt_device_unblocked(hdev, bdaddr, type); 1843 return mgmt_device_unblocked(hdev, bdaddr, type);
1567} 1844}
1568 1845
1569static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt) 1846static void le_scan_param_req(struct hci_request *req, unsigned long opt)
1570{ 1847{
1571 struct le_scan_params *param = (struct le_scan_params *) opt; 1848 struct le_scan_params *param = (struct le_scan_params *) opt;
1572 struct hci_cp_le_set_scan_param cp; 1849 struct hci_cp_le_set_scan_param cp;
@@ -1576,10 +1853,10 @@ static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt)
1576 cp.interval = cpu_to_le16(param->interval); 1853 cp.interval = cpu_to_le16(param->interval);
1577 cp.window = cpu_to_le16(param->window); 1854 cp.window = cpu_to_le16(param->window);
1578 1855
1579 hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp); 1856 hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp);
1580} 1857}
1581 1858
1582static void le_scan_enable_req(struct hci_dev *hdev, unsigned long opt) 1859static void le_scan_enable_req(struct hci_request *req, unsigned long opt)
1583{ 1860{
1584 struct hci_cp_le_set_scan_enable cp; 1861 struct hci_cp_le_set_scan_enable cp;
1585 1862
@@ -1587,7 +1864,7 @@ static void le_scan_enable_req(struct hci_dev *hdev, unsigned long opt)
1587 cp.enable = 1; 1864 cp.enable = 1;
1588 cp.filter_dup = 1; 1865 cp.filter_dup = 1;
1589 1866
1590 hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); 1867 hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
1591} 1868}
1592 1869
1593static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval, 1870static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval,
@@ -1608,10 +1885,10 @@ static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval,
1608 1885
1609 hci_req_lock(hdev); 1886 hci_req_lock(hdev);
1610 1887
1611 err = __hci_request(hdev, le_scan_param_req, (unsigned long) &param, 1888 err = __hci_req_sync(hdev, le_scan_param_req, (unsigned long) &param,
1612 timeo); 1889 timeo);
1613 if (!err) 1890 if (!err)
1614 err = __hci_request(hdev, le_scan_enable_req, 0, timeo); 1891 err = __hci_req_sync(hdev, le_scan_enable_req, 0, timeo);
1615 1892
1616 hci_req_unlock(hdev); 1893 hci_req_unlock(hdev);
1617 1894
@@ -2160,20 +2437,55 @@ static int hci_send_frame(struct sk_buff *skb)
2160 return hdev->send(skb); 2437 return hdev->send(skb);
2161} 2438}
2162 2439
2163/* Send HCI command */ 2440void hci_req_init(struct hci_request *req, struct hci_dev *hdev)
2164int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) 2441{
2442 skb_queue_head_init(&req->cmd_q);
2443 req->hdev = hdev;
2444 req->err = 0;
2445}
2446
2447int hci_req_run(struct hci_request *req, hci_req_complete_t complete)
2448{
2449 struct hci_dev *hdev = req->hdev;
2450 struct sk_buff *skb;
2451 unsigned long flags;
2452
2453 BT_DBG("length %u", skb_queue_len(&req->cmd_q));
2454
 2455 /* If an error occurred during request building, remove all HCI
2456 * commands queued on the HCI request queue.
2457 */
2458 if (req->err) {
2459 skb_queue_purge(&req->cmd_q);
2460 return req->err;
2461 }
2462
2463 /* Do not allow empty requests */
2464 if (skb_queue_empty(&req->cmd_q))
2465 return -ENODATA;
2466
2467 skb = skb_peek_tail(&req->cmd_q);
2468 bt_cb(skb)->req.complete = complete;
2469
2470 spin_lock_irqsave(&hdev->cmd_q.lock, flags);
2471 skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q);
2472 spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
2473
2474 queue_work(hdev->workqueue, &hdev->cmd_work);
2475
2476 return 0;
2477}
2478
2479static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode,
2480 u32 plen, void *param)
2165{ 2481{
2166 int len = HCI_COMMAND_HDR_SIZE + plen; 2482 int len = HCI_COMMAND_HDR_SIZE + plen;
2167 struct hci_command_hdr *hdr; 2483 struct hci_command_hdr *hdr;
2168 struct sk_buff *skb; 2484 struct sk_buff *skb;
2169 2485
2170 BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen);
2171
2172 skb = bt_skb_alloc(len, GFP_ATOMIC); 2486 skb = bt_skb_alloc(len, GFP_ATOMIC);
2173 if (!skb) { 2487 if (!skb)
2174 BT_ERR("%s no memory for command", hdev->name); 2488 return NULL;
2175 return -ENOMEM;
2176 }
2177 2489
2178 hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); 2490 hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE);
2179 hdr->opcode = cpu_to_le16(opcode); 2491 hdr->opcode = cpu_to_le16(opcode);
@@ -2187,8 +2499,26 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)
2187 bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; 2499 bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
2188 skb->dev = (void *) hdev; 2500 skb->dev = (void *) hdev;
2189 2501
2190 if (test_bit(HCI_INIT, &hdev->flags)) 2502 return skb;
2191 hdev->init_last_cmd = opcode; 2503}
2504
2505/* Send HCI command */
2506int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)
2507{
2508 struct sk_buff *skb;
2509
2510 BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen);
2511
2512 skb = hci_prepare_cmd(hdev, opcode, plen, param);
2513 if (!skb) {
2514 BT_ERR("%s no memory for command", hdev->name);
2515 return -ENOMEM;
2516 }
2517
 2518 /* Stand-alone HCI commands must be flagged as
2519 * single-command requests.
2520 */
2521 bt_cb(skb)->req.start = true;
2192 2522
2193 skb_queue_tail(&hdev->cmd_q, skb); 2523 skb_queue_tail(&hdev->cmd_q, skb);
2194 queue_work(hdev->workqueue, &hdev->cmd_work); 2524 queue_work(hdev->workqueue, &hdev->cmd_work);
@@ -2196,6 +2526,34 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)
2196 return 0; 2526 return 0;
2197} 2527}
2198 2528
2529/* Queue a command to an asynchronous HCI request */
2530void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param)
2531{
2532 struct hci_dev *hdev = req->hdev;
2533 struct sk_buff *skb;
2534
2535 BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen);
2536
 2537 /* If an error occurred during request building, there is no point in
2538 * queueing the HCI command. We can simply return.
2539 */
2540 if (req->err)
2541 return;
2542
2543 skb = hci_prepare_cmd(hdev, opcode, plen, param);
2544 if (!skb) {
2545 BT_ERR("%s no memory for command (opcode 0x%4.4x)",
2546 hdev->name, opcode);
2547 req->err = -ENOMEM;
2548 return;
2549 }
2550
2551 if (skb_queue_empty(&req->cmd_q))
2552 bt_cb(skb)->req.start = true;
2553
2554 skb_queue_tail(&req->cmd_q, skb);
2555}
2556
2199/* Get data from the previously sent command */ 2557/* Get data from the previously sent command */
2200void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) 2558void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode)
2201{ 2559{
@@ -2398,7 +2756,7 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
2398 if (c->type == type && c->sent) { 2756 if (c->type == type && c->sent) {
2399 BT_ERR("%s killing stalled connection %pMR", 2757 BT_ERR("%s killing stalled connection %pMR",
2400 hdev->name, &c->dst); 2758 hdev->name, &c->dst);
2401 hci_acl_disconn(c, HCI_ERROR_REMOTE_USER_TERM); 2759 hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);
2402 } 2760 }
2403 } 2761 }
2404 2762
@@ -2860,6 +3218,123 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
2860 kfree_skb(skb); 3218 kfree_skb(skb);
2861} 3219}
2862 3220
3221static bool hci_req_is_complete(struct hci_dev *hdev)
3222{
3223 struct sk_buff *skb;
3224
3225 skb = skb_peek(&hdev->cmd_q);
3226 if (!skb)
3227 return true;
3228
3229 return bt_cb(skb)->req.start;
3230}
3231
3232static void hci_resend_last(struct hci_dev *hdev)
3233{
3234 struct hci_command_hdr *sent;
3235 struct sk_buff *skb;
3236 u16 opcode;
3237
3238 if (!hdev->sent_cmd)
3239 return;
3240
3241 sent = (void *) hdev->sent_cmd->data;
3242 opcode = __le16_to_cpu(sent->opcode);
3243 if (opcode == HCI_OP_RESET)
3244 return;
3245
3246 skb = skb_clone(hdev->sent_cmd, GFP_KERNEL);
3247 if (!skb)
3248 return;
3249
3250 skb_queue_head(&hdev->cmd_q, skb);
3251 queue_work(hdev->workqueue, &hdev->cmd_work);
3252}
3253
3254void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
3255{
3256 hci_req_complete_t req_complete = NULL;
3257 struct sk_buff *skb;
3258 unsigned long flags;
3259
3260 BT_DBG("opcode 0x%04x status 0x%02x", opcode, status);
3261
3262 /* If the completed command doesn't match the last one that was
3263 * sent, we need to do special handling of it.
3264 */
3265 if (!hci_sent_cmd_data(hdev, opcode)) {
3266 /* Some CSR based controllers generate a spontaneous
3267 * reset complete event during init and any pending
3268 * command will never be completed. In such a case we
3269 * need to resend whatever was the last sent
3270 * command.
3271 */
3272 if (test_bit(HCI_INIT, &hdev->flags) && opcode == HCI_OP_RESET)
3273 hci_resend_last(hdev);
3274
3275 return;
3276 }
3277
3278 /* If the command succeeded and there are still more commands in
3279 * this request, the request is not yet complete.
3280 */
3281 if (!status && !hci_req_is_complete(hdev))
3282 return;
3283
3284 /* If this was the last command in a request, the complete
3285 * callback would be found in hdev->sent_cmd instead of the
3286 * command queue (hdev->cmd_q).
3287 */
3288 if (hdev->sent_cmd) {
3289 req_complete = bt_cb(hdev->sent_cmd)->req.complete;
3290 if (req_complete)
3291 goto call_complete;
3292 }
3293
3294 /* Remove all pending commands belonging to this request */
3295 spin_lock_irqsave(&hdev->cmd_q.lock, flags);
3296 while ((skb = __skb_dequeue(&hdev->cmd_q))) {
3297 if (bt_cb(skb)->req.start) {
3298 __skb_queue_head(&hdev->cmd_q, skb);
3299 break;
3300 }
3301
3302 req_complete = bt_cb(skb)->req.complete;
3303 kfree_skb(skb);
3304 }
3305 spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
3306
3307call_complete:
3308 if (req_complete)
3309 req_complete(hdev, status);
3310}
3311
3312void hci_req_cmd_status(struct hci_dev *hdev, u16 opcode, u8 status)
3313{
3314 hci_req_complete_t req_complete = NULL;
3315
3316 BT_DBG("opcode 0x%04x status 0x%02x", opcode, status);
3317
3318 if (status) {
3319 hci_req_cmd_complete(hdev, opcode, status);
3320 return;
3321 }
3322
3323 /* No need to handle success status if there are more commands */
3324 if (!hci_req_is_complete(hdev))
3325 return;
3326
3327 if (hdev->sent_cmd)
3328 req_complete = bt_cb(hdev->sent_cmd)->req.complete;
3329
3330 /* If the request doesn't have a complete callback or there
3331 * are other commands/requests in the hdev queue, we consider
3332 * this request completed.
3333 */
3334 if (!req_complete || !skb_queue_empty(&hdev->cmd_q))
3335 hci_req_cmd_complete(hdev, opcode, status);
3336}
3337
2863static void hci_rx_work(struct work_struct *work) 3338static void hci_rx_work(struct work_struct *work)
2864{ 3339{
2865 struct hci_dev *hdev = container_of(work, struct hci_dev, rx_work); 3340 struct hci_dev *hdev = container_of(work, struct hci_dev, rx_work);
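
A minimal usage sketch of the request API above (not part of the patch;
the example_* names are illustrative, while the helpers and opcodes are
the ones referenced in this diff):

static void example_req_complete(struct hci_dev *hdev, u8 status)
{
	BT_DBG("%s request done, status 0x%2.2x", hdev->name, status);
}

static int example_send_request(struct hci_dev *hdev)
{
	struct hci_request req;

	hci_req_init(&req, hdev);

	/* Each hci_req_add() queues one command; the helper itself marks
	 * the first skb of the request with req.start.
	 */
	hci_req_add(&req, HCI_OP_READ_LOCAL_VERSION, 0, NULL);
	hci_req_add(&req, HCI_OP_READ_BD_ADDR, 0, NULL);

	/* hci_req_run() splices the queued commands onto hdev->cmd_q and
	 * kicks cmd_work; example_req_complete() is called once the last
	 * command completes, or as soon as any command in it fails.
	 */
	return hci_req_run(&req, example_req_complete);
}
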
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 477726a63512..138580745c2c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -53,7 +53,7 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
53 hci_discovery_set_state(hdev, DISCOVERY_STOPPED); 53 hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
54 hci_dev_unlock(hdev); 54 hci_dev_unlock(hdev);
55 55
56 hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status); 56 hci_req_cmd_complete(hdev, HCI_OP_INQUIRY, status);
57 57
58 hci_conn_check_pending(hdev); 58 hci_conn_check_pending(hdev);
59} 59}
@@ -183,8 +183,6 @@ static void hci_cc_write_def_link_policy(struct hci_dev *hdev,
183 183
184 if (!status) 184 if (!status)
185 hdev->link_policy = get_unaligned_le16(sent); 185 hdev->link_policy = get_unaligned_le16(sent);
186
187 hci_req_complete(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, status);
188} 186}
189 187
190static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) 188static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
@@ -195,11 +193,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
195 193
196 clear_bit(HCI_RESET, &hdev->flags); 194 clear_bit(HCI_RESET, &hdev->flags);
197 195
198 hci_req_complete(hdev, HCI_OP_RESET, status);
199
200 /* Reset all non-persistent flags */ 196 /* Reset all non-persistent flags */
201 hdev->dev_flags &= ~(BIT(HCI_LE_SCAN) | BIT(HCI_PENDING_CLASS) | 197 hdev->dev_flags &= ~HCI_PERSISTENT_MASK;
202 BIT(HCI_PERIODIC_INQ));
203 198
204 hdev->discovery.state = DISCOVERY_STOPPED; 199 hdev->discovery.state = DISCOVERY_STOPPED;
205 hdev->inq_tx_power = HCI_TX_POWER_INVALID; 200 hdev->inq_tx_power = HCI_TX_POWER_INVALID;
@@ -228,11 +223,6 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb)
228 memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH); 223 memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH);
229 224
230 hci_dev_unlock(hdev); 225 hci_dev_unlock(hdev);
231
232 if (!status && !test_bit(HCI_INIT, &hdev->flags))
233 hci_update_ad(hdev);
234
235 hci_req_complete(hdev, HCI_OP_WRITE_LOCAL_NAME, status);
236} 226}
237 227
238static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) 228static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb)
@@ -270,8 +260,6 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb)
270 260
271 if (test_bit(HCI_MGMT, &hdev->dev_flags)) 261 if (test_bit(HCI_MGMT, &hdev->dev_flags))
272 mgmt_auth_enable_complete(hdev, status); 262 mgmt_auth_enable_complete(hdev, status);
273
274 hci_req_complete(hdev, HCI_OP_WRITE_AUTH_ENABLE, status);
275} 263}
276 264
277static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) 265static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb)
@@ -293,8 +281,6 @@ static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb)
293 else 281 else
294 clear_bit(HCI_ENCRYPT, &hdev->flags); 282 clear_bit(HCI_ENCRYPT, &hdev->flags);
295 } 283 }
296
297 hci_req_complete(hdev, HCI_OP_WRITE_ENCRYPT_MODE, status);
298} 284}
299 285
300static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) 286static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)
@@ -343,7 +329,6 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)
343 329
344done: 330done:
345 hci_dev_unlock(hdev); 331 hci_dev_unlock(hdev);
346 hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status);
347} 332}
348 333
349static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) 334static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb)
@@ -435,15 +420,6 @@ static void hci_cc_write_voice_setting(struct hci_dev *hdev,
435 hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING); 420 hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING);
436} 421}
437 422
438static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb)
439{
440 __u8 status = *((__u8 *) skb->data);
441
442 BT_DBG("%s status 0x%2.2x", hdev->name, status);
443
444 hci_req_complete(hdev, HCI_OP_HOST_BUFFER_SIZE, status);
445}
446
447static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) 423static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
448{ 424{
449 __u8 status = *((__u8 *) skb->data); 425 __u8 status = *((__u8 *) skb->data);
@@ -472,211 +448,6 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
472 } 448 }
473} 449}
474 450
475static u8 hci_get_inquiry_mode(struct hci_dev *hdev)
476{
477 if (lmp_ext_inq_capable(hdev))
478 return 2;
479
480 if (lmp_inq_rssi_capable(hdev))
481 return 1;
482
483 if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 &&
484 hdev->lmp_subver == 0x0757)
485 return 1;
486
487 if (hdev->manufacturer == 15) {
488 if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963)
489 return 1;
490 if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963)
491 return 1;
492 if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965)
493 return 1;
494 }
495
496 if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 &&
497 hdev->lmp_subver == 0x1805)
498 return 1;
499
500 return 0;
501}
502
503static void hci_setup_inquiry_mode(struct hci_dev *hdev)
504{
505 u8 mode;
506
507 mode = hci_get_inquiry_mode(hdev);
508
509 hci_send_cmd(hdev, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode);
510}
511
512static void hci_setup_event_mask(struct hci_dev *hdev)
513{
514 /* The second byte is 0xff instead of 0x9f (two reserved bits
515 * disabled) since a Broadcom 1.2 dongle doesn't respond to the
516 * command otherwise */
517 u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 };
518
519 /* CSR 1.1 dongles does not accept any bitfield so don't try to set
520 * any event mask for pre 1.2 devices */
521 if (hdev->hci_ver < BLUETOOTH_VER_1_2)
522 return;
523
524 if (lmp_bredr_capable(hdev)) {
525 events[4] |= 0x01; /* Flow Specification Complete */
526 events[4] |= 0x02; /* Inquiry Result with RSSI */
527 events[4] |= 0x04; /* Read Remote Extended Features Complete */
528 events[5] |= 0x08; /* Synchronous Connection Complete */
529 events[5] |= 0x10; /* Synchronous Connection Changed */
530 }
531
532 if (lmp_inq_rssi_capable(hdev))
533 events[4] |= 0x02; /* Inquiry Result with RSSI */
534
535 if (lmp_sniffsubr_capable(hdev))
536 events[5] |= 0x20; /* Sniff Subrating */
537
538 if (lmp_pause_enc_capable(hdev))
539 events[5] |= 0x80; /* Encryption Key Refresh Complete */
540
541 if (lmp_ext_inq_capable(hdev))
542 events[5] |= 0x40; /* Extended Inquiry Result */
543
544 if (lmp_no_flush_capable(hdev))
545 events[7] |= 0x01; /* Enhanced Flush Complete */
546
547 if (lmp_lsto_capable(hdev))
548 events[6] |= 0x80; /* Link Supervision Timeout Changed */
549
550 if (lmp_ssp_capable(hdev)) {
551 events[6] |= 0x01; /* IO Capability Request */
552 events[6] |= 0x02; /* IO Capability Response */
553 events[6] |= 0x04; /* User Confirmation Request */
554 events[6] |= 0x08; /* User Passkey Request */
555 events[6] |= 0x10; /* Remote OOB Data Request */
556 events[6] |= 0x20; /* Simple Pairing Complete */
557 events[7] |= 0x04; /* User Passkey Notification */
558 events[7] |= 0x08; /* Keypress Notification */
559 events[7] |= 0x10; /* Remote Host Supported
560 * Features Notification */
561 }
562
563 if (lmp_le_capable(hdev))
564 events[7] |= 0x20; /* LE Meta-Event */
565
566 hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events);
567
568 if (lmp_le_capable(hdev)) {
569 memset(events, 0, sizeof(events));
570 events[0] = 0x1f;
571 hci_send_cmd(hdev, HCI_OP_LE_SET_EVENT_MASK,
572 sizeof(events), events);
573 }
574}
575
576static void bredr_setup(struct hci_dev *hdev)
577{
578 struct hci_cp_delete_stored_link_key cp;
579 __le16 param;
580 __u8 flt_type;
581
582 /* Read Buffer Size (ACL mtu, max pkt, etc.) */
583 hci_send_cmd(hdev, HCI_OP_READ_BUFFER_SIZE, 0, NULL);
584
585 /* Read Class of Device */
586 hci_send_cmd(hdev, HCI_OP_READ_CLASS_OF_DEV, 0, NULL);
587
588 /* Read Local Name */
589 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_NAME, 0, NULL);
590
591 /* Read Voice Setting */
592 hci_send_cmd(hdev, HCI_OP_READ_VOICE_SETTING, 0, NULL);
593
594 /* Clear Event Filters */
595 flt_type = HCI_FLT_CLEAR_ALL;
596 hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type);
597
598 /* Connection accept timeout ~20 secs */
599 param = __constant_cpu_to_le16(0x7d00);
600 hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
601
602 bacpy(&cp.bdaddr, BDADDR_ANY);
603 cp.delete_all = 1;
604 hci_send_cmd(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp);
605}
606
607static void le_setup(struct hci_dev *hdev)
608{
609 /* Read LE Buffer Size */
610 hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL);
611
612 /* Read LE Local Supported Features */
613 hci_send_cmd(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL);
614
615 /* Read LE Advertising Channel TX Power */
616 hci_send_cmd(hdev, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL);
617
618 /* Read LE White List Size */
619 hci_send_cmd(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL);
620
621 /* Read LE Supported States */
622 hci_send_cmd(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL);
623}
624
625static void hci_setup(struct hci_dev *hdev)
626{
627 if (hdev->dev_type != HCI_BREDR)
628 return;
629
630 /* Read BD Address */
631 hci_send_cmd(hdev, HCI_OP_READ_BD_ADDR, 0, NULL);
632
633 if (lmp_bredr_capable(hdev))
634 bredr_setup(hdev);
635
636 if (lmp_le_capable(hdev))
637 le_setup(hdev);
638
639 hci_setup_event_mask(hdev);
640
641 if (hdev->hci_ver > BLUETOOTH_VER_1_1)
642 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL);
643
644 if (lmp_ssp_capable(hdev)) {
645 if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) {
646 u8 mode = 0x01;
647 hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE,
648 sizeof(mode), &mode);
649 } else {
650 struct hci_cp_write_eir cp;
651
652 memset(hdev->eir, 0, sizeof(hdev->eir));
653 memset(&cp, 0, sizeof(cp));
654
655 hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
656 }
657 }
658
659 if (lmp_inq_rssi_capable(hdev))
660 hci_setup_inquiry_mode(hdev);
661
662 if (lmp_inq_tx_pwr_capable(hdev))
663 hci_send_cmd(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL);
664
665 if (lmp_ext_feat_capable(hdev)) {
666 struct hci_cp_read_local_ext_features cp;
667
668 cp.page = 0x01;
669 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, sizeof(cp),
670 &cp);
671 }
672
673 if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) {
674 u8 enable = 1;
675 hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable),
676 &enable);
677 }
678}
679
680static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) 451static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
681{ 452{
682 struct hci_rp_read_local_version *rp = (void *) skb->data; 453 struct hci_rp_read_local_version *rp = (void *) skb->data;
@@ -684,7 +455,7 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
684 BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); 455 BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
685 456
686 if (rp->status) 457 if (rp->status)
687 goto done; 458 return;
688 459
689 hdev->hci_ver = rp->hci_ver; 460 hdev->hci_ver = rp->hci_ver;
690 hdev->hci_rev = __le16_to_cpu(rp->hci_rev); 461 hdev->hci_rev = __le16_to_cpu(rp->hci_rev);
@@ -694,30 +465,6 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
694 465
695 BT_DBG("%s manufacturer 0x%4.4x hci ver %d:%d", hdev->name, 466 BT_DBG("%s manufacturer 0x%4.4x hci ver %d:%d", hdev->name,
696 hdev->manufacturer, hdev->hci_ver, hdev->hci_rev); 467 hdev->manufacturer, hdev->hci_ver, hdev->hci_rev);
697
698 if (test_bit(HCI_INIT, &hdev->flags))
699 hci_setup(hdev);
700
701done:
702 hci_req_complete(hdev, HCI_OP_READ_LOCAL_VERSION, rp->status);
703}
704
705static void hci_setup_link_policy(struct hci_dev *hdev)
706{
707 struct hci_cp_write_def_link_policy cp;
708 u16 link_policy = 0;
709
710 if (lmp_rswitch_capable(hdev))
711 link_policy |= HCI_LP_RSWITCH;
712 if (lmp_hold_capable(hdev))
713 link_policy |= HCI_LP_HOLD;
714 if (lmp_sniff_capable(hdev))
715 link_policy |= HCI_LP_SNIFF;
716 if (lmp_park_capable(hdev))
717 link_policy |= HCI_LP_PARK;
718
719 cp.policy = cpu_to_le16(link_policy);
720 hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp);
721} 468}
722 469
723static void hci_cc_read_local_commands(struct hci_dev *hdev, 470static void hci_cc_read_local_commands(struct hci_dev *hdev,
@@ -727,16 +474,8 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev,
727 474
728 BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); 475 BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
729 476
730 if (rp->status) 477 if (!rp->status)
731 goto done; 478 memcpy(hdev->commands, rp->commands, sizeof(hdev->commands));
732
733 memcpy(hdev->commands, rp->commands, sizeof(hdev->commands));
734
735 if (test_bit(HCI_INIT, &hdev->flags) && (hdev->commands[5] & 0x10))
736 hci_setup_link_policy(hdev);
737
738done:
739 hci_req_complete(hdev, HCI_OP_READ_LOCAL_COMMANDS, rp->status);
740} 479}
741 480
742static void hci_cc_read_local_features(struct hci_dev *hdev, 481static void hci_cc_read_local_features(struct hci_dev *hdev,
@@ -795,22 +534,6 @@ static void hci_cc_read_local_features(struct hci_dev *hdev,
795 hdev->features[6], hdev->features[7]); 534 hdev->features[6], hdev->features[7]);
796} 535}
797 536
798static void hci_set_le_support(struct hci_dev *hdev)
799{
800 struct hci_cp_write_le_host_supported cp;
801
802 memset(&cp, 0, sizeof(cp));
803
804 if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
805 cp.le = 1;
806 cp.simul = lmp_le_br_capable(hdev);
807 }
808
809 if (cp.le != lmp_host_le_capable(hdev))
810 hci_send_cmd(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp),
811 &cp);
812}
813
814static void hci_cc_read_local_ext_features(struct hci_dev *hdev, 537static void hci_cc_read_local_ext_features(struct hci_dev *hdev,
815 struct sk_buff *skb) 538 struct sk_buff *skb)
816{ 539{
@@ -819,7 +542,7 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev,
819 BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); 542 BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
820 543
821 if (rp->status) 544 if (rp->status)
822 goto done; 545 return;
823 546
824 switch (rp->page) { 547 switch (rp->page) {
825 case 0: 548 case 0:
@@ -829,12 +552,6 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev,
829 memcpy(hdev->host_features, rp->features, 8); 552 memcpy(hdev->host_features, rp->features, 8);
830 break; 553 break;
831 } 554 }
832
833 if (test_bit(HCI_INIT, &hdev->flags) && lmp_le_capable(hdev))
834 hci_set_le_support(hdev);
835
836done:
837 hci_req_complete(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, rp->status);
838} 555}
839 556
840static void hci_cc_read_flow_control_mode(struct hci_dev *hdev, 557static void hci_cc_read_flow_control_mode(struct hci_dev *hdev,
@@ -844,12 +561,8 @@ static void hci_cc_read_flow_control_mode(struct hci_dev *hdev,
844 561
845 BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); 562 BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
846 563
847 if (rp->status) 564 if (!rp->status)
848 return; 565 hdev->flow_ctl_mode = rp->mode;
849
850 hdev->flow_ctl_mode = rp->mode;
851
852 hci_req_complete(hdev, HCI_OP_READ_FLOW_CONTROL_MODE, rp->status);
853} 566}
854 567
855static void hci_cc_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) 568static void hci_cc_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb)
@@ -886,8 +599,65 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb)
886 599
887 if (!rp->status) 600 if (!rp->status)
888 bacpy(&hdev->bdaddr, &rp->bdaddr); 601 bacpy(&hdev->bdaddr, &rp->bdaddr);
602}
603
604static void hci_cc_read_page_scan_activity(struct hci_dev *hdev,
605 struct sk_buff *skb)
606{
607 struct hci_rp_read_page_scan_activity *rp = (void *) skb->data;
608
609 BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
610
611 if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) {
612 hdev->page_scan_interval = __le16_to_cpu(rp->interval);
613 hdev->page_scan_window = __le16_to_cpu(rp->window);
614 }
615}
616
617static void hci_cc_write_page_scan_activity(struct hci_dev *hdev,
618 struct sk_buff *skb)
619{
620 u8 status = *((u8 *) skb->data);
621 struct hci_cp_write_page_scan_activity *sent;
622
623 BT_DBG("%s status 0x%2.2x", hdev->name, status);
624
625 if (status)
626 return;
627
628 sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY);
629 if (!sent)
630 return;
631
632 hdev->page_scan_interval = __le16_to_cpu(sent->interval);
633 hdev->page_scan_window = __le16_to_cpu(sent->window);
634}
635
636static void hci_cc_read_page_scan_type(struct hci_dev *hdev,
637 struct sk_buff *skb)
638{
639 struct hci_rp_read_page_scan_type *rp = (void *) skb->data;
640
641 BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
642
643 if (test_bit(HCI_INIT, &hdev->flags) && !rp->status)
644 hdev->page_scan_type = rp->type;
645}
646
647static void hci_cc_write_page_scan_type(struct hci_dev *hdev,
648 struct sk_buff *skb)
649{
650 u8 status = *((u8 *) skb->data);
651 u8 *type;
889 652
890 hci_req_complete(hdev, HCI_OP_READ_BD_ADDR, rp->status); 653 BT_DBG("%s status 0x%2.2x", hdev->name, status);
654
655 if (status)
656 return;
657
658 type = hci_sent_cmd_data(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE);
659 if (type)
660 hdev->page_scan_type = *type;
891} 661}
892 662
893static void hci_cc_read_data_block_size(struct hci_dev *hdev, 663static void hci_cc_read_data_block_size(struct hci_dev *hdev,
@@ -908,17 +678,6 @@ static void hci_cc_read_data_block_size(struct hci_dev *hdev,
908 678
909 BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu, 679 BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu,
910 hdev->block_cnt, hdev->block_len); 680 hdev->block_cnt, hdev->block_len);
911
912 hci_req_complete(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, rp->status);
913}
914
915static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb)
916{
917 __u8 status = *((__u8 *) skb->data);
918
919 BT_DBG("%s status 0x%2.2x", hdev->name, status);
920
921 hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status);
922} 681}
923 682
924static void hci_cc_read_local_amp_info(struct hci_dev *hdev, 683static void hci_cc_read_local_amp_info(struct hci_dev *hdev,
@@ -942,8 +701,6 @@ static void hci_cc_read_local_amp_info(struct hci_dev *hdev,
942 hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to); 701 hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to);
943 hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to); 702 hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to);
944 703
945 hci_req_complete(hdev, HCI_OP_READ_LOCAL_AMP_INFO, rp->status);
946
947a2mp_rsp: 704a2mp_rsp:
948 a2mp_send_getinfo_rsp(hdev); 705 a2mp_send_getinfo_rsp(hdev);
949} 706}
@@ -985,35 +742,6 @@ a2mp_rsp:
985 a2mp_send_create_phy_link_req(hdev, rp->status); 742 a2mp_send_create_phy_link_req(hdev, rp->status);
986} 743}
987 744
988static void hci_cc_delete_stored_link_key(struct hci_dev *hdev,
989 struct sk_buff *skb)
990{
991 __u8 status = *((__u8 *) skb->data);
992
993 BT_DBG("%s status 0x%2.2x", hdev->name, status);
994
995 hci_req_complete(hdev, HCI_OP_DELETE_STORED_LINK_KEY, status);
996}
997
998static void hci_cc_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb)
999{
1000 __u8 status = *((__u8 *) skb->data);
1001
1002 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1003
1004 hci_req_complete(hdev, HCI_OP_SET_EVENT_MASK, status);
1005}
1006
1007static void hci_cc_write_inquiry_mode(struct hci_dev *hdev,
1008 struct sk_buff *skb)
1009{
1010 __u8 status = *((__u8 *) skb->data);
1011
1012 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1013
1014 hci_req_complete(hdev, HCI_OP_WRITE_INQUIRY_MODE, status);
1015}
1016
1017static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, 745static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev,
1018 struct sk_buff *skb) 746 struct sk_buff *skb)
1019{ 747{
@@ -1023,17 +751,6 @@ static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev,
1023 751
1024 if (!rp->status) 752 if (!rp->status)
1025 hdev->inq_tx_power = rp->tx_power; 753 hdev->inq_tx_power = rp->tx_power;
1026
1027 hci_req_complete(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, rp->status);
1028}
1029
1030static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb)
1031{
1032 __u8 status = *((__u8 *) skb->data);
1033
1034 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1035
1036 hci_req_complete(hdev, HCI_OP_SET_EVENT_FLT, status);
1037} 754}
1038 755
1039static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) 756static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1095,8 +812,6 @@ static void hci_cc_le_read_buffer_size(struct hci_dev *hdev,
1095 hdev->le_cnt = hdev->le_pkts; 812 hdev->le_cnt = hdev->le_pkts;
1096 813
1097 BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts); 814 BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts);
1098
1099 hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status);
1100} 815}
1101 816
1102static void hci_cc_le_read_local_features(struct hci_dev *hdev, 817static void hci_cc_le_read_local_features(struct hci_dev *hdev,
@@ -1108,8 +823,6 @@ static void hci_cc_le_read_local_features(struct hci_dev *hdev,
1108 823
1109 if (!rp->status) 824 if (!rp->status)
1110 memcpy(hdev->le_features, rp->features, 8); 825 memcpy(hdev->le_features, rp->features, 8);
1111
1112 hci_req_complete(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, rp->status);
1113} 826}
1114 827
1115static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, 828static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev,
@@ -1119,22 +832,8 @@ static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev,
1119 832
1120 BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); 833 BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
1121 834
1122 if (!rp->status) { 835 if (!rp->status)
1123 hdev->adv_tx_power = rp->tx_power; 836 hdev->adv_tx_power = rp->tx_power;
1124 if (!test_bit(HCI_INIT, &hdev->flags))
1125 hci_update_ad(hdev);
1126 }
1127
1128 hci_req_complete(hdev, HCI_OP_LE_READ_ADV_TX_POWER, rp->status);
1129}
1130
1131static void hci_cc_le_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb)
1132{
1133 __u8 status = *((__u8 *) skb->data);
1134
1135 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1136
1137 hci_req_complete(hdev, HCI_OP_LE_SET_EVENT_MASK, status);
1138} 837}
1139 838
1140static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb) 839static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1231,12 +930,15 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb)
1231 clear_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags); 930 clear_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags);
1232 } 931 }
1233 932
1234 hci_dev_unlock(hdev); 933 if (!test_bit(HCI_INIT, &hdev->flags)) {
934 struct hci_request req;
1235 935
1236 if (!test_bit(HCI_INIT, &hdev->flags)) 936 hci_req_init(&req, hdev);
1237 hci_update_ad(hdev); 937 hci_update_ad(&req);
938 hci_req_run(&req, NULL);
939 }
1238 940
1239 hci_req_complete(hdev, HCI_OP_LE_SET_ADV_ENABLE, status); 941 hci_dev_unlock(hdev);
1240} 942}
1241 943
1242static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) 944static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1245,8 +947,6 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb)
1245 947
1246 BT_DBG("%s status 0x%2.2x", hdev->name, status); 948 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1247 949
1248 hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_PARAM, status);
1249
1250 if (status) { 950 if (status) {
1251 hci_dev_lock(hdev); 951 hci_dev_lock(hdev);
1252 mgmt_start_discovery_failed(hdev, status); 952 mgmt_start_discovery_failed(hdev, status);
@@ -1269,8 +969,6 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
1269 969
1270 switch (cp->enable) { 970 switch (cp->enable) {
1271 case LE_SCANNING_ENABLED: 971 case LE_SCANNING_ENABLED:
1272 hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_ENABLE, status);
1273
1274 if (status) { 972 if (status) {
1275 hci_dev_lock(hdev); 973 hci_dev_lock(hdev);
1276 mgmt_start_discovery_failed(hdev, status); 974 mgmt_start_discovery_failed(hdev, status);
@@ -1321,32 +1019,6 @@ static void hci_cc_le_read_white_list_size(struct hci_dev *hdev,
1321 1019
1322 if (!rp->status) 1020 if (!rp->status)
1323 hdev->le_white_list_size = rp->size; 1021 hdev->le_white_list_size = rp->size;
1324
1325 hci_req_complete(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, rp->status);
1326}
1327
1328static void hci_cc_le_ltk_reply(struct hci_dev *hdev, struct sk_buff *skb)
1329{
1330 struct hci_rp_le_ltk_reply *rp = (void *) skb->data;
1331
1332 BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
1333
1334 if (rp->status)
1335 return;
1336
1337 hci_req_complete(hdev, HCI_OP_LE_LTK_REPLY, rp->status);
1338}
1339
1340static void hci_cc_le_ltk_neg_reply(struct hci_dev *hdev, struct sk_buff *skb)
1341{
1342 struct hci_rp_le_ltk_neg_reply *rp = (void *) skb->data;
1343
1344 BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
1345
1346 if (rp->status)
1347 return;
1348
1349 hci_req_complete(hdev, HCI_OP_LE_LTK_NEG_REPLY, rp->status);
1350} 1022}
1351 1023
1352static void hci_cc_le_read_supported_states(struct hci_dev *hdev, 1024static void hci_cc_le_read_supported_states(struct hci_dev *hdev,
@@ -1358,8 +1030,6 @@ static void hci_cc_le_read_supported_states(struct hci_dev *hdev,
1358 1030
1359 if (!rp->status) 1031 if (!rp->status)
1360 memcpy(hdev->le_states, rp->le_states, 8); 1032 memcpy(hdev->le_states, rp->le_states, 8);
1361
1362 hci_req_complete(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, rp->status);
1363} 1033}
1364 1034
1365static void hci_cc_write_le_host_supported(struct hci_dev *hdev, 1035static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
@@ -1389,8 +1059,6 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
1389 if (test_bit(HCI_MGMT, &hdev->dev_flags) && 1059 if (test_bit(HCI_MGMT, &hdev->dev_flags) &&
1390 !test_bit(HCI_INIT, &hdev->flags)) 1060 !test_bit(HCI_INIT, &hdev->flags))
1391 mgmt_le_enable_complete(hdev, sent->le, status); 1061 mgmt_le_enable_complete(hdev, sent->le, status);
1392
1393 hci_req_complete(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, status);
1394} 1062}
1395 1063
1396static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev, 1064static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev,
@@ -1412,7 +1080,6 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
1412 BT_DBG("%s status 0x%2.2x", hdev->name, status); 1080 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1413 1081
1414 if (status) { 1082 if (status) {
1415 hci_req_complete(hdev, HCI_OP_INQUIRY, status);
1416 hci_conn_check_pending(hdev); 1083 hci_conn_check_pending(hdev);
1417 hci_dev_lock(hdev); 1084 hci_dev_lock(hdev);
1418 if (test_bit(HCI_MGMT, &hdev->dev_flags)) 1085 if (test_bit(HCI_MGMT, &hdev->dev_flags))
@@ -1884,11 +1551,6 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status)
1884 } 1551 }
1885} 1552}
1886 1553
1887static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status)
1888{
1889 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1890}
1891
1892static void hci_cs_create_phylink(struct hci_dev *hdev, u8 status) 1554static void hci_cs_create_phylink(struct hci_dev *hdev, u8 status)
1893{ 1555{
1894 struct hci_cp_create_phy_link *cp; 1556 struct hci_cp_create_phy_link *cp;
@@ -1930,11 +1592,6 @@ static void hci_cs_accept_phylink(struct hci_dev *hdev, u8 status)
1930 amp_write_remote_assoc(hdev, cp->phy_handle); 1592 amp_write_remote_assoc(hdev, cp->phy_handle);
1931} 1593}
1932 1594
1933static void hci_cs_create_logical_link(struct hci_dev *hdev, u8 status)
1934{
1935 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1936}
1937
1938static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) 1595static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
1939{ 1596{
1940 __u8 status = *((__u8 *) skb->data); 1597 __u8 status = *((__u8 *) skb->data);
@@ -1943,7 +1600,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
1943 1600
1944 BT_DBG("%s status 0x%2.2x", hdev->name, status); 1601 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1945 1602
1946 hci_req_complete(hdev, HCI_OP_INQUIRY, status); 1603 hci_req_cmd_complete(hdev, HCI_OP_INQUIRY, status);
1947 1604
1948 hci_conn_check_pending(hdev); 1605 hci_conn_check_pending(hdev);
1949 1606
@@ -2399,7 +2056,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
2399 clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); 2056 clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
2400 2057
2401 if (ev->status && conn->state == BT_CONNECTED) { 2058 if (ev->status && conn->state == BT_CONNECTED) {
2402 hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE); 2059 hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
2403 hci_conn_put(conn); 2060 hci_conn_put(conn);
2404 goto unlock; 2061 goto unlock;
2405 } 2062 }
@@ -2491,20 +2148,10 @@ unlock:
2491 hci_dev_unlock(hdev); 2148 hci_dev_unlock(hdev);
2492} 2149}
2493 2150
2494static void hci_remote_version_evt(struct hci_dev *hdev, struct sk_buff *skb)
2495{
2496 BT_DBG("%s", hdev->name);
2497}
2498
2499static void hci_qos_setup_complete_evt(struct hci_dev *hdev,
2500 struct sk_buff *skb)
2501{
2502 BT_DBG("%s", hdev->name);
2503}
2504
2505static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) 2151static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
2506{ 2152{
2507 struct hci_ev_cmd_complete *ev = (void *) skb->data; 2153 struct hci_ev_cmd_complete *ev = (void *) skb->data;
2154 u8 status = skb->data[sizeof(*ev)];
2508 __u16 opcode; 2155 __u16 opcode;
2509 2156
2510 skb_pull(skb, sizeof(*ev)); 2157 skb_pull(skb, sizeof(*ev));
@@ -2588,10 +2235,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
2588 hci_cc_write_voice_setting(hdev, skb); 2235 hci_cc_write_voice_setting(hdev, skb);
2589 break; 2236 break;
2590 2237
2591 case HCI_OP_HOST_BUFFER_SIZE:
2592 hci_cc_host_buffer_size(hdev, skb);
2593 break;
2594
2595 case HCI_OP_WRITE_SSP_MODE: 2238 case HCI_OP_WRITE_SSP_MODE:
2596 hci_cc_write_ssp_mode(hdev, skb); 2239 hci_cc_write_ssp_mode(hdev, skb);
2597 break; 2240 break;
@@ -2620,46 +2263,42 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
2620 hci_cc_read_bd_addr(hdev, skb); 2263 hci_cc_read_bd_addr(hdev, skb);
2621 break; 2264 break;
2622 2265
2623 case HCI_OP_READ_DATA_BLOCK_SIZE: 2266 case HCI_OP_READ_PAGE_SCAN_ACTIVITY:
2624 hci_cc_read_data_block_size(hdev, skb); 2267 hci_cc_read_page_scan_activity(hdev, skb);
2625 break; 2268 break;
2626 2269
2627 case HCI_OP_WRITE_CA_TIMEOUT: 2270 case HCI_OP_WRITE_PAGE_SCAN_ACTIVITY:
2628 hci_cc_write_ca_timeout(hdev, skb); 2271 hci_cc_write_page_scan_activity(hdev, skb);
2629 break; 2272 break;
2630 2273
2631 case HCI_OP_READ_FLOW_CONTROL_MODE: 2274 case HCI_OP_READ_PAGE_SCAN_TYPE:
2632 hci_cc_read_flow_control_mode(hdev, skb); 2275 hci_cc_read_page_scan_type(hdev, skb);
2633 break; 2276 break;
2634 2277
2635 case HCI_OP_READ_LOCAL_AMP_INFO: 2278 case HCI_OP_WRITE_PAGE_SCAN_TYPE:
2636 hci_cc_read_local_amp_info(hdev, skb); 2279 hci_cc_write_page_scan_type(hdev, skb);
2637 break; 2280 break;
2638 2281
2639 case HCI_OP_READ_LOCAL_AMP_ASSOC: 2282 case HCI_OP_READ_DATA_BLOCK_SIZE:
2640 hci_cc_read_local_amp_assoc(hdev, skb); 2283 hci_cc_read_data_block_size(hdev, skb);
2641 break; 2284 break;
2642 2285
2643 case HCI_OP_DELETE_STORED_LINK_KEY: 2286 case HCI_OP_READ_FLOW_CONTROL_MODE:
2644 hci_cc_delete_stored_link_key(hdev, skb); 2287 hci_cc_read_flow_control_mode(hdev, skb);
2645 break; 2288 break;
2646 2289
2647 case HCI_OP_SET_EVENT_MASK: 2290 case HCI_OP_READ_LOCAL_AMP_INFO:
2648 hci_cc_set_event_mask(hdev, skb); 2291 hci_cc_read_local_amp_info(hdev, skb);
2649 break; 2292 break;
2650 2293
2651 case HCI_OP_WRITE_INQUIRY_MODE: 2294 case HCI_OP_READ_LOCAL_AMP_ASSOC:
2652 hci_cc_write_inquiry_mode(hdev, skb); 2295 hci_cc_read_local_amp_assoc(hdev, skb);
2653 break; 2296 break;
2654 2297
2655 case HCI_OP_READ_INQ_RSP_TX_POWER: 2298 case HCI_OP_READ_INQ_RSP_TX_POWER:
2656 hci_cc_read_inq_rsp_tx_power(hdev, skb); 2299 hci_cc_read_inq_rsp_tx_power(hdev, skb);
2657 break; 2300 break;
2658 2301
2659 case HCI_OP_SET_EVENT_FLT:
2660 hci_cc_set_event_flt(hdev, skb);
2661 break;
2662
2663 case HCI_OP_PIN_CODE_REPLY: 2302 case HCI_OP_PIN_CODE_REPLY:
2664 hci_cc_pin_code_reply(hdev, skb); 2303 hci_cc_pin_code_reply(hdev, skb);
2665 break; 2304 break;
@@ -2684,10 +2323,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
2684 hci_cc_le_read_adv_tx_power(hdev, skb); 2323 hci_cc_le_read_adv_tx_power(hdev, skb);
2685 break; 2324 break;
2686 2325
2687 case HCI_OP_LE_SET_EVENT_MASK:
2688 hci_cc_le_set_event_mask(hdev, skb);
2689 break;
2690
2691 case HCI_OP_USER_CONFIRM_REPLY: 2326 case HCI_OP_USER_CONFIRM_REPLY:
2692 hci_cc_user_confirm_reply(hdev, skb); 2327 hci_cc_user_confirm_reply(hdev, skb);
2693 break; 2328 break;
@@ -2720,14 +2355,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
2720 hci_cc_le_read_white_list_size(hdev, skb); 2355 hci_cc_le_read_white_list_size(hdev, skb);
2721 break; 2356 break;
2722 2357
2723 case HCI_OP_LE_LTK_REPLY:
2724 hci_cc_le_ltk_reply(hdev, skb);
2725 break;
2726
2727 case HCI_OP_LE_LTK_NEG_REPLY:
2728 hci_cc_le_ltk_neg_reply(hdev, skb);
2729 break;
2730
2731 case HCI_OP_LE_READ_SUPPORTED_STATES: 2358 case HCI_OP_LE_READ_SUPPORTED_STATES:
2732 hci_cc_le_read_supported_states(hdev, skb); 2359 hci_cc_le_read_supported_states(hdev, skb);
2733 break; 2360 break;
@@ -2745,9 +2372,11 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
2745 break; 2372 break;
2746 } 2373 }
2747 2374
2748 if (ev->opcode != HCI_OP_NOP) 2375 if (opcode != HCI_OP_NOP)
2749 del_timer(&hdev->cmd_timer); 2376 del_timer(&hdev->cmd_timer);
2750 2377
2378 hci_req_cmd_complete(hdev, opcode, status);
2379
2751 if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { 2380 if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {
2752 atomic_set(&hdev->cmd_cnt, 1); 2381 atomic_set(&hdev->cmd_cnt, 1);
2753 if (!skb_queue_empty(&hdev->cmd_q)) 2382 if (!skb_queue_empty(&hdev->cmd_q))
@@ -2817,10 +2446,6 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
2817 hci_cs_le_create_conn(hdev, ev->status); 2446 hci_cs_le_create_conn(hdev, ev->status);
2818 break; 2447 break;
2819 2448
2820 case HCI_OP_LE_START_ENC:
2821 hci_cs_le_start_enc(hdev, ev->status);
2822 break;
2823
2824 case HCI_OP_CREATE_PHY_LINK: 2449 case HCI_OP_CREATE_PHY_LINK:
2825 hci_cs_create_phylink(hdev, ev->status); 2450 hci_cs_create_phylink(hdev, ev->status);
2826 break; 2451 break;
@@ -2829,18 +2454,16 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
2829 hci_cs_accept_phylink(hdev, ev->status); 2454 hci_cs_accept_phylink(hdev, ev->status);
2830 break; 2455 break;
2831 2456
2832 case HCI_OP_CREATE_LOGICAL_LINK:
2833 hci_cs_create_logical_link(hdev, ev->status);
2834 break;
2835
2836 default: 2457 default:
2837 BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); 2458 BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode);
2838 break; 2459 break;
2839 } 2460 }
2840 2461
2841 if (ev->opcode != HCI_OP_NOP) 2462 if (opcode != HCI_OP_NOP)
2842 del_timer(&hdev->cmd_timer); 2463 del_timer(&hdev->cmd_timer);
2843 2464
2465 hci_req_cmd_status(hdev, opcode, ev->status);
2466
2844 if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { 2467 if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {
2845 atomic_set(&hdev->cmd_cnt, 1); 2468 atomic_set(&hdev->cmd_cnt, 1);
2846 if (!skb_queue_empty(&hdev->cmd_q)) 2469 if (!skb_queue_empty(&hdev->cmd_q))
@@ -3391,18 +3014,6 @@ unlock:
3391 hci_dev_unlock(hdev); 3014 hci_dev_unlock(hdev);
3392} 3015}
3393 3016
3394static void hci_sync_conn_changed_evt(struct hci_dev *hdev, struct sk_buff *skb)
3395{
3396 BT_DBG("%s", hdev->name);
3397}
3398
3399static void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb)
3400{
3401 struct hci_ev_sniff_subrate *ev = (void *) skb->data;
3402
3403 BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
3404}
3405
3406static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, 3017static void hci_extended_inquiry_result_evt(struct hci_dev *hdev,
3407 struct sk_buff *skb) 3018 struct sk_buff *skb)
3408{ 3019{
@@ -3472,7 +3083,7 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev,
3472 clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); 3083 clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
3473 3084
3474 if (ev->status && conn->state == BT_CONNECTED) { 3085 if (ev->status && conn->state == BT_CONNECTED) {
3475 hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE); 3086 hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
3476 hci_conn_put(conn); 3087 hci_conn_put(conn);
3477 goto unlock; 3088 goto unlock;
3478 } 3089 }
@@ -4130,14 +3741,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
4130 hci_remote_features_evt(hdev, skb); 3741 hci_remote_features_evt(hdev, skb);
4131 break; 3742 break;
4132 3743
4133 case HCI_EV_REMOTE_VERSION:
4134 hci_remote_version_evt(hdev, skb);
4135 break;
4136
4137 case HCI_EV_QOS_SETUP_COMPLETE:
4138 hci_qos_setup_complete_evt(hdev, skb);
4139 break;
4140
4141 case HCI_EV_CMD_COMPLETE: 3744 case HCI_EV_CMD_COMPLETE:
4142 hci_cmd_complete_evt(hdev, skb); 3745 hci_cmd_complete_evt(hdev, skb);
4143 break; 3746 break;
@@ -4194,14 +3797,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
4194 hci_sync_conn_complete_evt(hdev, skb); 3797 hci_sync_conn_complete_evt(hdev, skb);
4195 break; 3798 break;
4196 3799
4197 case HCI_EV_SYNC_CONN_CHANGED:
4198 hci_sync_conn_changed_evt(hdev, skb);
4199 break;
4200
4201 case HCI_EV_SNIFF_SUBRATE:
4202 hci_sniff_subrate_evt(hdev, skb);
4203 break;
4204
4205 case HCI_EV_EXTENDED_INQUIRY_RESULT: 3800 case HCI_EV_EXTENDED_INQUIRY_RESULT:
4206 hci_extended_inquiry_result_evt(hdev, skb); 3801 hci_extended_inquiry_result_evt(hdev, skb);
4207 break; 3802 break;
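
For reference, the per-skb request metadata used throughout these two
files, in sketch form: the typedef matches the callbacks above, but the
struct layout is an assumption for illustration (the real definition
lives in the bluetooth headers):

typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status);

struct hci_req_ctrl {
	bool			start;		/* first command of a request */
	hci_req_complete_t	complete;	/* run when the request finishes */
};
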
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 6a93614f2c49..aa4354fca77c 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -854,6 +854,11 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
854 skb_queue_tail(&hdev->raw_q, skb); 854 skb_queue_tail(&hdev->raw_q, skb);
855 queue_work(hdev->workqueue, &hdev->tx_work); 855 queue_work(hdev->workqueue, &hdev->tx_work);
856 } else { 856 } else {
857 /* Stand-alone HCI commands must be flagged as
858 * single-command requests.
859 */
860 bt_cb(skb)->req.start = true;
861
857 skb_queue_tail(&hdev->cmd_q, skb); 862 skb_queue_tail(&hdev->cmd_q, skb);
858 queue_work(hdev->workqueue, &hdev->cmd_work); 863 queue_work(hdev->workqueue, &hdev->cmd_work);
859 } 864 }
@@ -1121,8 +1126,6 @@ error:
1121void hci_sock_cleanup(void) 1126void hci_sock_cleanup(void)
1122{ 1127{
1123 bt_procfs_cleanup(&init_net, "hci"); 1128 bt_procfs_cleanup(&init_net, "hci");
1124 if (bt_sock_unregister(BTPROTO_HCI) < 0) 1129 bt_sock_unregister(BTPROTO_HCI);
1125 BT_ERR("HCI socket unregistration failed");
1126
1127 proto_unregister(&hci_sk_proto); 1130 proto_unregister(&hci_sk_proto);
1128} 1131}
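
Flagging raw-socket commands with req.start preserves the invariant
that hci_req_is_complete() depends on: the command at the head of
hdev->cmd_q starts a new request exactly when its start flag is set.
In miniature (grounded in the hunk above):

	/* one stand-alone command == one single-command request */
	bt_cb(skb)->req.start = true;
	skb_queue_tail(&hdev->cmd_q, skb);
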
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 23b4e242a31a..ff38561385de 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -590,10 +590,8 @@ int __init bt_sysfs_init(void)
590 bt_debugfs = debugfs_create_dir("bluetooth", NULL); 590 bt_debugfs = debugfs_create_dir("bluetooth", NULL);
591 591
592 bt_class = class_create(THIS_MODULE, "bluetooth"); 592 bt_class = class_create(THIS_MODULE, "bluetooth");
593 if (IS_ERR(bt_class))
594 return PTR_ERR(bt_class);
595 593
596 return 0; 594 return PTR_RET(bt_class);
597} 595}
598 596
599void bt_sysfs_cleanup(void) 597void bt_sysfs_cleanup(void)
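
PTR_RET() collapses the removed IS_ERR()/PTR_ERR() pair into a single
helper. A sketch of its behavior (ptr_ret_sketch is illustrative, not
the kernel's definition):

static inline int ptr_ret_sketch(const void *ptr)
{
	return IS_ERR(ptr) ? PTR_ERR(ptr) : 0;	/* error code, or success */
}
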
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index a7352ff3fd1e..2342327f3335 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -311,6 +311,9 @@ static int hidp_get_raw_report(struct hid_device *hid,
311 int numbered_reports = hid->report_enum[report_type].numbered; 311 int numbered_reports = hid->report_enum[report_type].numbered;
312 int ret; 312 int ret;
313 313
314 if (atomic_read(&session->terminate))
315 return -EIO;
316
314 switch (report_type) { 317 switch (report_type) {
315 case HID_FEATURE_REPORT: 318 case HID_FEATURE_REPORT:
316 report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_FEATURE; 319 report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_FEATURE;
@@ -722,6 +725,7 @@ static int hidp_session(void *arg)
722 set_current_state(TASK_INTERRUPTIBLE); 725 set_current_state(TASK_INTERRUPTIBLE);
723 } 726 }
724 set_current_state(TASK_RUNNING); 727 set_current_state(TASK_RUNNING);
728 atomic_inc(&session->terminate);
725 remove_wait_queue(sk_sleep(intr_sk), &intr_wait); 729 remove_wait_queue(sk_sleep(intr_sk), &intr_wait);
726 remove_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); 730 remove_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait);
727 731
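
The point of the two hidp hunks above is ordering: the session thread
bumps session->terminate before tearing down its wait queues, so a
request path racing with shutdown fails fast instead of blocking on a
dying session. The same pattern in miniature (example_get_report is
illustrative):

static int example_get_report(struct hidp_session *session)
{
	/* Set by the exiting session thread before teardown */
	if (atomic_read(&session->terminate))
		return -EIO;

	/* ... safe to queue the GET_REPORT and wait for the reply ... */
	return 0;
}
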
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 82a829d90b0f..5d0f1ca0a314 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -304,8 +304,6 @@ error:
304void __exit hidp_cleanup_sockets(void) 304void __exit hidp_cleanup_sockets(void)
305{ 305{
306 bt_procfs_cleanup(&init_net, "hidp"); 306 bt_procfs_cleanup(&init_net, "hidp");
307 if (bt_sock_unregister(BTPROTO_HIDP) < 0) 307 bt_sock_unregister(BTPROTO_HIDP);
308 BT_ERR("Can't unregister HIDP socket");
309
310 proto_unregister(&hidp_proto); 308 proto_unregister(&hidp_proto);
311} 309}
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 1bcfb8422fdc..7f9704993b74 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1312,8 +1312,6 @@ error:
1312void l2cap_cleanup_sockets(void) 1312void l2cap_cleanup_sockets(void)
1313{ 1313{
1314 bt_procfs_cleanup(&init_net, "l2cap"); 1314 bt_procfs_cleanup(&init_net, "l2cap");
1315 if (bt_sock_unregister(BTPROTO_L2CAP) < 0) 1315 bt_sock_unregister(BTPROTO_L2CAP);
1316 BT_ERR("L2CAP socket unregistration failed");
1317
1318 proto_unregister(&l2cap_proto); 1316 proto_unregister(&l2cap_proto);
1319} 1317}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 39395c7144aa..03e7e732215f 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -384,7 +384,8 @@ static u32 get_supported_settings(struct hci_dev *hdev)
384 384
385 if (lmp_bredr_capable(hdev)) { 385 if (lmp_bredr_capable(hdev)) {
386 settings |= MGMT_SETTING_CONNECTABLE; 386 settings |= MGMT_SETTING_CONNECTABLE;
387 settings |= MGMT_SETTING_FAST_CONNECTABLE; 387 if (hdev->hci_ver >= BLUETOOTH_VER_1_2)
388 settings |= MGMT_SETTING_FAST_CONNECTABLE;
388 settings |= MGMT_SETTING_DISCOVERABLE; 389 settings |= MGMT_SETTING_DISCOVERABLE;
389 settings |= MGMT_SETTING_BREDR; 390 settings |= MGMT_SETTING_BREDR;
390 settings |= MGMT_SETTING_LINK_SECURITY; 391 settings |= MGMT_SETTING_LINK_SECURITY;
@@ -409,6 +410,9 @@ static u32 get_current_settings(struct hci_dev *hdev)
409 if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) 410 if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags))
410 settings |= MGMT_SETTING_CONNECTABLE; 411 settings |= MGMT_SETTING_CONNECTABLE;
411 412
413 if (test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags))
414 settings |= MGMT_SETTING_FAST_CONNECTABLE;
415
412 if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) 416 if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags))
413 settings |= MGMT_SETTING_DISCOVERABLE; 417 settings |= MGMT_SETTING_DISCOVERABLE;
414 418
@@ -591,32 +595,33 @@ static void create_eir(struct hci_dev *hdev, u8 *data)
591 ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); 595 ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
592} 596}
593 597
594static int update_eir(struct hci_dev *hdev) 598static void update_eir(struct hci_request *req)
595{ 599{
600 struct hci_dev *hdev = req->hdev;
596 struct hci_cp_write_eir cp; 601 struct hci_cp_write_eir cp;
597 602
598 if (!hdev_is_powered(hdev)) 603 if (!hdev_is_powered(hdev))
599 return 0; 604 return;
600 605
601 if (!lmp_ext_inq_capable(hdev)) 606 if (!lmp_ext_inq_capable(hdev))
602 return 0; 607 return;
603 608
604 if (!test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) 609 if (!test_bit(HCI_SSP_ENABLED, &hdev->dev_flags))
605 return 0; 610 return;
606 611
607 if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) 612 if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
608 return 0; 613 return;
609 614
610 memset(&cp, 0, sizeof(cp)); 615 memset(&cp, 0, sizeof(cp));
611 616
612 create_eir(hdev, cp.data); 617 create_eir(hdev, cp.data);
613 618
614 if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0) 619 if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0)
615 return 0; 620 return;
616 621
617 memcpy(hdev->eir, cp.data, sizeof(cp.data)); 622 memcpy(hdev->eir, cp.data, sizeof(cp.data));
618 623
619 return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp); 624 hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
620} 625}
621 626
622static u8 get_service_classes(struct hci_dev *hdev) 627static u8 get_service_classes(struct hci_dev *hdev)
@@ -630,47 +635,48 @@ static u8 get_service_classes(struct hci_dev *hdev)
630 return val; 635 return val;
631} 636}
632 637
633static int update_class(struct hci_dev *hdev) 638static void update_class(struct hci_request *req)
634{ 639{
640 struct hci_dev *hdev = req->hdev;
635 u8 cod[3]; 641 u8 cod[3];
636 int err;
637 642
638 BT_DBG("%s", hdev->name); 643 BT_DBG("%s", hdev->name);
639 644
640 if (!hdev_is_powered(hdev)) 645 if (!hdev_is_powered(hdev))
641 return 0; 646 return;
642 647
643 if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) 648 if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
644 return 0; 649 return;
645 650
646 cod[0] = hdev->minor_class; 651 cod[0] = hdev->minor_class;
647 cod[1] = hdev->major_class; 652 cod[1] = hdev->major_class;
648 cod[2] = get_service_classes(hdev); 653 cod[2] = get_service_classes(hdev);
649 654
650 if (memcmp(cod, hdev->dev_class, 3) == 0) 655 if (memcmp(cod, hdev->dev_class, 3) == 0)
651 return 0; 656 return;
652
653 err = hci_send_cmd(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod);
654 if (err == 0)
655 set_bit(HCI_PENDING_CLASS, &hdev->dev_flags);
656 657
657 return err; 658 hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod);
658} 659}
659 660
660static void service_cache_off(struct work_struct *work) 661static void service_cache_off(struct work_struct *work)
661{ 662{
662 struct hci_dev *hdev = container_of(work, struct hci_dev, 663 struct hci_dev *hdev = container_of(work, struct hci_dev,
663 service_cache.work); 664 service_cache.work);
665 struct hci_request req;
664 666
665 if (!test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) 667 if (!test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
666 return; 668 return;
667 669
670 hci_req_init(&req, hdev);
671
668 hci_dev_lock(hdev); 672 hci_dev_lock(hdev);
669 673
670 update_eir(hdev); 674 update_eir(&req);
671 update_class(hdev); 675 update_class(&req);
672 676
673 hci_dev_unlock(hdev); 677 hci_dev_unlock(hdev);
678
679 hci_req_run(&req, NULL);
674} 680}
675 681
676static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) 682static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev)
@@ -994,11 +1000,64 @@ failed:
994 return err; 1000 return err;
995} 1001}
996 1002
1003static void write_fast_connectable(struct hci_request *req, bool enable)
1004{
1005 struct hci_dev *hdev = req->hdev;
1006 struct hci_cp_write_page_scan_activity acp;
1007 u8 type;
1008
1009 if (hdev->hci_ver < BLUETOOTH_VER_1_2)
1010 return;
1011
1012 if (enable) {
1013 type = PAGE_SCAN_TYPE_INTERLACED;
1014
1015 /* 160 msec page scan interval */
1016 acp.interval = __constant_cpu_to_le16(0x0100);
1017 } else {
1018 type = PAGE_SCAN_TYPE_STANDARD; /* default */
1019
1020 /* default 1.28 sec page scan */
1021 acp.interval = __constant_cpu_to_le16(0x0800);
1022 }
1023
1024 acp.window = __constant_cpu_to_le16(0x0012);
1025
1026 if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval ||
1027 __cpu_to_le16(hdev->page_scan_window) != acp.window)
1028 hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY,
1029 sizeof(acp), &acp);
1030
1031 if (hdev->page_scan_type != type)
1032 hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type);
1033}
1034
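/* Illustrative note, not part of the patch: the intervals above are
 * expressed in baseband slots of 0.625 ms, so
 *   0x0100 * 0.625 ms = 160 ms   (fast connectable page scan interval)
 *   0x0800 * 0.625 ms = 1.28 s   (the default page scan interval)
 *   0x0012 * 0.625 ms = 11.25 ms (page scan window in both modes)
 * Interlaced scanning plus the shorter interval is what speeds up
 * incoming connection setup, at the cost of extra radio duty cycle.
 */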
1035static void set_connectable_complete(struct hci_dev *hdev, u8 status)
1036{
1037 struct pending_cmd *cmd;
1038
1039 BT_DBG("status 0x%02x", status);
1040
1041 hci_dev_lock(hdev);
1042
1043 cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev);
1044 if (!cmd)
1045 goto unlock;
1046
1047 send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev);
1048
1049 mgmt_pending_remove(cmd);
1050
1051unlock:
1052 hci_dev_unlock(hdev);
1053}
1054
997static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, 1055static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
998 u16 len) 1056 u16 len)
999{ 1057{
1000 struct mgmt_mode *cp = data; 1058 struct mgmt_mode *cp = data;
1001 struct pending_cmd *cmd; 1059 struct pending_cmd *cmd;
1060 struct hci_request req;
1002 u8 scan; 1061 u8 scan;
1003 int err; 1062 int err;
1004 1063
@@ -1065,7 +1124,20 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
1065 cancel_delayed_work(&hdev->discov_off); 1124 cancel_delayed_work(&hdev->discov_off);
1066 } 1125 }
1067 1126
1068 err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); 1127 hci_req_init(&req, hdev);
1128
1129 hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
1130
1131 /* If we're going from non-connectable to connectable or
1132 * vice-versa when fast connectable is enabled, ensure that fast
1133 * connectable gets disabled. write_fast_connectable won't do
1134 * anything if the page scan parameters are already what they
1135 * should be.
1136 */
1137 if (cp->val || test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags))
1138 write_fast_connectable(&req, false);
1139
1140 err = hci_req_run(&req, set_connectable_complete);
1069 if (err < 0) 1141 if (err < 0)
1070 mgmt_pending_remove(cmd); 1142 mgmt_pending_remove(cmd);
1071 1143
@@ -1332,6 +1404,29 @@ unlock:
1332 return err; 1404 return err;
1333} 1405}
1334 1406
1407/* This is a helper function to test for pending mgmt commands that can
1408 * cause CoD or EIR HCI commands. We can only allow one such pending
1409 * mgmt command at a time; otherwise we cannot easily track what the
1410 * current values are or will be, and from that decide whether a new
1411 * HCI command needs to be sent and, if so, with what value.
1412 */
1413static bool pending_eir_or_class(struct hci_dev *hdev)
1414{
1415 struct pending_cmd *cmd;
1416
1417 list_for_each_entry(cmd, &hdev->mgmt_pending, list) {
1418 switch (cmd->opcode) {
1419 case MGMT_OP_ADD_UUID:
1420 case MGMT_OP_REMOVE_UUID:
1421 case MGMT_OP_SET_DEV_CLASS:
1422 case MGMT_OP_SET_POWERED:
1423 return true;
1424 }
1425 }
1426
1427 return false;
1428}
1429
1335static const u8 bluetooth_base_uuid[] = { 1430static const u8 bluetooth_base_uuid[] = {
1336 0xfb, 0x34, 0x9b, 0x5f, 0x80, 0x00, 0x00, 0x80, 1431 0xfb, 0x34, 0x9b, 0x5f, 0x80, 0x00, 0x00, 0x80,
1337 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 1432 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -1351,10 +1446,37 @@ static u8 get_uuid_size(const u8 *uuid)
1351 return 16; 1446 return 16;
1352} 1447}
1353 1448
1449static void mgmt_class_complete(struct hci_dev *hdev, u16 mgmt_op, u8 status)
1450{
1451 struct pending_cmd *cmd;
1452
1453 hci_dev_lock(hdev);
1454
1455 cmd = mgmt_pending_find(mgmt_op, hdev);
1456 if (!cmd)
1457 goto unlock;
1458
1459 cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status),
1460 hdev->dev_class, 3);
1461
1462 mgmt_pending_remove(cmd);
1463
1464unlock:
1465 hci_dev_unlock(hdev);
1466}
1467
1468static void add_uuid_complete(struct hci_dev *hdev, u8 status)
1469{
1470 BT_DBG("status 0x%02x", status);
1471
1472 mgmt_class_complete(hdev, MGMT_OP_ADD_UUID, status);
1473}
1474
1354static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) 1475static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
1355{ 1476{
1356 struct mgmt_cp_add_uuid *cp = data; 1477 struct mgmt_cp_add_uuid *cp = data;
1357 struct pending_cmd *cmd; 1478 struct pending_cmd *cmd;
1479 struct hci_request req;
1358 struct bt_uuid *uuid; 1480 struct bt_uuid *uuid;
1359 int err; 1481 int err;
1360 1482
@@ -1362,7 +1484,7 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
1362 1484
1363 hci_dev_lock(hdev); 1485 hci_dev_lock(hdev);
1364 1486
1365 if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { 1487 if (pending_eir_or_class(hdev)) {
1366 err = cmd_status(sk, hdev->id, MGMT_OP_ADD_UUID, 1488 err = cmd_status(sk, hdev->id, MGMT_OP_ADD_UUID,
1367 MGMT_STATUS_BUSY); 1489 MGMT_STATUS_BUSY);
1368 goto failed; 1490 goto failed;
@@ -1380,23 +1502,28 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
1380 1502
1381 list_add_tail(&uuid->list, &hdev->uuids); 1503 list_add_tail(&uuid->list, &hdev->uuids);
1382 1504
1383 err = update_class(hdev); 1505 hci_req_init(&req, hdev);
1384 if (err < 0)
1385 goto failed;
1386 1506
1387 err = update_eir(hdev); 1507 update_class(&req);
1388 if (err < 0) 1508 update_eir(&req);
1389 goto failed; 1509
1510 err = hci_req_run(&req, add_uuid_complete);
1511 if (err < 0) {
1512 if (err != -ENODATA)
1513 goto failed;
1390 1514
1391 if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
1392 err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_UUID, 0, 1515 err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_UUID, 0,
1393 hdev->dev_class, 3); 1516 hdev->dev_class, 3);
1394 goto failed; 1517 goto failed;
1395 } 1518 }
1396 1519
1397 cmd = mgmt_pending_add(sk, MGMT_OP_ADD_UUID, hdev, data, len); 1520 cmd = mgmt_pending_add(sk, MGMT_OP_ADD_UUID, hdev, data, len);
1398 if (!cmd) 1521 if (!cmd) {
1399 err = -ENOMEM; 1522 err = -ENOMEM;
1523 goto failed;
1524 }
1525
1526 err = 0;
1400 1527
1401failed: 1528failed:
1402 hci_dev_unlock(hdev); 1529 hci_dev_unlock(hdev);
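
Note the error-handling idiom this hunk establishes (remove_uuid and set_dev_class repeat it below): hci_req_run() failing with -ENODATA means update_class()/update_eir() queued nothing because the controller state already matched, so the management command is answered immediately instead of being parked as a pending command. A toy model of that fast path, with illustrative names only:

#include <stdio.h>
#include <errno.h>

/* Toy model of the add_uuid() fast path: an empty command batch makes
 * run() fail with -ENODATA, and the caller completes right away rather
 * than waiting for an asynchronous callback. */
static int run(int queued, void (*complete)(void))
{
	if (!queued)
		return -ENODATA;
	complete();
	return 0;
}

static void add_uuid_complete(void)
{
	printf("completed asynchronously\n");
}

int main(void)
{
	if (run(0, add_uuid_complete) == -ENODATA)
		printf("nothing to send: complete the mgmt command now\n");
	return 0;
}
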
@@ -1417,6 +1544,13 @@ static bool enable_service_cache(struct hci_dev *hdev)
1417 return false; 1544 return false;
1418} 1545}
1419 1546
1547static void remove_uuid_complete(struct hci_dev *hdev, u8 status)
1548{
1549 BT_DBG("status 0x%02x", status);
1550
1551 mgmt_class_complete(hdev, MGMT_OP_REMOVE_UUID, status);
1552}
1553
1420static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, 1554static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
1421 u16 len) 1555 u16 len)
1422{ 1556{
@@ -1424,13 +1558,14 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
1424 struct pending_cmd *cmd; 1558 struct pending_cmd *cmd;
1425 struct bt_uuid *match, *tmp; 1559 struct bt_uuid *match, *tmp;
1426 u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 1560 u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1561 struct hci_request req;
1427 int err, found; 1562 int err, found;
1428 1563
1429 BT_DBG("request for %s", hdev->name); 1564 BT_DBG("request for %s", hdev->name);
1430 1565
1431 hci_dev_lock(hdev); 1566 hci_dev_lock(hdev);
1432 1567
1433 if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { 1568 if (pending_eir_or_class(hdev)) {
1434 err = cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID, 1569 err = cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID,
1435 MGMT_STATUS_BUSY); 1570 MGMT_STATUS_BUSY);
1436 goto unlock; 1571 goto unlock;
@@ -1466,34 +1601,47 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
1466 } 1601 }
1467 1602
1468update_class: 1603update_class:
1469 err = update_class(hdev); 1604 hci_req_init(&req, hdev);
1470 if (err < 0)
1471 goto unlock;
1472 1605
1473 err = update_eir(hdev); 1606 update_class(&req);
1474 if (err < 0) 1607 update_eir(&req);
1475 goto unlock; 1608
1609 err = hci_req_run(&req, remove_uuid_complete);
1610 if (err < 0) {
1611 if (err != -ENODATA)
1612 goto unlock;
1476 1613
1477 if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
1478 err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID, 0, 1614 err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID, 0,
1479 hdev->dev_class, 3); 1615 hdev->dev_class, 3);
1480 goto unlock; 1616 goto unlock;
1481 } 1617 }
1482 1618
1483 cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_UUID, hdev, data, len); 1619 cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_UUID, hdev, data, len);
1484 if (!cmd) 1620 if (!cmd) {
1485 err = -ENOMEM; 1621 err = -ENOMEM;
1622 goto unlock;
1623 }
1624
1625 err = 0;
1486 1626
1487unlock: 1627unlock:
1488 hci_dev_unlock(hdev); 1628 hci_dev_unlock(hdev);
1489 return err; 1629 return err;
1490} 1630}
1491 1631
1632static void set_class_complete(struct hci_dev *hdev, u8 status)
1633{
1634 BT_DBG("status 0x%02x", status);
1635
1636 mgmt_class_complete(hdev, MGMT_OP_SET_DEV_CLASS, status);
1637}
1638
1492static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, 1639static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,
1493 u16 len) 1640 u16 len)
1494{ 1641{
1495 struct mgmt_cp_set_dev_class *cp = data; 1642 struct mgmt_cp_set_dev_class *cp = data;
1496 struct pending_cmd *cmd; 1643 struct pending_cmd *cmd;
1644 struct hci_request req;
1497 int err; 1645 int err;
1498 1646
1499 BT_DBG("request for %s", hdev->name); 1647 BT_DBG("request for %s", hdev->name);
@@ -1502,15 +1650,19 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,
1502 return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 1650 return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
1503 MGMT_STATUS_NOT_SUPPORTED); 1651 MGMT_STATUS_NOT_SUPPORTED);
1504 1652
1505 if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) 1653 hci_dev_lock(hdev);
1506 return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
1507 MGMT_STATUS_BUSY);
1508 1654
1509 if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0) 1655 if (pending_eir_or_class(hdev)) {
1510 return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 1656 err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
1511 MGMT_STATUS_INVALID_PARAMS); 1657 MGMT_STATUS_BUSY);
1658 goto unlock;
1659 }
1512 1660
1513 hci_dev_lock(hdev); 1661 if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0) {
1662 err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
1663 MGMT_STATUS_INVALID_PARAMS);
1664 goto unlock;
1665 }
1514 1666
1515 hdev->major_class = cp->major; 1667 hdev->major_class = cp->major;
1516 hdev->minor_class = cp->minor; 1668 hdev->minor_class = cp->minor;
@@ -1521,26 +1673,34 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,
1521 goto unlock; 1673 goto unlock;
1522 } 1674 }
1523 1675
1676 hci_req_init(&req, hdev);
1677
1524 if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) { 1678 if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) {
1525 hci_dev_unlock(hdev); 1679 hci_dev_unlock(hdev);
1526 cancel_delayed_work_sync(&hdev->service_cache); 1680 cancel_delayed_work_sync(&hdev->service_cache);
1527 hci_dev_lock(hdev); 1681 hci_dev_lock(hdev);
1528 update_eir(hdev); 1682 update_eir(&req);
1529 } 1683 }
1530 1684
1531 err = update_class(hdev); 1685 update_class(&req);
1532 if (err < 0) 1686
1533 goto unlock; 1687 err = hci_req_run(&req, set_class_complete);
1688 if (err < 0) {
1689 if (err != -ENODATA)
1690 goto unlock;
1534 1691
1535 if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
1536 err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0, 1692 err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0,
1537 hdev->dev_class, 3); 1693 hdev->dev_class, 3);
1538 goto unlock; 1694 goto unlock;
1539 } 1695 }
1540 1696
1541 cmd = mgmt_pending_add(sk, MGMT_OP_SET_DEV_CLASS, hdev, data, len); 1697 cmd = mgmt_pending_add(sk, MGMT_OP_SET_DEV_CLASS, hdev, data, len);
1542 if (!cmd) 1698 if (!cmd) {
1543 err = -ENOMEM; 1699 err = -ENOMEM;
1700 goto unlock;
1701 }
1702
1703 err = 0;
1544 1704
1545unlock: 1705unlock:
1546 hci_dev_unlock(hdev); 1706 hci_dev_unlock(hdev);
@@ -2140,7 +2300,7 @@ unlock:
2140} 2300}
2141 2301
2142static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev, 2302static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
2143 bdaddr_t *bdaddr, u8 type, u16 mgmt_op, 2303 struct mgmt_addr_info *addr, u16 mgmt_op,
2144 u16 hci_op, __le32 passkey) 2304 u16 hci_op, __le32 passkey)
2145{ 2305{
2146 struct pending_cmd *cmd; 2306 struct pending_cmd *cmd;
@@ -2150,37 +2310,41 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
2150 hci_dev_lock(hdev); 2310 hci_dev_lock(hdev);
2151 2311
2152 if (!hdev_is_powered(hdev)) { 2312 if (!hdev_is_powered(hdev)) {
2153 err = cmd_status(sk, hdev->id, mgmt_op, 2313 err = cmd_complete(sk, hdev->id, mgmt_op,
2154 MGMT_STATUS_NOT_POWERED); 2314 MGMT_STATUS_NOT_POWERED, addr,
2315 sizeof(*addr));
2155 goto done; 2316 goto done;
2156 } 2317 }
2157 2318
2158 if (type == BDADDR_BREDR) 2319 if (addr->type == BDADDR_BREDR)
2159 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, bdaddr); 2320 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &addr->bdaddr);
2160 else 2321 else
2161 conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, bdaddr); 2322 conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &addr->bdaddr);
2162 2323
2163 if (!conn) { 2324 if (!conn) {
2164 err = cmd_status(sk, hdev->id, mgmt_op, 2325 err = cmd_complete(sk, hdev->id, mgmt_op,
2165 MGMT_STATUS_NOT_CONNECTED); 2326 MGMT_STATUS_NOT_CONNECTED, addr,
2327 sizeof(*addr));
2166 goto done; 2328 goto done;
2167 } 2329 }
2168 2330
2169 if (type == BDADDR_LE_PUBLIC || type == BDADDR_LE_RANDOM) { 2331 if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) {
2170 /* Continue with pairing via SMP */ 2332 /* Continue with pairing via SMP */
2171 err = smp_user_confirm_reply(conn, mgmt_op, passkey); 2333 err = smp_user_confirm_reply(conn, mgmt_op, passkey);
2172 2334
2173 if (!err) 2335 if (!err)
2174 err = cmd_status(sk, hdev->id, mgmt_op, 2336 err = cmd_complete(sk, hdev->id, mgmt_op,
2175 MGMT_STATUS_SUCCESS); 2337 MGMT_STATUS_SUCCESS, addr,
2338 sizeof(*addr));
2176 else 2339 else
2177 err = cmd_status(sk, hdev->id, mgmt_op, 2340 err = cmd_complete(sk, hdev->id, mgmt_op,
2178 MGMT_STATUS_FAILED); 2341 MGMT_STATUS_FAILED, addr,
2342 sizeof(*addr));
2179 2343
2180 goto done; 2344 goto done;
2181 } 2345 }
2182 2346
2183 cmd = mgmt_pending_add(sk, mgmt_op, hdev, bdaddr, sizeof(*bdaddr)); 2347 cmd = mgmt_pending_add(sk, mgmt_op, hdev, addr, sizeof(*addr));
2184 if (!cmd) { 2348 if (!cmd) {
2185 err = -ENOMEM; 2349 err = -ENOMEM;
2186 goto done; 2350 goto done;
@@ -2190,11 +2354,12 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
2190 if (hci_op == HCI_OP_USER_PASSKEY_REPLY) { 2354 if (hci_op == HCI_OP_USER_PASSKEY_REPLY) {
2191 struct hci_cp_user_passkey_reply cp; 2355 struct hci_cp_user_passkey_reply cp;
2192 2356
2193 bacpy(&cp.bdaddr, bdaddr); 2357 bacpy(&cp.bdaddr, &addr->bdaddr);
2194 cp.passkey = passkey; 2358 cp.passkey = passkey;
2195 err = hci_send_cmd(hdev, hci_op, sizeof(cp), &cp); 2359 err = hci_send_cmd(hdev, hci_op, sizeof(cp), &cp);
2196 } else 2360 } else
2197 err = hci_send_cmd(hdev, hci_op, sizeof(*bdaddr), bdaddr); 2361 err = hci_send_cmd(hdev, hci_op, sizeof(addr->bdaddr),
2362 &addr->bdaddr);
2198 2363
2199 if (err < 0) 2364 if (err < 0)
2200 mgmt_pending_remove(cmd); 2365 mgmt_pending_remove(cmd);
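
The user_pairing_resp() conversion above replaces every cmd_status() reply with cmd_complete() carrying the address back, so userspace can match a confirm/passkey reply to the right device even with several pairings in flight. A small self-contained sketch of the echoed parameter block, mirroring struct mgmt_addr_info as used in the hunk (bdaddr_t is six bytes on the wire and the struct is packed):

#include <stdint.h>
#include <stdio.h>

/* Wire layout of the parameter block now echoed by user_pairing_resp(). */
struct mgmt_addr_info {
	uint8_t bdaddr[6];	/* device address the reply refers to */
	uint8_t type;		/* BDADDR_BREDR, BDADDR_LE_PUBLIC, ... */
} __attribute__((packed));

int main(void)
{
	printf("reply parameter size: %zu bytes\n",
	       sizeof(struct mgmt_addr_info));	/* 7 */
	return 0;
}
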
@@ -2211,7 +2376,7 @@ static int pin_code_neg_reply(struct sock *sk, struct hci_dev *hdev,
2211 2376
2212 BT_DBG(""); 2377 BT_DBG("");
2213 2378
2214 return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, 2379 return user_pairing_resp(sk, hdev, &cp->addr,
2215 MGMT_OP_PIN_CODE_NEG_REPLY, 2380 MGMT_OP_PIN_CODE_NEG_REPLY,
2216 HCI_OP_PIN_CODE_NEG_REPLY, 0); 2381 HCI_OP_PIN_CODE_NEG_REPLY, 0);
2217} 2382}
@@ -2227,7 +2392,7 @@ static int user_confirm_reply(struct sock *sk, struct hci_dev *hdev, void *data,
2227 return cmd_status(sk, hdev->id, MGMT_OP_USER_CONFIRM_REPLY, 2392 return cmd_status(sk, hdev->id, MGMT_OP_USER_CONFIRM_REPLY,
2228 MGMT_STATUS_INVALID_PARAMS); 2393 MGMT_STATUS_INVALID_PARAMS);
2229 2394
2230 return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, 2395 return user_pairing_resp(sk, hdev, &cp->addr,
2231 MGMT_OP_USER_CONFIRM_REPLY, 2396 MGMT_OP_USER_CONFIRM_REPLY,
2232 HCI_OP_USER_CONFIRM_REPLY, 0); 2397 HCI_OP_USER_CONFIRM_REPLY, 0);
2233} 2398}
@@ -2239,7 +2404,7 @@ static int user_confirm_neg_reply(struct sock *sk, struct hci_dev *hdev,
2239 2404
2240 BT_DBG(""); 2405 BT_DBG("");
2241 2406
2242 return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, 2407 return user_pairing_resp(sk, hdev, &cp->addr,
2243 MGMT_OP_USER_CONFIRM_NEG_REPLY, 2408 MGMT_OP_USER_CONFIRM_NEG_REPLY,
2244 HCI_OP_USER_CONFIRM_NEG_REPLY, 0); 2409 HCI_OP_USER_CONFIRM_NEG_REPLY, 0);
2245} 2410}
@@ -2251,7 +2416,7 @@ static int user_passkey_reply(struct sock *sk, struct hci_dev *hdev, void *data,
2251 2416
2252 BT_DBG(""); 2417 BT_DBG("");
2253 2418
2254 return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, 2419 return user_pairing_resp(sk, hdev, &cp->addr,
2255 MGMT_OP_USER_PASSKEY_REPLY, 2420 MGMT_OP_USER_PASSKEY_REPLY,
2256 HCI_OP_USER_PASSKEY_REPLY, cp->passkey); 2421 HCI_OP_USER_PASSKEY_REPLY, cp->passkey);
2257} 2422}
@@ -2263,18 +2428,47 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev,
2263 2428
2264 BT_DBG(""); 2429 BT_DBG("");
2265 2430
2266 return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, 2431 return user_pairing_resp(sk, hdev, &cp->addr,
2267 MGMT_OP_USER_PASSKEY_NEG_REPLY, 2432 MGMT_OP_USER_PASSKEY_NEG_REPLY,
2268 HCI_OP_USER_PASSKEY_NEG_REPLY, 0); 2433 HCI_OP_USER_PASSKEY_NEG_REPLY, 0);
2269} 2434}
2270 2435
2271static int update_name(struct hci_dev *hdev, const char *name) 2436static void update_name(struct hci_request *req)
2272{ 2437{
2438 struct hci_dev *hdev = req->hdev;
2273 struct hci_cp_write_local_name cp; 2439 struct hci_cp_write_local_name cp;
2274 2440
2275 memcpy(cp.name, name, sizeof(cp.name)); 2441 memcpy(cp.name, hdev->dev_name, sizeof(cp.name));
2442
2443 hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp);
2444}
2445
2446static void set_name_complete(struct hci_dev *hdev, u8 status)
2447{
2448 struct mgmt_cp_set_local_name *cp;
2449 struct pending_cmd *cmd;
2450
2451 BT_DBG("status 0x%02x", status);
2452
2453 hci_dev_lock(hdev);
2454
2455 cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev);
2456 if (!cmd)
2457 goto unlock;
2458
2459 cp = cmd->param;
2276 2460
2277 return hci_send_cmd(hdev, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); 2461 if (status)
2462 cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME,
2463 mgmt_status(status));
2464 else
2465 cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0,
2466 cp, sizeof(*cp));
2467
2468 mgmt_pending_remove(cmd);
2469
2470unlock:
2471 hci_dev_unlock(hdev);
2278} 2472}
2279 2473
2280static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, 2474static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
@@ -2282,12 +2476,24 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
2282{ 2476{
2283 struct mgmt_cp_set_local_name *cp = data; 2477 struct mgmt_cp_set_local_name *cp = data;
2284 struct pending_cmd *cmd; 2478 struct pending_cmd *cmd;
2479 struct hci_request req;
2285 int err; 2480 int err;
2286 2481
2287 BT_DBG(""); 2482 BT_DBG("");
2288 2483
2289 hci_dev_lock(hdev); 2484 hci_dev_lock(hdev);
2290 2485
2486 /* If the old values are the same as the new ones, just return a
2487 * direct command complete event.
2488 */
2489 if (!memcmp(hdev->dev_name, cp->name, sizeof(hdev->dev_name)) &&
2490 !memcmp(hdev->short_name, cp->short_name,
2491 sizeof(hdev->short_name))) {
2492 err = cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0,
2493 data, len);
2494 goto failed;
2495 }
2496
2291 memcpy(hdev->short_name, cp->short_name, sizeof(hdev->short_name)); 2497 memcpy(hdev->short_name, cp->short_name, sizeof(hdev->short_name));
2292 2498
2293 if (!hdev_is_powered(hdev)) { 2499 if (!hdev_is_powered(hdev)) {
@@ -2310,7 +2516,19 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
2310 goto failed; 2516 goto failed;
2311 } 2517 }
2312 2518
2313 err = update_name(hdev, cp->name); 2519 memcpy(hdev->dev_name, cp->name, sizeof(hdev->dev_name));
2520
2521 hci_req_init(&req, hdev);
2522
2523 if (lmp_bredr_capable(hdev)) {
2524 update_name(&req);
2525 update_eir(&req);
2526 }
2527
2528 if (lmp_le_capable(hdev))
2529 hci_update_ad(&req);
2530
2531 err = hci_req_run(&req, set_name_complete);
2314 if (err < 0) 2532 if (err < 0)
2315 mgmt_pending_remove(cmd); 2533 mgmt_pending_remove(cmd);
2316 2534
@@ -2698,6 +2916,7 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data,
2698 u16 len) 2916 u16 len)
2699{ 2917{
2700 struct mgmt_cp_set_device_id *cp = data; 2918 struct mgmt_cp_set_device_id *cp = data;
2919 struct hci_request req;
2701 int err; 2920 int err;
2702 __u16 source; 2921 __u16 source;
2703 2922
@@ -2718,24 +2937,59 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data,
2718 2937
2719 err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, 0, NULL, 0); 2938 err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, 0, NULL, 0);
2720 2939
2721 update_eir(hdev); 2940 hci_req_init(&req, hdev);
2941 update_eir(&req);
2942 hci_req_run(&req, NULL);
2722 2943
2723 hci_dev_unlock(hdev); 2944 hci_dev_unlock(hdev);
2724 2945
2725 return err; 2946 return err;
2726} 2947}
2727 2948
2949static void fast_connectable_complete(struct hci_dev *hdev, u8 status)
2950{
2951 struct pending_cmd *cmd;
2952
2953 BT_DBG("status 0x%02x", status);
2954
2955 hci_dev_lock(hdev);
2956
2957 cmd = mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev);
2958 if (!cmd)
2959 goto unlock;
2960
2961 if (status) {
2962 cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
2963 mgmt_status(status));
2964 } else {
2965 struct mgmt_mode *cp = cmd->param;
2966
2967 if (cp->val)
2968 set_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags);
2969 else
2970 clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags);
2971
2972 send_settings_rsp(cmd->sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev);
2973 new_settings(hdev, cmd->sk);
2974 }
2975
2976 mgmt_pending_remove(cmd);
2977
2978unlock:
2979 hci_dev_unlock(hdev);
2980}
2981
2728static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, 2982static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,
2729 void *data, u16 len) 2983 void *data, u16 len)
2730{ 2984{
2731 struct mgmt_mode *cp = data; 2985 struct mgmt_mode *cp = data;
2732 struct hci_cp_write_page_scan_activity acp; 2986 struct pending_cmd *cmd;
2733 u8 type; 2987 struct hci_request req;
2734 int err; 2988 int err;
2735 2989
2736 BT_DBG("%s", hdev->name); 2990 BT_DBG("%s", hdev->name);
2737 2991
2738 if (!lmp_bredr_capable(hdev)) 2992 if (!lmp_bredr_capable(hdev) || hdev->hci_ver < BLUETOOTH_VER_1_2)
2739 return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, 2993 return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
2740 MGMT_STATUS_NOT_SUPPORTED); 2994 MGMT_STATUS_NOT_SUPPORTED);
2741 2995
@@ -2753,40 +3007,39 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,
2753 3007
2754 hci_dev_lock(hdev); 3008 hci_dev_lock(hdev);
2755 3009
2756 if (cp->val) { 3010 if (mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev)) {
2757 type = PAGE_SCAN_TYPE_INTERLACED; 3011 err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
3012 MGMT_STATUS_BUSY);
3013 goto unlock;
3014 }
2758 3015
2759 /* 160 msec page scan interval */ 3016 if (!!cp->val == test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) {
2760 acp.interval = __constant_cpu_to_le16(0x0100); 3017 err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE,
2761 } else { 3018 hdev);
2762 type = PAGE_SCAN_TYPE_STANDARD; /* default */ 3019 goto unlock;
3020 }
2763 3021
2764 /* default 1.28 sec page scan */ 3022 cmd = mgmt_pending_add(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev,
2765 acp.interval = __constant_cpu_to_le16(0x0800); 3023 data, len);
3024 if (!cmd) {
3025 err = -ENOMEM;
3026 goto unlock;
2766 } 3027 }
2767 3028
2768 /* default 11.25 msec page scan window */ 3029 hci_req_init(&req, hdev);
2769 acp.window = __constant_cpu_to_le16(0x0012);
2770 3030
2771 err = hci_send_cmd(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, sizeof(acp), 3031 write_fast_connectable(&req, cp->val);
2772 &acp);
2773 if (err < 0) {
2774 err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
2775 MGMT_STATUS_FAILED);
2776 goto done;
2777 }
2778 3032
2779 err = hci_send_cmd(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); 3033 err = hci_req_run(&req, fast_connectable_complete);
2780 if (err < 0) { 3034 if (err < 0) {
2781 err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, 3035 err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
2782 MGMT_STATUS_FAILED); 3036 MGMT_STATUS_FAILED);
2783 goto done; 3037 mgmt_pending_remove(cmd);
2784 } 3038 }
2785 3039
2786 err = cmd_complete(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, 0, 3040unlock:
2787 NULL, 0);
2788done:
2789 hci_dev_unlock(hdev); 3041 hci_dev_unlock(hdev);
3042
2790 return err; 3043 return err;
2791} 3044}
2792 3045
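
The inline code removed above encoded the page-scan timing directly: HCI page-scan values are counts of 0.625 ms baseband slots, so 0x0100 slots is the 160 ms fast-connectable interval, 0x0800 slots the 1.28 s default, and 0x0012 slots the 11.25 ms window, exactly as the deleted comments said. A self-contained check of that arithmetic:

/* Convert HCI page-scan slot counts (0.625 ms units) to milliseconds.
 * Values taken from the removed set_fast_connectable() code above. */
#include <stdio.h>

static double slots_to_ms(unsigned int slots)
{
	return slots * 0.625;
}

int main(void)
{
	printf("fast interval    0x0100 -> %.2f ms\n", slots_to_ms(0x0100)); /* 160.00 */
	printf("default interval 0x0800 -> %.2f ms\n", slots_to_ms(0x0800)); /* 1280.00 */
	printf("scan window      0x0012 -> %.2f ms\n", slots_to_ms(0x0012)); /* 11.25 */
	return 0;
}
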
@@ -3043,79 +3296,115 @@ static void settings_rsp(struct pending_cmd *cmd, void *data)
3043 mgmt_pending_free(cmd); 3296 mgmt_pending_free(cmd);
3044} 3297}
3045 3298
3046static int set_bredr_scan(struct hci_dev *hdev) 3299static void set_bredr_scan(struct hci_request *req)
3047{ 3300{
3301 struct hci_dev *hdev = req->hdev;
3048 u8 scan = 0; 3302 u8 scan = 0;
3049 3303
3304 /* Ensure that fast connectable is disabled. This function will
3305 * not do anything if the page scan parameters are already what
3306 * they should be.
3307 */
3308 write_fast_connectable(req, false);
3309
3050 if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) 3310 if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags))
3051 scan |= SCAN_PAGE; 3311 scan |= SCAN_PAGE;
3052 if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) 3312 if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags))
3053 scan |= SCAN_INQUIRY; 3313 scan |= SCAN_INQUIRY;
3054 3314
3055 if (!scan) 3315 if (scan)
3056 return 0; 3316 hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
3057
3058 return hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
3059} 3317}
3060 3318
3061int mgmt_powered(struct hci_dev *hdev, u8 powered) 3319static void powered_complete(struct hci_dev *hdev, u8 status)
3062{ 3320{
3063 struct cmd_lookup match = { NULL, hdev }; 3321 struct cmd_lookup match = { NULL, hdev };
3064 int err;
3065 3322
3066 if (!test_bit(HCI_MGMT, &hdev->dev_flags)) 3323 BT_DBG("status 0x%02x", status);
3067 return 0; 3324
3325 hci_dev_lock(hdev);
3068 3326
3069 mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); 3327 mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
3070 3328
3071 if (powered) { 3329 new_settings(hdev, match.sk);
3072 u8 link_sec;
3073 3330
3074 if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && 3331 hci_dev_unlock(hdev);
3075 !lmp_host_ssp_capable(hdev)) {
3076 u8 ssp = 1;
3077 3332
3078 hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &ssp); 3333 if (match.sk)
3079 } 3334 sock_put(match.sk);
3335}
3080 3336
3081 if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { 3337static int powered_update_hci(struct hci_dev *hdev)
3082 struct hci_cp_write_le_host_supported cp; 3338{
3339 struct hci_request req;
3340 u8 link_sec;
3083 3341
3084 cp.le = 1; 3342 hci_req_init(&req, hdev);
3085 cp.simul = lmp_le_br_capable(hdev);
3086 3343
3087 /* Check first if we already have the right 3344 if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) &&
3088 * host state (host features set) 3345 !lmp_host_ssp_capable(hdev)) {
3089 */ 3346 u8 ssp = 1;
3090 if (cp.le != lmp_host_le_capable(hdev) ||
3091 cp.simul != lmp_host_le_br_capable(hdev))
3092 hci_send_cmd(hdev,
3093 HCI_OP_WRITE_LE_HOST_SUPPORTED,
3094 sizeof(cp), &cp);
3095 }
3096 3347
3097 link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); 3348 hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, 1, &ssp);
3098 if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) 3349 }
3099 hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE,
3100 sizeof(link_sec), &link_sec);
3101 3350
3102 if (lmp_bredr_capable(hdev)) { 3351 if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
3103 set_bredr_scan(hdev); 3352 struct hci_cp_write_le_host_supported cp;
3104 update_class(hdev);
3105 update_name(hdev, hdev->dev_name);
3106 update_eir(hdev);
3107 }
3108 } else {
3109 u8 status = MGMT_STATUS_NOT_POWERED;
3110 u8 zero_cod[] = { 0, 0, 0 };
3111 3353
3112 mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status); 3354 cp.le = 1;
3355 cp.simul = lmp_le_br_capable(hdev);
3113 3356
3114 if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) 3357 /* Check first if we already have the right
3115 mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, 3358 * host state (host features set)
3116 zero_cod, sizeof(zero_cod), NULL); 3359 */
3360 if (cp.le != lmp_host_le_capable(hdev) ||
3361 cp.simul != lmp_host_le_br_capable(hdev))
3362 hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED,
3363 sizeof(cp), &cp);
3117 } 3364 }
3118 3365
3366 link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags);
3367 if (link_sec != test_bit(HCI_AUTH, &hdev->flags))
3368 hci_req_add(&req, HCI_OP_WRITE_AUTH_ENABLE,
3369 sizeof(link_sec), &link_sec);
3370
3371 if (lmp_bredr_capable(hdev)) {
3372 set_bredr_scan(&req);
3373 update_class(&req);
3374 update_name(&req);
3375 update_eir(&req);
3376 }
3377
3378 return hci_req_run(&req, powered_complete);
3379}
3380
3381int mgmt_powered(struct hci_dev *hdev, u8 powered)
3382{
3383 struct cmd_lookup match = { NULL, hdev };
3384 u8 status_not_powered = MGMT_STATUS_NOT_POWERED;
3385 u8 zero_cod[] = { 0, 0, 0 };
3386 int err;
3387
3388 if (!test_bit(HCI_MGMT, &hdev->dev_flags))
3389 return 0;
3390
3391 if (powered) {
3392 if (powered_update_hci(hdev) == 0)
3393 return 0;
3394
3395 mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp,
3396 &match);
3397 goto new_settings;
3398 }
3399
3400 mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
3401 mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status_not_powered);
3402
3403 if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0)
3404 mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
3405 zero_cod, sizeof(zero_cod), NULL);
3406
3407new_settings:
3119 err = new_settings(hdev, match.sk); 3408 err = new_settings(hdev, match.sk);
3120 3409
3121 if (match.sk) 3410 if (match.sk)
@@ -3152,7 +3441,7 @@ int mgmt_discoverable(struct hci_dev *hdev, u8 discoverable)
3152 3441
3153int mgmt_connectable(struct hci_dev *hdev, u8 connectable) 3442int mgmt_connectable(struct hci_dev *hdev, u8 connectable)
3154{ 3443{
3155 struct cmd_lookup match = { NULL, hdev }; 3444 struct pending_cmd *cmd;
3156 bool changed = false; 3445 bool changed = false;
3157 int err = 0; 3446 int err = 0;
3158 3447
@@ -3164,14 +3453,10 @@ int mgmt_connectable(struct hci_dev *hdev, u8 connectable)
3164 changed = true; 3453 changed = true;
3165 } 3454 }
3166 3455
3167 mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, hdev, settings_rsp, 3456 cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev);
3168 &match);
3169 3457
3170 if (changed) 3458 if (changed)
3171 err = new_settings(hdev, match.sk); 3459 err = new_settings(hdev, cmd ? cmd->sk : NULL);
3172
3173 if (match.sk)
3174 sock_put(match.sk);
3175 3460
3176 return err; 3461 return err;
3177} 3462}
@@ -3555,23 +3840,25 @@ int mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
3555 return err; 3840 return err;
3556} 3841}
3557 3842
3558static int clear_eir(struct hci_dev *hdev) 3843static void clear_eir(struct hci_request *req)
3559{ 3844{
3845 struct hci_dev *hdev = req->hdev;
3560 struct hci_cp_write_eir cp; 3846 struct hci_cp_write_eir cp;
3561 3847
3562 if (!lmp_ext_inq_capable(hdev)) 3848 if (!lmp_ext_inq_capable(hdev))
3563 return 0; 3849 return;
3564 3850
3565 memset(hdev->eir, 0, sizeof(hdev->eir)); 3851 memset(hdev->eir, 0, sizeof(hdev->eir));
3566 3852
3567 memset(&cp, 0, sizeof(cp)); 3853 memset(&cp, 0, sizeof(cp));
3568 3854
3569 return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp); 3855 hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
3570} 3856}
3571 3857
3572int mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) 3858int mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
3573{ 3859{
3574 struct cmd_lookup match = { NULL, hdev }; 3860 struct cmd_lookup match = { NULL, hdev };
3861 struct hci_request req;
3575 bool changed = false; 3862 bool changed = false;
3576 int err = 0; 3863 int err = 0;
3577 3864
@@ -3604,29 +3891,26 @@ int mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
3604 if (match.sk) 3891 if (match.sk)
3605 sock_put(match.sk); 3892 sock_put(match.sk);
3606 3893
3894 hci_req_init(&req, hdev);
3895
3607 if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) 3896 if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags))
3608 update_eir(hdev); 3897 update_eir(&req);
3609 else 3898 else
3610 clear_eir(hdev); 3899 clear_eir(&req);
3900
3901 hci_req_run(&req, NULL);
3611 3902
3612 return err; 3903 return err;
3613} 3904}
3614 3905
3615static void class_rsp(struct pending_cmd *cmd, void *data) 3906static void sk_lookup(struct pending_cmd *cmd, void *data)
3616{ 3907{
3617 struct cmd_lookup *match = data; 3908 struct cmd_lookup *match = data;
3618 3909
3619 cmd_complete(cmd->sk, cmd->index, cmd->opcode, match->mgmt_status,
3620 match->hdev->dev_class, 3);
3621
3622 list_del(&cmd->list);
3623
3624 if (match->sk == NULL) { 3910 if (match->sk == NULL) {
3625 match->sk = cmd->sk; 3911 match->sk = cmd->sk;
3626 sock_hold(match->sk); 3912 sock_hold(match->sk);
3627 } 3913 }
3628
3629 mgmt_pending_free(cmd);
3630} 3914}
3631 3915
3632int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, 3916int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
@@ -3635,11 +3919,9 @@ int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
3635 struct cmd_lookup match = { NULL, hdev, mgmt_status(status) }; 3919 struct cmd_lookup match = { NULL, hdev, mgmt_status(status) };
3636 int err = 0; 3920 int err = 0;
3637 3921
3638 clear_bit(HCI_PENDING_CLASS, &hdev->dev_flags); 3922 mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match);
3639 3923 mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match);
3640 mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, class_rsp, &match); 3924 mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match);
3641 mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, class_rsp, &match);
3642 mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, class_rsp, &match);
3643 3925
3644 if (!status) 3926 if (!status)
3645 err = mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, 3927 err = mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class,
@@ -3653,55 +3935,29 @@ int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
3653 3935
3654int mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) 3936int mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status)
3655{ 3937{
3656 struct pending_cmd *cmd;
3657 struct mgmt_cp_set_local_name ev; 3938 struct mgmt_cp_set_local_name ev;
3658 bool changed = false; 3939 struct pending_cmd *cmd;
3659 int err = 0;
3660 3940
3661 if (memcmp(name, hdev->dev_name, sizeof(hdev->dev_name)) != 0) { 3941 if (status)
3662 memcpy(hdev->dev_name, name, sizeof(hdev->dev_name)); 3942 return 0;
3663 changed = true;
3664 }
3665 3943
3666 memset(&ev, 0, sizeof(ev)); 3944 memset(&ev, 0, sizeof(ev));
3667 memcpy(ev.name, name, HCI_MAX_NAME_LENGTH); 3945 memcpy(ev.name, name, HCI_MAX_NAME_LENGTH);
3668 memcpy(ev.short_name, hdev->short_name, HCI_MAX_SHORT_NAME_LENGTH); 3946 memcpy(ev.short_name, hdev->short_name, HCI_MAX_SHORT_NAME_LENGTH);
3669 3947
3670 cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); 3948 cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev);
3671 if (!cmd) 3949 if (!cmd) {
3672 goto send_event; 3950 memcpy(hdev->dev_name, name, sizeof(hdev->dev_name));
3673
3674 /* Always assume that either the short or the complete name has
3675 * changed if there was a pending mgmt command */
3676 changed = true;
3677 3951
3678 if (status) { 3952 /* If this is an HCI command related to powering on the
3679 err = cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 3953 * HCI dev, don't send any mgmt signals.
3680 mgmt_status(status)); 3954 */
3681 goto failed; 3955 if (mgmt_pending_find(MGMT_OP_SET_POWERED, hdev))
3956 return 0;
3682 } 3957 }
3683 3958
3684 err = cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, &ev, 3959 return mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev),
3685 sizeof(ev)); 3960 cmd ? cmd->sk : NULL);
3686 if (err < 0)
3687 goto failed;
3688
3689send_event:
3690 if (changed)
3691 err = mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev,
3692 sizeof(ev), cmd ? cmd->sk : NULL);
3693
3694 /* EIR is taken care of separately when powering on the
3695 * adapter so only update them here if this is a name change
3696 * unrelated to power on.
3697 */
3698 if (!test_bit(HCI_INIT, &hdev->flags))
3699 update_eir(hdev);
3700
3701failed:
3702 if (cmd)
3703 mgmt_pending_remove(cmd);
3704 return err;
3705} 3961}
3706 3962
3707int mgmt_read_local_oob_data_reply_complete(struct hci_dev *hdev, u8 *hash, 3963int mgmt_read_local_oob_data_reply_complete(struct hci_dev *hdev, u8 *hash,
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index b23e2713fea8..ca957d34b0c8 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -69,7 +69,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
69 u8 sec_level, 69 u8 sec_level,
70 int *err); 70 int *err);
71static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst); 71static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst);
72static void rfcomm_session_del(struct rfcomm_session *s); 72static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s);
73 73
74/* ---- RFCOMM frame parsing macros ---- */ 74/* ---- RFCOMM frame parsing macros ---- */
75#define __get_dlci(b) ((b & 0xfc) >> 2) 75#define __get_dlci(b) ((b & 0xfc) >> 2)
@@ -108,12 +108,6 @@ static void rfcomm_schedule(void)
108 wake_up_process(rfcomm_thread); 108 wake_up_process(rfcomm_thread);
109} 109}
110 110
111static void rfcomm_session_put(struct rfcomm_session *s)
112{
113 if (atomic_dec_and_test(&s->refcnt))
114 rfcomm_session_del(s);
115}
116
117/* ---- RFCOMM FCS computation ---- */ 111/* ---- RFCOMM FCS computation ---- */
118 112
119/* reversed, 8-bit, poly=0x07 */ 113/* reversed, 8-bit, poly=0x07 */
@@ -249,16 +243,14 @@ static void rfcomm_session_set_timer(struct rfcomm_session *s, long timeout)
249{ 243{
250 BT_DBG("session %p state %ld timeout %ld", s, s->state, timeout); 244 BT_DBG("session %p state %ld timeout %ld", s, s->state, timeout);
251 245
252 if (!mod_timer(&s->timer, jiffies + timeout)) 246 mod_timer(&s->timer, jiffies + timeout);
253 rfcomm_session_hold(s);
254} 247}
255 248
256static void rfcomm_session_clear_timer(struct rfcomm_session *s) 249static void rfcomm_session_clear_timer(struct rfcomm_session *s)
257{ 250{
258 BT_DBG("session %p state %ld", s, s->state); 251 BT_DBG("session %p state %ld", s, s->state);
259 252
260 if (del_timer(&s->timer)) 253 del_timer_sync(&s->timer);
261 rfcomm_session_put(s);
262} 254}
263 255
264/* ---- RFCOMM DLCs ---- */ 256/* ---- RFCOMM DLCs ---- */
@@ -336,8 +328,6 @@ static void rfcomm_dlc_link(struct rfcomm_session *s, struct rfcomm_dlc *d)
336{ 328{
337 BT_DBG("dlc %p session %p", d, s); 329 BT_DBG("dlc %p session %p", d, s);
338 330
339 rfcomm_session_hold(s);
340
341 rfcomm_session_clear_timer(s); 331 rfcomm_session_clear_timer(s);
342 rfcomm_dlc_hold(d); 332 rfcomm_dlc_hold(d);
343 list_add(&d->list, &s->dlcs); 333 list_add(&d->list, &s->dlcs);
@@ -356,8 +346,6 @@ static void rfcomm_dlc_unlink(struct rfcomm_dlc *d)
356 346
357 if (list_empty(&s->dlcs)) 347 if (list_empty(&s->dlcs))
358 rfcomm_session_set_timer(s, RFCOMM_IDLE_TIMEOUT); 348 rfcomm_session_set_timer(s, RFCOMM_IDLE_TIMEOUT);
359
360 rfcomm_session_put(s);
361} 349}
362 350
363static struct rfcomm_dlc *rfcomm_dlc_get(struct rfcomm_session *s, u8 dlci) 351static struct rfcomm_dlc *rfcomm_dlc_get(struct rfcomm_session *s, u8 dlci)
@@ -493,12 +481,34 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
493 481
494int rfcomm_dlc_close(struct rfcomm_dlc *d, int err) 482int rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
495{ 483{
496 int r; 484 int r = 0;
485 struct rfcomm_dlc *d_list;
486 struct rfcomm_session *s, *s_list;
487
488 BT_DBG("dlc %p state %ld dlci %d err %d", d, d->state, d->dlci, err);
497 489
498 rfcomm_lock(); 490 rfcomm_lock();
499 491
500 r = __rfcomm_dlc_close(d, err); 492 s = d->session;
493 if (!s)
494 goto no_session;
495
496 /* After waiting on the mutex, check that the session still exists,
497 * then that the dlc still exists.
498 */
499 list_for_each_entry(s_list, &session_list, list) {
500 if (s_list == s) {
501 list_for_each_entry(d_list, &s->dlcs, list) {
502 if (d_list == d) {
503 r = __rfcomm_dlc_close(d, err);
504 break;
505 }
506 }
507 break;
508 }
509 }
501 510
511no_session:
502 rfcomm_unlock(); 512 rfcomm_unlock();
503 return r; 513 return r;
504} 514}
@@ -609,7 +619,7 @@ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state)
609 return s; 619 return s;
610} 620}
611 621
612static void rfcomm_session_del(struct rfcomm_session *s) 622static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s)
613{ 623{
614 int state = s->state; 624 int state = s->state;
615 625
@@ -617,15 +627,14 @@ static void rfcomm_session_del(struct rfcomm_session *s)
617 627
618 list_del(&s->list); 628 list_del(&s->list);
619 629
620 if (state == BT_CONNECTED)
621 rfcomm_send_disc(s, 0);
622
623 rfcomm_session_clear_timer(s); 630 rfcomm_session_clear_timer(s);
624 sock_release(s->sock); 631 sock_release(s->sock);
625 kfree(s); 632 kfree(s);
626 633
627 if (state != BT_LISTEN) 634 if (state != BT_LISTEN)
628 module_put(THIS_MODULE); 635 module_put(THIS_MODULE);
636
637 return NULL;
629} 638}
630 639
631static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst) 640static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst)
@@ -644,17 +653,16 @@ static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst)
644 return NULL; 653 return NULL;
645} 654}
646 655
647static void rfcomm_session_close(struct rfcomm_session *s, int err) 656static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s,
657 int err)
648{ 658{
649 struct rfcomm_dlc *d; 659 struct rfcomm_dlc *d;
650 struct list_head *p, *n; 660 struct list_head *p, *n;
651 661
652 BT_DBG("session %p state %ld err %d", s, s->state, err);
653
654 rfcomm_session_hold(s);
655
656 s->state = BT_CLOSED; 662 s->state = BT_CLOSED;
657 663
664 BT_DBG("session %p state %ld err %d", s, s->state, err);
665
658 /* Close all dlcs */ 666 /* Close all dlcs */
659 list_for_each_safe(p, n, &s->dlcs) { 667 list_for_each_safe(p, n, &s->dlcs) {
660 d = list_entry(p, struct rfcomm_dlc, list); 668 d = list_entry(p, struct rfcomm_dlc, list);
@@ -663,7 +671,7 @@ static void rfcomm_session_close(struct rfcomm_session *s, int err)
663 } 671 }
664 672
665 rfcomm_session_clear_timer(s); 673 rfcomm_session_clear_timer(s);
666 rfcomm_session_put(s); 674 return rfcomm_session_del(s);
667} 675}
668 676
669static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, 677static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
@@ -715,8 +723,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
715 if (*err == 0 || *err == -EINPROGRESS) 723 if (*err == 0 || *err == -EINPROGRESS)
716 return s; 724 return s;
717 725
718 rfcomm_session_del(s); 726 return rfcomm_session_del(s);
719 return NULL;
720 727
721failed: 728failed:
722 sock_release(sock); 729 sock_release(sock);
@@ -1105,7 +1112,7 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr)
1105} 1112}
1106 1113
1107/* ---- RFCOMM frame reception ---- */ 1114/* ---- RFCOMM frame reception ---- */
1108static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) 1115static struct rfcomm_session *rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
1109{ 1116{
1110 BT_DBG("session %p state %ld dlci %d", s, s->state, dlci); 1117 BT_DBG("session %p state %ld dlci %d", s, s->state, dlci);
1111 1118
@@ -1114,7 +1121,7 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
1114 struct rfcomm_dlc *d = rfcomm_dlc_get(s, dlci); 1121 struct rfcomm_dlc *d = rfcomm_dlc_get(s, dlci);
1115 if (!d) { 1122 if (!d) {
1116 rfcomm_send_dm(s, dlci); 1123 rfcomm_send_dm(s, dlci);
1117 return 0; 1124 return s;
1118 } 1125 }
1119 1126
1120 switch (d->state) { 1127 switch (d->state) {
@@ -1150,25 +1157,14 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
1150 break; 1157 break;
1151 1158
1152 case BT_DISCONN: 1159 case BT_DISCONN:
1153 /* rfcomm_session_put is called later so don't do 1160 s = rfcomm_session_close(s, ECONNRESET);
1154 * anything here otherwise we will mess up the session
1155 * reference counter:
1156 *
1157 * (a) when we are the initiator dlc_unlink will drive
1158 * the reference counter to 0 (there is no initial put
1159 * after session_add)
1160 *
1161 * (b) when we are not the initiator rfcomm_rx_process
1162 * will explicitly call put to balance the initial hold
1163 * done after session add.
1164 */
1165 break; 1161 break;
1166 } 1162 }
1167 } 1163 }
1168 return 0; 1164 return s;
1169} 1165}
1170 1166
1171static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci) 1167static struct rfcomm_session *rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci)
1172{ 1168{
1173 int err = 0; 1169 int err = 0;
1174 1170
@@ -1192,13 +1188,13 @@ static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci)
1192 else 1188 else
1193 err = ECONNRESET; 1189 err = ECONNRESET;
1194 1190
1195 s->state = BT_CLOSED; 1191 s = rfcomm_session_close(s, err);
1196 rfcomm_session_close(s, err);
1197 } 1192 }
1198 return 0; 1193 return s;
1199} 1194}
1200 1195
1201static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci) 1196static struct rfcomm_session *rfcomm_recv_disc(struct rfcomm_session *s,
1197 u8 dlci)
1202{ 1198{
1203 int err = 0; 1199 int err = 0;
1204 1200
@@ -1227,11 +1223,9 @@ static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci)
1227 else 1223 else
1228 err = ECONNRESET; 1224 err = ECONNRESET;
1229 1225
1230 s->state = BT_CLOSED; 1226 s = rfcomm_session_close(s, err);
1231 rfcomm_session_close(s, err);
1232 } 1227 }
1233 1228 return s;
1234 return 0;
1235} 1229}
1236 1230
1237void rfcomm_dlc_accept(struct rfcomm_dlc *d) 1231void rfcomm_dlc_accept(struct rfcomm_dlc *d)
@@ -1652,11 +1646,18 @@ drop:
1652 return 0; 1646 return 0;
1653} 1647}
1654 1648
1655static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) 1649static struct rfcomm_session *rfcomm_recv_frame(struct rfcomm_session *s,
1650 struct sk_buff *skb)
1656{ 1651{
1657 struct rfcomm_hdr *hdr = (void *) skb->data; 1652 struct rfcomm_hdr *hdr = (void *) skb->data;
1658 u8 type, dlci, fcs; 1653 u8 type, dlci, fcs;
1659 1654
1655 if (!s) {
1656 /* no session, so free socket data */
1657 kfree_skb(skb);
1658 return s;
1659 }
1660
1660 dlci = __get_dlci(hdr->addr); 1661 dlci = __get_dlci(hdr->addr);
1661 type = __get_type(hdr->ctrl); 1662 type = __get_type(hdr->ctrl);
1662 1663
@@ -1667,7 +1668,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
1667 if (__check_fcs(skb->data, type, fcs)) { 1668 if (__check_fcs(skb->data, type, fcs)) {
1668 BT_ERR("bad checksum in packet"); 1669 BT_ERR("bad checksum in packet");
1669 kfree_skb(skb); 1670 kfree_skb(skb);
1670 return -EILSEQ; 1671 return s;
1671 } 1672 }
1672 1673
1673 if (__test_ea(hdr->len)) 1674 if (__test_ea(hdr->len))
@@ -1683,22 +1684,23 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
1683 1684
1684 case RFCOMM_DISC: 1685 case RFCOMM_DISC:
1685 if (__test_pf(hdr->ctrl)) 1686 if (__test_pf(hdr->ctrl))
1686 rfcomm_recv_disc(s, dlci); 1687 s = rfcomm_recv_disc(s, dlci);
1687 break; 1688 break;
1688 1689
1689 case RFCOMM_UA: 1690 case RFCOMM_UA:
1690 if (__test_pf(hdr->ctrl)) 1691 if (__test_pf(hdr->ctrl))
1691 rfcomm_recv_ua(s, dlci); 1692 s = rfcomm_recv_ua(s, dlci);
1692 break; 1693 break;
1693 1694
1694 case RFCOMM_DM: 1695 case RFCOMM_DM:
1695 rfcomm_recv_dm(s, dlci); 1696 s = rfcomm_recv_dm(s, dlci);
1696 break; 1697 break;
1697 1698
1698 case RFCOMM_UIH: 1699 case RFCOMM_UIH:
1699 if (dlci) 1700 if (dlci) {
1700 return rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb); 1701 rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb);
1701 1702 return s;
1703 }
1702 rfcomm_recv_mcc(s, skb); 1704 rfcomm_recv_mcc(s, skb);
1703 break; 1705 break;
1704 1706
@@ -1707,7 +1709,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
1707 break; 1709 break;
1708 } 1710 }
1709 kfree_skb(skb); 1711 kfree_skb(skb);
1710 return 0; 1712 return s;
1711} 1713}
1712 1714
1713/* ---- Connection and data processing ---- */ 1715/* ---- Connection and data processing ---- */
@@ -1844,7 +1846,7 @@ static void rfcomm_process_dlcs(struct rfcomm_session *s)
1844 } 1846 }
1845} 1847}
1846 1848
1847static void rfcomm_process_rx(struct rfcomm_session *s) 1849static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s)
1848{ 1850{
1849 struct socket *sock = s->sock; 1851 struct socket *sock = s->sock;
1850 struct sock *sk = sock->sk; 1852 struct sock *sk = sock->sk;
@@ -1856,17 +1858,15 @@ static void rfcomm_process_rx(struct rfcomm_session *s)
1856 while ((skb = skb_dequeue(&sk->sk_receive_queue))) { 1858 while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
1857 skb_orphan(skb); 1859 skb_orphan(skb);
1858 if (!skb_linearize(skb)) 1860 if (!skb_linearize(skb))
1859 rfcomm_recv_frame(s, skb); 1861 s = rfcomm_recv_frame(s, skb);
1860 else 1862 else
1861 kfree_skb(skb); 1863 kfree_skb(skb);
1862 } 1864 }
1863 1865
1864 if (sk->sk_state == BT_CLOSED) { 1866 if (s && (sk->sk_state == BT_CLOSED))
1865 if (!s->initiator) 1867 s = rfcomm_session_close(s, sk->sk_err);
1866 rfcomm_session_put(s);
1867 1868
1868 rfcomm_session_close(s, sk->sk_err); 1869 return s;
1869 }
1870} 1870}
1871 1871
1872static void rfcomm_accept_connection(struct rfcomm_session *s) 1872static void rfcomm_accept_connection(struct rfcomm_session *s)
@@ -1891,8 +1891,6 @@ static void rfcomm_accept_connection(struct rfcomm_session *s)
1891 1891
1892 s = rfcomm_session_add(nsock, BT_OPEN); 1892 s = rfcomm_session_add(nsock, BT_OPEN);
1893 if (s) { 1893 if (s) {
1894 rfcomm_session_hold(s);
1895
1896 /* We should adjust MTU on incoming sessions. 1894 /* We should adjust MTU on incoming sessions.
1897 * L2CAP MTU minus UIH header and FCS. */ 1895 * L2CAP MTU minus UIH header and FCS. */
1898 s->mtu = min(l2cap_pi(nsock->sk)->chan->omtu, 1896 s->mtu = min(l2cap_pi(nsock->sk)->chan->omtu,
@@ -1903,7 +1901,7 @@ static void rfcomm_accept_connection(struct rfcomm_session *s)
1903 sock_release(nsock); 1901 sock_release(nsock);
1904} 1902}
1905 1903
1906static void rfcomm_check_connection(struct rfcomm_session *s) 1904static struct rfcomm_session *rfcomm_check_connection(struct rfcomm_session *s)
1907{ 1905{
1908 struct sock *sk = s->sock->sk; 1906 struct sock *sk = s->sock->sk;
1909 1907
@@ -1921,10 +1919,10 @@ static void rfcomm_check_connection(struct rfcomm_session *s)
1921 break; 1919 break;
1922 1920
1923 case BT_CLOSED: 1921 case BT_CLOSED:
1924 s->state = BT_CLOSED; 1922 s = rfcomm_session_close(s, sk->sk_err);
1925 rfcomm_session_close(s, sk->sk_err);
1926 break; 1923 break;
1927 } 1924 }
1925 return s;
1928} 1926}
1929 1927
1930static void rfcomm_process_sessions(void) 1928static void rfcomm_process_sessions(void)
@@ -1940,7 +1938,6 @@ static void rfcomm_process_sessions(void)
1940 if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) { 1938 if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) {
1941 s->state = BT_DISCONN; 1939 s->state = BT_DISCONN;
1942 rfcomm_send_disc(s, 0); 1940 rfcomm_send_disc(s, 0);
1943 rfcomm_session_put(s);
1944 continue; 1941 continue;
1945 } 1942 }
1946 1943
@@ -1949,21 +1946,18 @@ static void rfcomm_process_sessions(void)
1949 continue; 1946 continue;
1950 } 1947 }
1951 1948
1952 rfcomm_session_hold(s);
1953
1954 switch (s->state) { 1949 switch (s->state) {
1955 case BT_BOUND: 1950 case BT_BOUND:
1956 rfcomm_check_connection(s); 1951 s = rfcomm_check_connection(s);
1957 break; 1952 break;
1958 1953
1959 default: 1954 default:
1960 rfcomm_process_rx(s); 1955 s = rfcomm_process_rx(s);
1961 break; 1956 break;
1962 } 1957 }
1963 1958
1964 rfcomm_process_dlcs(s); 1959 if (s)
1965 1960 rfcomm_process_dlcs(s);
1966 rfcomm_session_put(s);
1967 } 1961 }
1968 1962
1969 rfcomm_unlock(); 1963 rfcomm_unlock();
@@ -2010,10 +2004,11 @@ static int rfcomm_add_listener(bdaddr_t *ba)
2010 2004
2011 /* Add listening session */ 2005 /* Add listening session */
2012 s = rfcomm_session_add(sock, BT_LISTEN); 2006 s = rfcomm_session_add(sock, BT_LISTEN);
2013 if (!s) 2007 if (!s) {
2008 err = -ENOMEM;
2014 goto failed; 2009 goto failed;
2010 }
2015 2011
2016 rfcomm_session_hold(s);
2017 return 0; 2012 return 0;
2018failed: 2013failed:
2019 sock_release(sock); 2014 sock_release(sock);
@@ -2071,8 +2066,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
2071 if (!s) 2066 if (!s)
2072 return; 2067 return;
2073 2068
2074 rfcomm_session_hold(s);
2075
2076 list_for_each_safe(p, n, &s->dlcs) { 2069 list_for_each_safe(p, n, &s->dlcs) {
2077 d = list_entry(p, struct rfcomm_dlc, list); 2070 d = list_entry(p, struct rfcomm_dlc, list);
2078 2071
@@ -2104,8 +2097,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
2104 set_bit(RFCOMM_AUTH_REJECT, &d->flags); 2097 set_bit(RFCOMM_AUTH_REJECT, &d->flags);
2105 } 2098 }
2106 2099
2107 rfcomm_session_put(s);
2108
2109 rfcomm_schedule(); 2100 rfcomm_schedule();
2110} 2101}
2111 2102
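
The rfcomm changes above drop the session reference counter (rfcomm_session_hold()/rfcomm_session_put()) entirely. In its place, every helper that may free a session, such as rfcomm_session_del(), rfcomm_session_close(), rfcomm_recv_frame() and rfcomm_process_rx(), now returns the session pointer, or NULL once the session has been freed, and callers must keep using the returned value rather than a stale copy. A minimal userspace analogue of that convention, with hypothetical session/session_del names:

/* Sketch of the "return the possibly-freed object" convention the
 * rfcomm diff adopts: the destructor returns NULL so the caller's
 * pointer is overwritten and cannot dangle after the free. */
#include <stdio.h>
#include <stdlib.h>

struct session {
	int state;
};

/* Free the session and return NULL, mirroring rfcomm_session_del(). */
static struct session *session_del(struct session *s)
{
	free(s);
	return NULL;
}

/* May or may not free the session; always returns the live pointer. */
static struct session *session_process(struct session *s, int close)
{
	if (close)
		s = session_del(s);
	return s;
}

int main(void)
{
	struct session *s = calloc(1, sizeof(*s));

	s = session_process(s, 0);	/* still alive */
	s = session_process(s, 1);	/* freed; s is now NULL */
	if (!s)
		printf("session gone, no dangling pointer left\n");
	return 0;
}
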
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 7c9224bcce17..a8638b58c4bf 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -1066,8 +1066,7 @@ void __exit rfcomm_cleanup_sockets(void)
1066 1066
1067 debugfs_remove(rfcomm_sock_debugfs); 1067 debugfs_remove(rfcomm_sock_debugfs);
1068 1068
1069 if (bt_sock_unregister(BTPROTO_RFCOMM) < 0) 1069 bt_sock_unregister(BTPROTO_RFCOMM);
1070 BT_ERR("RFCOMM socket layer unregistration failed");
1071 1070
1072 proto_unregister(&rfcomm_proto); 1071 proto_unregister(&rfcomm_proto);
1073} 1072}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index fb6192c9812e..2c8055350510 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -1113,8 +1113,7 @@ void __exit sco_exit(void)
1113 1113
1114 debugfs_remove(sco_debugfs); 1114 debugfs_remove(sco_debugfs);
1115 1115
1116 if (bt_sock_unregister(BTPROTO_SCO) < 0) 1116 bt_sock_unregister(BTPROTO_SCO);
1117 BT_ERR("SCO socket unregistration failed");
1118 1117
1119 proto_unregister(&sco_proto); 1118 proto_unregister(&sco_proto);
1120} 1119}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 314c73ed418f..967312803e41 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -348,10 +348,10 @@ void br_dev_setup(struct net_device *dev)
348 348
349 dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | 349 dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
350 NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX | 350 NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX |
351 NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_TX; 351 NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_CTAG_TX;
352 dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | 352 dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
353 NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | 353 NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
354 NETIF_F_HW_VLAN_TX; 354 NETIF_F_HW_VLAN_CTAG_TX;
355 355
356 br->dev = dev; 356 br->dev = dev;
357 spin_lock_init(&br->lock); 357 spin_lock_init(&br->lock);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index bab338e6270d..c581f1200ef7 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -161,9 +161,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
161 if (!pv) 161 if (!pv)
162 return; 162 return;
163 163
164 for (vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid); 164 for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
165 vid < BR_VLAN_BITMAP_LEN;
166 vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid+1)) {
167 f = __br_fdb_get(br, br->dev->dev_addr, vid); 165 f = __br_fdb_get(br, br->dev->dev_addr, vid);
168 if (f && f->is_local && !f->dst) 166 if (f && f->is_local && !f->dst)
169 fdb_delete(br, f); 167 fdb_delete(br, f);
@@ -724,13 +722,10 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
724 * specify a VLAN. To be nice, add/update entry for every 722 * specify a VLAN. To be nice, add/update entry for every
725 * vlan on this port. 723 * vlan on this port.
726 */ 724 */
727 vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); 725 for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
728 while (vid < BR_VLAN_BITMAP_LEN) {
729 err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); 726 err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
730 if (err) 727 if (err)
731 goto out; 728 goto out;
732 vid = find_next_bit(pv->vlan_bitmap,
733 BR_VLAN_BITMAP_LEN, vid+1);
734 } 729 }
735 } 730 }
736 731
@@ -815,11 +810,8 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
815 * vlan on this port. 810 * vlan on this port.
816 */ 811 */
817 err = -ENOENT; 812 err = -ENOENT;
818 vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); 813 for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
819 while (vid < BR_VLAN_BITMAP_LEN) {
820 err &= __br_fdb_delete(p, addr, vid); 814 err &= __br_fdb_delete(p, addr, vid);
821 vid = find_next_bit(pv->vlan_bitmap,
822 BR_VLAN_BITMAP_LEN, vid+1);
823 } 815 }
824 } 816 }
825out: 817out:
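
The br_fdb hunks above replace open-coded find_first_bit()/find_next_bit() loops with the equivalent for_each_set_bit() and for_each_set_bit_from() helpers. The kernel macros operate on arbitrary-length bitmaps; a hypothetical userspace re-creation over a single unsigned long shows the same iteration shape:

/* Userspace sketch of the for_each_set_bit() idiom used in the
 * br_fdb hunks; handles one unsigned long (avoids bits >= 32 so it
 * stays portable), whereas the kernel version walks full bitmaps. */
#include <stdio.h>

#define BITS (8 * (unsigned int)sizeof(unsigned long))

static unsigned int next_set_bit(unsigned long map, unsigned int from)
{
	for (; from < BITS; from++)
		if (map & (1UL << from))
			return from;
	return BITS;
}

#define for_each_set_bit(bit, map) \
	for ((bit) = next_set_bit((map), 0); (bit) < BITS; \
	     (bit) = next_set_bit((map), (bit) + 1))

int main(void)
{
	unsigned long vlan_bitmap = (1UL << 1) | (1UL << 5) | (1UL << 20);
	unsigned int vid;

	for_each_set_bit(vid, vlan_bitmap)
		printf("vid %u\n", vid);	/* prints 1, 5, 20 */
	return 0;
}
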
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 459dab22b3f6..4cdba60926ff 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -149,7 +149,6 @@ static void del_nbp(struct net_bridge_port *p)
149 dev->priv_flags &= ~IFF_BRIDGE_PORT; 149 dev->priv_flags &= ~IFF_BRIDGE_PORT;
150 150
151 netdev_rx_handler_unregister(dev); 151 netdev_rx_handler_unregister(dev);
152 synchronize_net();
153 152
154 netdev_upper_dev_unlink(dev, br->dev); 153 netdev_upper_dev_unlink(dev, br->dev);
155 154
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index ee79f3f20383..19942e38fd2d 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -382,7 +382,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
382 return ret; 382 return ret;
383} 383}
384 384
385static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 385static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
386{ 386{
387 struct net *net = sock_net(skb->sk); 387 struct net *net = sock_net(skb->sk);
388 struct br_mdb_entry *entry; 388 struct br_mdb_entry *entry;
@@ -458,7 +458,7 @@ unlock:
458 return err; 458 return err;
459} 459}
460 460
461static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 461static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
462{ 462{
463 struct net_device *dev; 463 struct net_device *dev;
464 struct br_mdb_entry *entry; 464 struct br_mdb_entry *entry;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 923fbeaf7afd..81f2389f78eb 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1369,7 +1369,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
1369 return -EINVAL; 1369 return -EINVAL;
1370 1370
1371 if (iph->protocol != IPPROTO_IGMP) { 1371 if (iph->protocol != IPPROTO_IGMP) {
1372 if ((iph->daddr & IGMP_LOCAL_GROUP_MASK) != IGMP_LOCAL_GROUP) 1372 if (!ipv4_is_local_multicast(iph->daddr))
1373 BR_INPUT_SKB_CB(skb)->mrouters_only = 1; 1373 BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
1374 return 0; 1374 return 0;
1375 } 1375 }
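
The br_multicast hunk swaps an open-coded mask test for ipv4_is_local_multicast(), which checks membership in the link-local multicast block 224.0.0.0/24. A self-contained equivalent of that test (local names; the kernel helper does the same comparison on a network-order address):

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

/* True only for addresses in 224.0.0.0/24, the link-local multicast
 * range that must not be flagged mrouters_only. */
static int is_local_multicast(uint32_t addr_be)
{
	return (addr_be & htonl(0xffffff00)) == htonl(0xe0000000);
}

int main(void)
{
	printf("224.0.0.1 -> %d\n", is_local_multicast(inet_addr("224.0.0.1"))); /* 1 */
	printf("224.0.1.1 -> %d\n", is_local_multicast(inet_addr("224.0.1.1"))); /* 0 */
	printf("239.1.1.1 -> %d\n", is_local_multicast(inet_addr("239.1.1.1"))); /* 0 */
	return 0;
}
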
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index fe43bc7b063f..1ed75bfd8d1d 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -535,7 +535,8 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct
535 if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) 535 if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb))
536 return br; 536 return br;
537 537
538 vlan = __vlan_find_dev_deep(br, vlan_tx_tag_get(skb) & VLAN_VID_MASK); 538 vlan = __vlan_find_dev_deep(br, skb->vlan_proto,
539 vlan_tx_tag_get(skb) & VLAN_VID_MASK);
539 540
540 return vlan ? vlan : br; 541 return vlan ? vlan : br;
541} 542}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 299fc5f40a26..8e3abf564798 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -136,10 +136,7 @@ static int br_fill_ifinfo(struct sk_buff *skb,
136 goto nla_put_failure; 136 goto nla_put_failure;
137 137
138 pvid = br_get_pvid(pv); 138 pvid = br_get_pvid(pv);
139 for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); 139 for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
140 vid < BR_VLAN_BITMAP_LEN;
141 vid = find_next_bit(pv->vlan_bitmap,
142 BR_VLAN_BITMAP_LEN, vid+1)) {
143 vinfo.vid = vid; 140 vinfo.vid = vid;
144 vinfo.flags = 0; 141 vinfo.flags = 0;
145 if (vid == pvid) 142 if (vid == pvid)
@@ -355,17 +352,14 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
355/* Change state and parameters on port. */ 352/* Change state and parameters on port. */
356int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) 353int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
357{ 354{
358 struct ifinfomsg *ifm;
359 struct nlattr *protinfo; 355 struct nlattr *protinfo;
360 struct nlattr *afspec; 356 struct nlattr *afspec;
361 struct net_bridge_port *p; 357 struct net_bridge_port *p;
362 struct nlattr *tb[IFLA_BRPORT_MAX + 1]; 358 struct nlattr *tb[IFLA_BRPORT_MAX + 1];
363 int err; 359 int err = 0;
364
365 ifm = nlmsg_data(nlh);
366 360
367 protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); 361 protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO);
368 afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); 362 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
369 if (!protinfo && !afspec) 363 if (!protinfo && !afspec)
370 return 0; 364 return 0;
371 365
@@ -373,7 +367,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
373 /* We want to accept dev as bridge itself if the AF_SPEC 367 /* We want to accept dev as bridge itself if the AF_SPEC
374 * is set to see if someone is setting vlan info on the bridge 368 * is set to see if someone is setting vlan info on the bridge
375 */ 369 */
376 if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec)) 370 if (!p && !afspec)
377 return -EINVAL; 371 return -EINVAL;
378 372
379 if (p && protinfo) { 373 if (p && protinfo) {
@@ -414,14 +408,11 @@ out:
414/* Delete port information */ 408/* Delete port information */
415int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) 409int br_dellink(struct net_device *dev, struct nlmsghdr *nlh)
416{ 410{
417 struct ifinfomsg *ifm;
418 struct nlattr *afspec; 411 struct nlattr *afspec;
419 struct net_bridge_port *p; 412 struct net_bridge_port *p;
420 int err; 413 int err;
421 414
422 ifm = nlmsg_data(nlh); 415 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
423
424 afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC);
425 if (!afspec) 416 if (!afspec)
426 return 0; 417 return 0;
427 418
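
Both br_netlink.c hunks above stop dereferencing the ifinfomsg header: nlmsg_find_attr() only needs the header's size to know where the attribute stream starts. A sketch of the lookup, assuming the usual rtnetlink layout (sketch_get_af_spec is illustrative):

#include <net/netlink.h>
#include <linux/rtnetlink.h>

static struct nlattr *sketch_get_af_spec(const struct nlmsghdr *nlh)
{
        /* skip sizeof(struct ifinfomsg) bytes of fixed header,
         * then scan the attribute stream for IFLA_AF_SPEC */
        return nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
}
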
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 93dde75923f0..bd58b45f5f90 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -34,6 +34,7 @@ static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags)
34 34
35static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) 35static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
36{ 36{
37 const struct net_device_ops *ops;
37 struct net_bridge_port *p = NULL; 38 struct net_bridge_port *p = NULL;
38 struct net_bridge *br; 39 struct net_bridge *br;
39 struct net_device *dev; 40 struct net_device *dev;
@@ -53,15 +54,17 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
53 br = v->parent.br; 54 br = v->parent.br;
54 dev = br->dev; 55 dev = br->dev;
55 } 56 }
57 ops = dev->netdev_ops;
56 58
57 if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) { 59 if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
58 /* Add VLAN to the device filter if it is supported. 60 /* Add VLAN to the device filter if it is supported.
59 * Strictly speaking, this is not necessary now, since 61 * Strictly speaking, this is not necessary now, since
60 * devices are made promiscuous by the bridge, but if 62 * devices are made promiscuous by the bridge, but if
61 * that ever changes this code will allow tagged 63 * that ever changes this code will allow tagged
62 * traffic to enter the bridge. 64 * traffic to enter the bridge.
63 */ 65 */
64 err = dev->netdev_ops->ndo_vlan_rx_add_vid(dev, vid); 66 err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q),
67 vid);
65 if (err) 68 if (err)
66 return err; 69 return err;
67 } 70 }
@@ -82,8 +85,8 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
82 return 0; 85 return 0;
83 86
84out_filt: 87out_filt:
85 if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) 88 if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
86 dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); 89 ops->ndo_vlan_rx_kill_vid(dev, htons(ETH_P_8021Q), vid);
87 return err; 90 return err;
88} 91}
89 92
@@ -97,9 +100,10 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)
97 100
98 if (v->port_idx && vid) { 101 if (v->port_idx && vid) {
99 struct net_device *dev = v->parent.port->dev; 102 struct net_device *dev = v->parent.port->dev;
103 const struct net_device_ops *ops = dev->netdev_ops;
100 104
101 if (dev->features & NETIF_F_HW_VLAN_FILTER) 105 if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
102 dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); 106 ops->ndo_vlan_rx_kill_vid(dev, htons(ETH_P_8021Q), vid);
103 } 107 }
104 108
105 clear_bit(vid, v->vlan_bitmap); 109 clear_bit(vid, v->vlan_bitmap);
@@ -171,7 +175,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
171 * mac header. 175 * mac header.
172 */ 176 */
173 skb_push(skb, ETH_HLEN); 177 skb_push(skb, ETH_HLEN);
174 skb = __vlan_put_tag(skb, skb->vlan_tci); 178 skb = __vlan_put_tag(skb, skb->vlan_proto, skb->vlan_tci);
175 if (!skb) 179 if (!skb)
176 goto out; 180 goto out;
177 /* put skb->data back to where it was */ 181 /* put skb->data back to where it was */
@@ -213,7 +217,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
213 /* PVID is set on this port. Any untagged ingress 217 /* PVID is set on this port. Any untagged ingress
214 * frame is considered to belong to this vlan. 218 * frame is considered to belong to this vlan.
215 */ 219 */
216 __vlan_hwaccel_put_tag(skb, pvid); 220 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);
217 return true; 221 return true;
218 } 222 }
219 223
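
These br_vlan.c hunks track the new ndo_vlan_rx_add_vid()/ndo_vlan_rx_kill_vid() signatures, which now carry the tag protocol so hardware filters can distinguish 802.1Q from 802.1ad. A hypothetical driver callback (sketch_vlan_rx_add_vid is an illustrative name):

#include <linux/netdevice.h>
#include <linux/if_vlan.h>

static int sketch_vlan_rx_add_vid(struct net_device *dev,
                                  __be16 proto, u16 vid)
{
        if (proto == htons(ETH_P_8021Q)) {
                /* program the C-TAG (802.1Q) filter for vid */
        } else if (proto == htons(ETH_P_8021AD)) {
                /* program the S-TAG (802.1ad) filter for vid */
        }
        return 0;
}
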
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 92de5e5f9db2..9878eb8204c5 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -78,6 +78,11 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum,
78 const char *prefix) 78 const char *prefix)
79{ 79{
80 unsigned int bitmask; 80 unsigned int bitmask;
81 struct net *net = dev_net(in ? in : out);
82
83 /* FIXME: Disabled from containers until syslog ns is supported */
84 if (!net_eq(net, &init_net))
85 return;
81 86
82 spin_lock_bh(&ebt_log_lock); 87 spin_lock_bh(&ebt_log_lock);
83 printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x", 88 printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x",
@@ -176,17 +181,18 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
176{ 181{
177 const struct ebt_log_info *info = par->targinfo; 182 const struct ebt_log_info *info = par->targinfo;
178 struct nf_loginfo li; 183 struct nf_loginfo li;
184 struct net *net = dev_net(par->in ? par->in : par->out);
179 185
180 li.type = NF_LOG_TYPE_LOG; 186 li.type = NF_LOG_TYPE_LOG;
181 li.u.log.level = info->loglevel; 187 li.u.log.level = info->loglevel;
182 li.u.log.logflags = info->bitmask; 188 li.u.log.logflags = info->bitmask;
183 189
184 if (info->bitmask & EBT_LOG_NFLOG) 190 if (info->bitmask & EBT_LOG_NFLOG)
185 nf_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, 191 nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb,
186 par->out, &li, "%s", info->prefix); 192 par->in, par->out, &li, "%s", info->prefix);
187 else 193 else
188 ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, 194 ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in,
189 par->out, &li, info->prefix); 195 par->out, &li, info->prefix);
190 return EBT_CONTINUE; 196 return EBT_CONTINUE;
191} 197}
192 198
@@ -206,19 +212,47 @@ static struct nf_logger ebt_log_logger __read_mostly = {
206 .me = THIS_MODULE, 212 .me = THIS_MODULE,
207}; 213};
208 214
215static int __net_init ebt_log_net_init(struct net *net)
216{
217 nf_log_set(net, NFPROTO_BRIDGE, &ebt_log_logger);
218 return 0;
219}
220
221static void __net_exit ebt_log_net_fini(struct net *net)
222{
223 nf_log_unset(net, &ebt_log_logger);
224}
225
226static struct pernet_operations ebt_log_net_ops = {
227 .init = ebt_log_net_init,
228 .exit = ebt_log_net_fini,
229};
230
209static int __init ebt_log_init(void) 231static int __init ebt_log_init(void)
210{ 232{
211 int ret; 233 int ret;
212 234
235 ret = register_pernet_subsys(&ebt_log_net_ops);
236 if (ret < 0)
237 goto err_pernet;
238
213 ret = xt_register_target(&ebt_log_tg_reg); 239 ret = xt_register_target(&ebt_log_tg_reg);
214 if (ret < 0) 240 if (ret < 0)
215 return ret; 241 goto err_target;
242
216 nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger); 243 nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger);
217 return 0; 244
245 return ret;
246
247err_target:
248 unregister_pernet_subsys(&ebt_log_net_ops);
249err_pernet:
250 return ret;
218} 251}
219 252
220static void __exit ebt_log_fini(void) 253static void __exit ebt_log_fini(void)
221{ 254{
255 unregister_pernet_subsys(&ebt_log_net_ops);
222 nf_log_unregister(&ebt_log_logger); 256 nf_log_unregister(&ebt_log_logger);
223 xt_unregister_target(&ebt_log_tg_reg); 257 xt_unregister_target(&ebt_log_tg_reg);
224} 258}
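
The ebt_log conversion above is the standard pernet pattern: per-namespace setup moves into init/exit hooks, and module init/exit only (un)register the subsystem. A skeleton of the pattern (all sketch_* names are illustrative):

#include <net/net_namespace.h>

static int __net_init sketch_net_init(struct net *net)
{
        /* per-namespace setup, e.g. nf_log_set(net, ...) */
        return 0;
}

static void __net_exit sketch_net_exit(struct net *net)
{
        /* per-namespace teardown, e.g. nf_log_unset(net, ...) */
}

static struct pernet_operations sketch_net_ops = {
        .init = sketch_net_init,
        .exit = sketch_net_exit,
};

/* module init/exit then reduce to
 * register_pernet_subsys(&sketch_net_ops) and
 * unregister_pernet_subsys(&sketch_net_ops). */
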
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 5be68bbcc341..59ac7952010d 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -24,14 +24,15 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
24{ 24{
25 const struct ebt_nflog_info *info = par->targinfo; 25 const struct ebt_nflog_info *info = par->targinfo;
26 struct nf_loginfo li; 26 struct nf_loginfo li;
27 struct net *net = dev_net(par->in ? par->in : par->out);
27 28
28 li.type = NF_LOG_TYPE_ULOG; 29 li.type = NF_LOG_TYPE_ULOG;
29 li.u.ulog.copy_len = info->len; 30 li.u.ulog.copy_len = info->len;
30 li.u.ulog.group = info->group; 31 li.u.ulog.group = info->group;
31 li.u.ulog.qthreshold = info->threshold; 32 li.u.ulog.qthreshold = info->threshold;
32 33
33 nf_log_packet(PF_BRIDGE, par->hooknum, skb, par->in, par->out, 34 nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in,
34 &li, "%s", info->prefix); 35 par->out, &li, "%s", info->prefix);
35 return EBT_CONTINUE; 36 return EBT_CONTINUE;
36} 37}
37 38
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 3bf43f7bb9d4..fc1905c51417 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -35,12 +35,13 @@
35#include <linux/skbuff.h> 35#include <linux/skbuff.h>
36#include <linux/kernel.h> 36#include <linux/kernel.h>
37#include <linux/timer.h> 37#include <linux/timer.h>
38#include <linux/netlink.h> 38#include <net/netlink.h>
39#include <linux/netdevice.h> 39#include <linux/netdevice.h>
40#include <linux/netfilter/x_tables.h> 40#include <linux/netfilter/x_tables.h>
41#include <linux/netfilter_bridge/ebtables.h> 41#include <linux/netfilter_bridge/ebtables.h>
42#include <linux/netfilter_bridge/ebt_ulog.h> 42#include <linux/netfilter_bridge/ebt_ulog.h>
43#include <net/netfilter/nf_log.h> 43#include <net/netfilter/nf_log.h>
44#include <net/netns/generic.h>
44#include <net/sock.h> 45#include <net/sock.h>
45#include "../br_private.h" 46#include "../br_private.h"
46 47
@@ -62,13 +63,22 @@ typedef struct {
62 spinlock_t lock; /* the per-queue lock */ 63 spinlock_t lock; /* the per-queue lock */
63} ebt_ulog_buff_t; 64} ebt_ulog_buff_t;
64 65
65static ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS]; 66static int ebt_ulog_net_id __read_mostly;
66static struct sock *ebtulognl; 67struct ebt_ulog_net {
68 unsigned int nlgroup[EBT_ULOG_MAXNLGROUPS];
69 ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS];
70 struct sock *ebtulognl;
71};
72
73static struct ebt_ulog_net *ebt_ulog_pernet(struct net *net)
74{
75 return net_generic(net, ebt_ulog_net_id);
76}
67 77
68/* send one ulog_buff_t to userspace */ 78/* send one ulog_buff_t to userspace */
69static void ulog_send(unsigned int nlgroup) 79static void ulog_send(struct ebt_ulog_net *ebt, unsigned int nlgroup)
70{ 80{
71 ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup]; 81 ebt_ulog_buff_t *ub = &ebt->ulog_buffers[nlgroup];
72 82
73 del_timer(&ub->timer); 83 del_timer(&ub->timer);
74 84
@@ -80,7 +90,7 @@ static void ulog_send(unsigned int nlgroup)
80 ub->lastnlh->nlmsg_type = NLMSG_DONE; 90 ub->lastnlh->nlmsg_type = NLMSG_DONE;
81 91
82 NETLINK_CB(ub->skb).dst_group = nlgroup + 1; 92 NETLINK_CB(ub->skb).dst_group = nlgroup + 1;
83 netlink_broadcast(ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC); 93 netlink_broadcast(ebt->ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC);
84 94
85 ub->qlen = 0; 95 ub->qlen = 0;
86 ub->skb = NULL; 96 ub->skb = NULL;
@@ -89,10 +99,15 @@ static void ulog_send(unsigned int nlgroup)
89/* timer function to flush queue in flushtimeout time */ 99/* timer function to flush queue in flushtimeout time */
90static void ulog_timer(unsigned long data) 100static void ulog_timer(unsigned long data)
91{ 101{
92 spin_lock_bh(&ulog_buffers[data].lock); 102 struct ebt_ulog_net *ebt = container_of((void *)data,
93 if (ulog_buffers[data].skb) 103 struct ebt_ulog_net,
94 ulog_send(data); 104 nlgroup[*(unsigned int *)data]);
95 spin_unlock_bh(&ulog_buffers[data].lock); 105
106 ebt_ulog_buff_t *ub = &ebt->ulog_buffers[*(unsigned int *)data];
107 spin_lock_bh(&ub->lock);
108 if (ub->skb)
109 ulog_send(ebt, *(unsigned int *)data);
110 spin_unlock_bh(&ub->lock);
96} 111}
97 112
98static struct sk_buff *ulog_alloc_skb(unsigned int size) 113static struct sk_buff *ulog_alloc_skb(unsigned int size)
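
The reworked ulog_timer() recovers its per-net context purely from the timer's data argument: data points at nlgroup[i], whose stored value is i itself, so container_of() with member nlgroup[*(unsigned int *)data] lands back at the enclosing ebt_ulog_net. A condensed sketch assuming the same invariant, slot[i] == i (struct ctx and ctx_from_slot are illustrative):

#include <linux/kernel.h>

struct ctx {
        unsigned int slot[8];   /* slot[i] == i by construction */
};

static struct ctx *ctx_from_slot(unsigned long data)
{
        unsigned int *p = (unsigned int *)data;

        /* offsetof(struct ctx, slot[*p]) is valid here because
         * *p is exactly the element's own index */
        return container_of((void *)p, struct ctx, slot[*p]);
}
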
@@ -123,8 +138,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
123 ebt_ulog_packet_msg_t *pm; 138 ebt_ulog_packet_msg_t *pm;
124 size_t size, copy_len; 139 size_t size, copy_len;
125 struct nlmsghdr *nlh; 140 struct nlmsghdr *nlh;
141 struct net *net = dev_net(in ? in : out);
142 struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
126 unsigned int group = uloginfo->nlgroup; 143 unsigned int group = uloginfo->nlgroup;
127 ebt_ulog_buff_t *ub = &ulog_buffers[group]; 144 ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group];
128 spinlock_t *lock = &ub->lock; 145 spinlock_t *lock = &ub->lock;
129 ktime_t kt; 146 ktime_t kt;
130 147
@@ -134,7 +151,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
134 else 151 else
135 copy_len = uloginfo->cprange; 152 copy_len = uloginfo->cprange;
136 153
137 size = NLMSG_SPACE(sizeof(*pm) + copy_len); 154 size = nlmsg_total_size(sizeof(*pm) + copy_len);
138 if (size > nlbufsiz) { 155 if (size > nlbufsiz) {
139 pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz); 156 pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz);
140 return; 157 return;
@@ -146,7 +163,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
146 if (!(ub->skb = ulog_alloc_skb(size))) 163 if (!(ub->skb = ulog_alloc_skb(size)))
147 goto unlock; 164 goto unlock;
148 } else if (size > skb_tailroom(ub->skb)) { 165 } else if (size > skb_tailroom(ub->skb)) {
149 ulog_send(group); 166 ulog_send(ebt, group);
150 167
151 if (!(ub->skb = ulog_alloc_skb(size))) 168 if (!(ub->skb = ulog_alloc_skb(size)))
152 goto unlock; 169 goto unlock;
@@ -205,7 +222,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
205 ub->lastnlh = nlh; 222 ub->lastnlh = nlh;
206 223
207 if (ub->qlen >= uloginfo->qthreshold) 224 if (ub->qlen >= uloginfo->qthreshold)
208 ulog_send(group); 225 ulog_send(ebt, group);
209 else if (!timer_pending(&ub->timer)) { 226 else if (!timer_pending(&ub->timer)) {
210 ub->timer.expires = jiffies + flushtimeout * HZ / 100; 227 ub->timer.expires = jiffies + flushtimeout * HZ / 100;
211 add_timer(&ub->timer); 228 add_timer(&ub->timer);
@@ -277,56 +294,89 @@ static struct nf_logger ebt_ulog_logger __read_mostly = {
277 .me = THIS_MODULE, 294 .me = THIS_MODULE,
278}; 295};
279 296
280static int __init ebt_ulog_init(void) 297static int __net_init ebt_ulog_net_init(struct net *net)
281{ 298{
282 int ret;
283 int i; 299 int i;
300 struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
301
284 struct netlink_kernel_cfg cfg = { 302 struct netlink_kernel_cfg cfg = {
285 .groups = EBT_ULOG_MAXNLGROUPS, 303 .groups = EBT_ULOG_MAXNLGROUPS,
286 }; 304 };
287 305
288 if (nlbufsiz >= 128*1024) {
289 pr_warning("Netlink buffer has to be <= 128kB,"
290 " please try a smaller nlbufsiz parameter.\n");
291 return -EINVAL;
292 }
293
294 /* initialize ulog_buffers */ 306 /* initialize ulog_buffers */
295 for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { 307 for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
296 setup_timer(&ulog_buffers[i].timer, ulog_timer, i); 308 ebt->nlgroup[i] = i;
297 spin_lock_init(&ulog_buffers[i].lock); 309 setup_timer(&ebt->ulog_buffers[i].timer, ulog_timer,
310 (unsigned long)&ebt->nlgroup[i]);
311 spin_lock_init(&ebt->ulog_buffers[i].lock);
298 } 312 }
299 313
300 ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); 314 ebt->ebtulognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
301 if (!ebtulognl) 315 if (!ebt->ebtulognl)
302 ret = -ENOMEM; 316 return -ENOMEM;
303 else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0)
304 netlink_kernel_release(ebtulognl);
305 317
306 if (ret == 0) 318 nf_log_set(net, NFPROTO_BRIDGE, &ebt_ulog_logger);
307 nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); 319 return 0;
308
309 return ret;
310} 320}
311 321
312static void __exit ebt_ulog_fini(void) 322static void __net_exit ebt_ulog_net_fini(struct net *net)
313{ 323{
314 ebt_ulog_buff_t *ub;
315 int i; 324 int i;
325 struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
316 326
317 nf_log_unregister(&ebt_ulog_logger); 327 nf_log_unset(net, &ebt_ulog_logger);
318 xt_unregister_target(&ebt_ulog_tg_reg);
319 for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { 328 for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
320 ub = &ulog_buffers[i]; 329 ebt_ulog_buff_t *ub = &ebt->ulog_buffers[i];
321 del_timer(&ub->timer); 330 del_timer(&ub->timer);
322 spin_lock_bh(&ub->lock); 331
323 if (ub->skb) { 332 if (ub->skb) {
324 kfree_skb(ub->skb); 333 kfree_skb(ub->skb);
325 ub->skb = NULL; 334 ub->skb = NULL;
326 } 335 }
327 spin_unlock_bh(&ub->lock);
328 } 336 }
329 netlink_kernel_release(ebtulognl); 337 netlink_kernel_release(ebt->ebtulognl);
338}
339
340static struct pernet_operations ebt_ulog_net_ops = {
341 .init = ebt_ulog_net_init,
342 .exit = ebt_ulog_net_fini,
343 .id = &ebt_ulog_net_id,
344 .size = sizeof(struct ebt_ulog_net),
345};
346
347static int __init ebt_ulog_init(void)
348{
349 int ret;
350
351 if (nlbufsiz >= 128*1024) {
352 pr_warn("Netlink buffer has to be <= 128kB,"
353 "please try a smaller nlbufsiz parameter.\n");
354 return -EINVAL;
355 }
356
357 ret = register_pernet_subsys(&ebt_ulog_net_ops);
358 if (ret)
359 goto out_pernet;
360
361 ret = xt_register_target(&ebt_ulog_tg_reg);
362 if (ret)
363 goto out_target;
364
365 nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);
366
367 return 0;
368
369out_target:
370 unregister_pernet_subsys(&ebt_ulog_net_ops);
371out_pernet:
372 return ret;
373}
374
375static void __exit ebt_ulog_fini(void)
376{
377 nf_log_unregister(&ebt_ulog_logger);
378 xt_unregister_target(&ebt_ulog_tg_reg);
379 unregister_pernet_subsys(&ebt_ulog_net_ops);
330} 380}
331 381
332module_init(ebt_ulog_init); 382module_init(ebt_ulog_init);
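
Setting .id and .size on the pernet_operations, as the ebt_ulog conversion does, makes the core allocate one zeroed private struct per namespace; net_generic() then retrieves it from any struct net. The access pattern in isolation (sketch_* names are illustrative):

#include <net/net_namespace.h>
#include <net/netns/generic.h>

struct sketch_pernet {
        struct sock *nlsk;      /* e.g. a per-netns netlink socket */
};

static int sketch_net_id __read_mostly;

static struct sketch_pernet *sketch_pernet(struct net *net)
{
        return net_generic(net, sketch_net_id);
}

static struct pernet_operations sketch_ops = {
        .id   = &sketch_net_id,
        .size = sizeof(struct sketch_pernet),
};
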
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 40d8258bf74f..70f656ce0f4a 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -64,9 +64,7 @@ static int ebt_broute(struct sk_buff *skb)
64static int __net_init broute_net_init(struct net *net) 64static int __net_init broute_net_init(struct net *net)
65{ 65{
66 net->xt.broute_table = ebt_register_table(net, &broute_table); 66 net->xt.broute_table = ebt_register_table(net, &broute_table);
67 if (IS_ERR(net->xt.broute_table)) 67 return PTR_RET(net->xt.broute_table);
68 return PTR_ERR(net->xt.broute_table);
69 return 0;
70} 68}
71 69
72static void __net_exit broute_net_exit(struct net *net) 70static void __net_exit broute_net_exit(struct net *net)
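
PTR_RET() (later renamed PTR_ERR_OR_ZERO()) collapses the common IS_ERR()/PTR_ERR() tail the hunk deletes; roughly (sketch_ptr_ret is an illustrative name):

#include <linux/err.h>

static inline int sketch_ptr_ret(const void *ptr)
{
        return IS_ERR(ptr) ? PTR_ERR(ptr) : 0;
}
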
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 8d493c91a562..3d110c4fc787 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -138,7 +138,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
138 ethproto = h->h_proto; 138 ethproto = h->h_proto;
139 139
140 if (e->bitmask & EBT_802_3) { 140 if (e->bitmask & EBT_802_3) {
141 if (FWINV2(ntohs(ethproto) >= 1536, EBT_IPROTO)) 141 if (FWINV2(ntohs(ethproto) >= ETH_P_802_3_MIN, EBT_IPROTO))
142 return 1; 142 return 1;
143 } else if (!(e->bitmask & EBT_NOPROTO) && 143 } else if (!(e->bitmask & EBT_NOPROTO) &&
144 FWINV2(e->ethproto != ethproto, EBT_IPROTO)) 144 FWINV2(e->ethproto != ethproto, EBT_IPROTO))
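
ETH_P_802_3_MIN gives the 1536 (0x0600) magic number a name: per 802.3, a type/length field at or above that boundary is an EtherType, below it a frame length. Roughly (sketch_is_ethertype is illustrative):

#include <linux/if_ether.h>

static bool sketch_is_ethertype(__be16 field)
{
        return ntohs(field) >= ETH_P_802_3_MIN; /* 0x0600 == 1536 */
}
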
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 21760f008974..df6d56d8689a 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -301,10 +301,11 @@ static void dev_flowctrl(struct net_device *dev, int on)
301} 301}
302 302
303void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, 303void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
304 struct cflayer *link_support, int head_room, 304 struct cflayer *link_support, int head_room,
305 struct cflayer **layer, int (**rcv_func)( 305 struct cflayer **layer,
306 struct sk_buff *, struct net_device *, 306 int (**rcv_func)(struct sk_buff *, struct net_device *,
307 struct packet_type *, struct net_device *)) 307 struct packet_type *,
308 struct net_device *))
308{ 309{
309 struct caif_device_entry *caifd; 310 struct caif_device_entry *caifd;
310 enum cfcnfg_phy_preference pref; 311 enum cfcnfg_phy_preference pref;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index ff2ff3ce6965..630b8be6e748 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -197,8 +197,8 @@ static void cfsk_put(struct cflayer *layr)
197 197
198/* Packet Control Callback function called from CAIF */ 198/* Packet Control Callback function called from CAIF */
199static void caif_ctrl_cb(struct cflayer *layr, 199static void caif_ctrl_cb(struct cflayer *layr,
200 enum caif_ctrlcmd flow, 200 enum caif_ctrlcmd flow,
201 int phyid) 201 int phyid)
202{ 202{
203 struct caifsock *cf_sk = container_of(layr, struct caifsock, layer); 203 struct caifsock *cf_sk = container_of(layr, struct caifsock, layer);
204 switch (flow) { 204 switch (flow) {
@@ -274,7 +274,7 @@ static void caif_check_flow_release(struct sock *sk)
274 * changed locking, address handling and added MSG_TRUNC. 274 * changed locking, address handling and added MSG_TRUNC.
275 */ 275 */
276static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, 276static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
277 struct msghdr *m, size_t len, int flags) 277 struct msghdr *m, size_t len, int flags)
278 278
279{ 279{
280 struct sock *sk = sock->sk; 280 struct sock *sk = sock->sk;
@@ -348,8 +348,8 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)
348 * changed locking calls, changed address handling. 348 * changed locking calls, changed address handling.
349 */ 349 */
350static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, 350static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
351 struct msghdr *msg, size_t size, 351 struct msghdr *msg, size_t size,
352 int flags) 352 int flags)
353{ 353{
354 struct sock *sk = sock->sk; 354 struct sock *sk = sock->sk;
355 int copied = 0; 355 int copied = 0;
@@ -464,7 +464,7 @@ out:
464 * CAIF flow-on and sock_writable. 464 * CAIF flow-on and sock_writable.
465 */ 465 */
466static long caif_wait_for_flow_on(struct caifsock *cf_sk, 466static long caif_wait_for_flow_on(struct caifsock *cf_sk,
467 int wait_writeable, long timeo, int *err) 467 int wait_writeable, long timeo, int *err)
468{ 468{
469 struct sock *sk = &cf_sk->sk; 469 struct sock *sk = &cf_sk->sk;
470 DEFINE_WAIT(wait); 470 DEFINE_WAIT(wait);
@@ -518,7 +518,7 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,
518 518
519/* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */ 519/* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */
520static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock, 520static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
521 struct msghdr *msg, size_t len) 521 struct msghdr *msg, size_t len)
522{ 522{
523 struct sock *sk = sock->sk; 523 struct sock *sk = sock->sk;
524 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); 524 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
@@ -593,7 +593,7 @@ err:
593 * and other minor adaptations. 593 * and other minor adaptations.
594 */ 594 */
595static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, 595static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
596 struct msghdr *msg, size_t len) 596 struct msghdr *msg, size_t len)
597{ 597{
598 struct sock *sk = sock->sk; 598 struct sock *sk = sock->sk;
599 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); 599 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
@@ -672,7 +672,7 @@ out_err:
672} 672}
673 673
674static int setsockopt(struct socket *sock, 674static int setsockopt(struct socket *sock,
675 int lvl, int opt, char __user *ov, unsigned int ol) 675 int lvl, int opt, char __user *ov, unsigned int ol)
676{ 676{
677 struct sock *sk = sock->sk; 677 struct sock *sk = sock->sk;
678 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); 678 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
@@ -934,7 +934,7 @@ static int caif_release(struct socket *sock)
934 934
935/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */ 935/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
936static unsigned int caif_poll(struct file *file, 936static unsigned int caif_poll(struct file *file,
937 struct socket *sock, poll_table *wait) 937 struct socket *sock, poll_table *wait)
938{ 938{
939 struct sock *sk = sock->sk; 939 struct sock *sk = sock->sk;
940 unsigned int mask; 940 unsigned int mask;
@@ -1024,7 +1024,7 @@ static void caif_sock_destructor(struct sock *sk)
1024} 1024}
1025 1025
1026static int caif_create(struct net *net, struct socket *sock, int protocol, 1026static int caif_create(struct net *net, struct socket *sock, int protocol,
1027 int kern) 1027 int kern)
1028{ 1028{
1029 struct sock *sk = NULL; 1029 struct sock *sk = NULL;
1030 struct caifsock *cf_sk = NULL; 1030 struct caifsock *cf_sk = NULL;
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index ef8ebaa993cf..d76278d644b8 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -75,7 +75,7 @@ static int cfusbl_transmit(struct cflayer *layr, struct cfpkt *pkt)
75} 75}
76 76
77static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 77static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
78 int phyid) 78 int phyid)
79{ 79{
80 if (layr->up && layr->up->ctrlcmd) 80 if (layr->up && layr->up->ctrlcmd)
81 layr->up->ctrlcmd(layr->up, ctrl, layr->id); 81 layr->up->ctrlcmd(layr->up, ctrl, layr->id);
@@ -121,7 +121,7 @@ static struct packet_type caif_usb_type __read_mostly = {
121}; 121};
122 122
123static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, 123static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
124 void *arg) 124 void *arg)
125{ 125{
126 struct net_device *dev = arg; 126 struct net_device *dev = arg;
127 struct caif_dev_common common; 127 struct caif_dev_common common;
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index f1dbddb95a6c..246ac3aa8de5 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -61,11 +61,11 @@ struct cfcnfg {
61}; 61};
62 62
63static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, 63static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id,
64 enum cfctrl_srv serv, u8 phyid, 64 enum cfctrl_srv serv, u8 phyid,
65 struct cflayer *adapt_layer); 65 struct cflayer *adapt_layer);
66static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id); 66static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id);
67static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, 67static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
68 struct cflayer *adapt_layer); 68 struct cflayer *adapt_layer);
69static void cfctrl_resp_func(void); 69static void cfctrl_resp_func(void);
70static void cfctrl_enum_resp(void); 70static void cfctrl_enum_resp(void);
71 71
@@ -131,7 +131,7 @@ static void cfctrl_resp_func(void)
131} 131}
132 132
133static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo_rcu(struct cfcnfg *cnfg, 133static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo_rcu(struct cfcnfg *cnfg,
134 u8 phyid) 134 u8 phyid)
135{ 135{
136 struct cfcnfg_phyinfo *phy; 136 struct cfcnfg_phyinfo *phy;
137 137
@@ -216,8 +216,8 @@ static const int protohead[CFCTRL_SRV_MASK] = {
216 216
217 217
218static int caif_connect_req_to_link_param(struct cfcnfg *cnfg, 218static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
219 struct caif_connect_request *s, 219 struct caif_connect_request *s,
220 struct cfctrl_link_param *l) 220 struct cfctrl_link_param *l)
221{ 221{
222 struct dev_info *dev_info; 222 struct dev_info *dev_info;
223 enum cfcnfg_phy_preference pref; 223 enum cfcnfg_phy_preference pref;
@@ -301,8 +301,7 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
301 301
302int caif_connect_client(struct net *net, struct caif_connect_request *conn_req, 302int caif_connect_client(struct net *net, struct caif_connect_request *conn_req,
303 struct cflayer *adap_layer, int *ifindex, 303 struct cflayer *adap_layer, int *ifindex,
304 int *proto_head, 304 int *proto_head, int *proto_tail)
305 int *proto_tail)
306{ 305{
307 struct cflayer *frml; 306 struct cflayer *frml;
308 struct cfcnfg_phyinfo *phy; 307 struct cfcnfg_phyinfo *phy;
@@ -364,7 +363,7 @@ unlock:
364EXPORT_SYMBOL(caif_connect_client); 363EXPORT_SYMBOL(caif_connect_client);
365 364
366static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, 365static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
367 struct cflayer *adapt_layer) 366 struct cflayer *adapt_layer)
368{ 367{
369 if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL) 368 if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL)
370 adapt_layer->ctrlcmd(adapt_layer, 369 adapt_layer->ctrlcmd(adapt_layer,
@@ -526,7 +525,7 @@ out_err:
526EXPORT_SYMBOL(cfcnfg_add_phy_layer); 525EXPORT_SYMBOL(cfcnfg_add_phy_layer);
527 526
528int cfcnfg_set_phy_state(struct cfcnfg *cnfg, struct cflayer *phy_layer, 527int cfcnfg_set_phy_state(struct cfcnfg *cnfg, struct cflayer *phy_layer,
529 bool up) 528 bool up)
530{ 529{
531 struct cfcnfg_phyinfo *phyinfo; 530 struct cfcnfg_phyinfo *phyinfo;
532 531
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index a376ec1ac0a7..9cd057c59c59 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -20,12 +20,12 @@
20 20
21#ifdef CAIF_NO_LOOP 21#ifdef CAIF_NO_LOOP
22static int handle_loop(struct cfctrl *ctrl, 22static int handle_loop(struct cfctrl *ctrl,
23 int cmd, struct cfpkt *pkt){ 23 int cmd, struct cfpkt *pkt){
24 return -1; 24 return -1;
25} 25}
26#else 26#else
27static int handle_loop(struct cfctrl *ctrl, 27static int handle_loop(struct cfctrl *ctrl,
28 int cmd, struct cfpkt *pkt); 28 int cmd, struct cfpkt *pkt);
29#endif 29#endif
30static int cfctrl_recv(struct cflayer *layr, struct cfpkt *pkt); 30static int cfctrl_recv(struct cflayer *layr, struct cfpkt *pkt);
31static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 31static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
@@ -72,7 +72,7 @@ void cfctrl_remove(struct cflayer *layer)
72} 72}
73 73
74static bool param_eq(const struct cfctrl_link_param *p1, 74static bool param_eq(const struct cfctrl_link_param *p1,
75 const struct cfctrl_link_param *p2) 75 const struct cfctrl_link_param *p2)
76{ 76{
77 bool eq = 77 bool eq =
78 p1->linktype == p2->linktype && 78 p1->linktype == p2->linktype &&
@@ -197,8 +197,8 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
197} 197}
198 198
199int cfctrl_linkup_request(struct cflayer *layer, 199int cfctrl_linkup_request(struct cflayer *layer,
200 struct cfctrl_link_param *param, 200 struct cfctrl_link_param *param,
201 struct cflayer *user_layer) 201 struct cflayer *user_layer)
202{ 202{
203 struct cfctrl *cfctrl = container_obj(layer); 203 struct cfctrl *cfctrl = container_obj(layer);
204 u32 tmp32; 204 u32 tmp32;
@@ -301,7 +301,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
301} 301}
302 302
303int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, 303int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
304 struct cflayer *client) 304 struct cflayer *client)
305{ 305{
306 int ret; 306 int ret;
307 struct cfpkt *pkt; 307 struct cfpkt *pkt;
@@ -555,7 +555,7 @@ error:
555} 555}
556 556
557static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 557static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
558 int phyid) 558 int phyid)
559{ 559{
560 struct cfctrl *this = container_obj(layr); 560 struct cfctrl *this = container_obj(layr);
561 switch (ctrl) { 561 switch (ctrl) {
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index 0a7df7ef062d..204c5e226a61 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -28,7 +28,7 @@ struct cffrml {
28static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt); 28static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt);
29static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt); 29static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt);
30static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 30static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
31 int phyid); 31 int phyid);
32 32
33static u32 cffrml_rcv_error; 33static u32 cffrml_rcv_error;
34static u32 cffrml_rcv_checsum_error; 34static u32 cffrml_rcv_checsum_error;
@@ -167,7 +167,7 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
167} 167}
168 168
169static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 169static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
170 int phyid) 170 int phyid)
171{ 171{
172 if (layr->up && layr->up->ctrlcmd) 172 if (layr->up && layr->up->ctrlcmd)
173 layr->up->ctrlcmd(layr->up, ctrl, layr->id); 173 layr->up->ctrlcmd(layr->up, ctrl, layr->id);
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 94b08612a4d8..154d9f8f964c 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -42,7 +42,7 @@ struct cfmuxl {
42static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt); 42static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt);
43static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt); 43static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt);
44static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 44static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
45 int phyid); 45 int phyid);
46static struct cflayer *get_up(struct cfmuxl *muxl, u16 id); 46static struct cflayer *get_up(struct cfmuxl *muxl, u16 id);
47 47
48struct cflayer *cfmuxl_create(void) 48struct cflayer *cfmuxl_create(void)
@@ -244,7 +244,7 @@ static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt)
244} 244}
245 245
246static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 246static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
247 int phyid) 247 int phyid)
248{ 248{
249 struct cfmuxl *muxl = container_obj(layr); 249 struct cfmuxl *muxl = container_obj(layr);
250 struct cflayer *layer; 250 struct cflayer *layer;
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 863dedd91bb6..e8f9c149504d 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -266,8 +266,8 @@ inline u16 cfpkt_getlen(struct cfpkt *pkt)
266} 266}
267 267
268inline u16 cfpkt_iterate(struct cfpkt *pkt, 268inline u16 cfpkt_iterate(struct cfpkt *pkt,
269 u16 (*iter_func)(u16, void *, u16), 269 u16 (*iter_func)(u16, void *, u16),
270 u16 data) 270 u16 data)
271{ 271{
272 /* 272 /*
273 * Don't care about the performance hit of linearizing, 273 * Don't care about the performance hit of linearizing,
@@ -307,8 +307,8 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len)
307} 307}
308 308
309struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, 309struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
310 struct cfpkt *addpkt, 310 struct cfpkt *addpkt,
311 u16 expectlen) 311 u16 expectlen)
312{ 312{
313 struct sk_buff *dst = pkt_to_skb(dstpkt); 313 struct sk_buff *dst = pkt_to_skb(dstpkt);
314 struct sk_buff *add = pkt_to_skb(addpkt); 314 struct sk_buff *add = pkt_to_skb(addpkt);
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 2b563ad04597..db51830c8587 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -43,7 +43,7 @@ static void cfrfml_release(struct cflayer *layer)
43} 43}
44 44
45struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info, 45struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
46 int mtu_size) 46 int mtu_size)
47{ 47{
48 int tmp; 48 int tmp;
49 struct cfrfml *this = kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); 49 struct cfrfml *this = kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
@@ -69,7 +69,7 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
69} 69}
70 70
71static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead, 71static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead,
72 struct cfpkt *pkt, int *err) 72 struct cfpkt *pkt, int *err)
73{ 73{
74 struct cfpkt *tmppkt; 74 struct cfpkt *tmppkt;
75 *err = -EPROTO; 75 *err = -EPROTO;
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 8e68b97f13ee..147c232b1285 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -29,7 +29,7 @@ struct cfserl {
29static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt); 29static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt);
30static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt); 30static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
31static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 31static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
32 int phyid); 32 int phyid);
33 33
34struct cflayer *cfserl_create(int instance, bool use_stx) 34struct cflayer *cfserl_create(int instance, bool use_stx)
35{ 35{
@@ -182,7 +182,7 @@ static int cfserl_transmit(struct cflayer *layer, struct cfpkt *newpkt)
182} 182}
183 183
184static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 184static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
185 int phyid) 185 int phyid)
186{ 186{
187 layr->up->ctrlcmd(layr->up, ctrl, phyid); 187 layr->up->ctrlcmd(layr->up, ctrl, phyid);
188} 188}
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index ba217e90765e..95f7f5ea30ef 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -25,7 +25,7 @@
25#define container_obj(layr) container_of(layr, struct cfsrvl, layer) 25#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
26 26
27static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 27static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
28 int phyid) 28 int phyid)
29{ 29{
30 struct cfsrvl *service = container_obj(layr); 30 struct cfsrvl *service = container_obj(layr);
31 31
@@ -158,10 +158,9 @@ static void cfsrvl_release(struct cflayer *layer)
158} 158}
159 159
160void cfsrvl_init(struct cfsrvl *service, 160void cfsrvl_init(struct cfsrvl *service,
161 u8 channel_id, 161 u8 channel_id,
162 struct dev_info *dev_info, 162 struct dev_info *dev_info,
163 bool supports_flowctrl 163 bool supports_flowctrl)
164 )
165{ 164{
166 caif_assert(offsetof(struct cfsrvl, layer) == 0); 165 caif_assert(offsetof(struct cfsrvl, layer) == 0);
167 service->open = false; 166 service->open = false;
@@ -207,8 +206,8 @@ void caif_free_client(struct cflayer *adap_layer)
207EXPORT_SYMBOL(caif_free_client); 206EXPORT_SYMBOL(caif_free_client);
208 207
209void caif_client_register_refcnt(struct cflayer *adapt_layer, 208void caif_client_register_refcnt(struct cflayer *adapt_layer,
210 void (*hold)(struct cflayer *lyr), 209 void (*hold)(struct cflayer *lyr),
211 void (*put)(struct cflayer *lyr)) 210 void (*put)(struct cflayer *lyr))
212{ 211{
213 struct cfsrvl *service; 212 struct cfsrvl *service;
214 213
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index e597733affb8..26a4e4e3a767 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -167,7 +167,7 @@ static void chnl_put(struct cflayer *lyr)
167} 167}
168 168
169static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow, 169static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
170 int phyid) 170 int phyid)
171{ 171{
172 struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); 172 struct chnl_net *priv = container_of(layr, struct chnl_net, chnl);
173 pr_debug("NET flowctrl func called flow: %s\n", 173 pr_debug("NET flowctrl func called flow: %s\n",
@@ -443,7 +443,7 @@ nla_put_failure:
443} 443}
444 444
445static void caif_netlink_parms(struct nlattr *data[], 445static void caif_netlink_parms(struct nlattr *data[],
446 struct caif_connect_request *conn_req) 446 struct caif_connect_request *conn_req)
447{ 447{
448 if (!data) { 448 if (!data) {
449 pr_warn("no params data found\n"); 449 pr_warn("no params data found\n");
@@ -488,7 +488,7 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev,
488} 488}
489 489
490static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[], 490static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[],
491 struct nlattr *data[]) 491 struct nlattr *data[])
492{ 492{
493 struct chnl_net *caifdev; 493 struct chnl_net *caifdev;
494 ASSERT_RTNL(); 494 ASSERT_RTNL();
diff --git a/net/can/af_can.c b/net/can/af_can.c
index c48e5220bbac..c4e50852c9f4 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -525,7 +525,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
525 525
526 d = find_dev_rcv_lists(dev); 526 d = find_dev_rcv_lists(dev);
527 if (!d) { 527 if (!d) {
528 printk(KERN_ERR "BUG: receive list not found for " 528 pr_err("BUG: receive list not found for "
529 "dev %s, id %03X, mask %03X\n", 529 "dev %s, id %03X, mask %03X\n",
530 DNAME(dev), can_id, mask); 530 DNAME(dev), can_id, mask);
531 goto out; 531 goto out;
@@ -546,16 +546,13 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
546 } 546 }
547 547
548 /* 548 /*
549 * Check for bugs in CAN protocol implementations: 549 * Check for bugs in CAN protocol implementations using af_can.c:
550 * If no matching list item was found, the list cursor variable next 550 * 'r' will be NULL if no matching list item was found for removal.
551 * will be NULL, while r will point to the last item of the list.
552 */ 551 */
553 552
554 if (!r) { 553 if (!r) {
555 printk(KERN_ERR "BUG: receive list entry not found for " 554 WARN(1, "BUG: receive list entry not found for dev %s, "
556 "dev %s, id %03X, mask %03X\n", 555 "id %03X, mask %03X\n", DNAME(dev), can_id, mask);
557 DNAME(dev), can_id, mask);
558 r = NULL;
559 goto out; 556 goto out;
560 } 557 }
561 558
@@ -749,8 +746,7 @@ int can_proto_register(const struct can_proto *cp)
749 int err = 0; 746 int err = 0;
750 747
751 if (proto < 0 || proto >= CAN_NPROTO) { 748 if (proto < 0 || proto >= CAN_NPROTO) {
752 printk(KERN_ERR "can: protocol number %d out of range\n", 749 pr_err("can: protocol number %d out of range\n", proto);
753 proto);
754 return -EINVAL; 750 return -EINVAL;
755 } 751 }
756 752
@@ -761,8 +757,7 @@ int can_proto_register(const struct can_proto *cp)
761 mutex_lock(&proto_tab_lock); 757 mutex_lock(&proto_tab_lock);
762 758
763 if (proto_tab[proto]) { 759 if (proto_tab[proto]) {
764 printk(KERN_ERR "can: protocol %d already registered\n", 760 pr_err("can: protocol %d already registered\n", proto);
765 proto);
766 err = -EBUSY; 761 err = -EBUSY;
767 } else 762 } else
768 RCU_INIT_POINTER(proto_tab[proto], cp); 763 RCU_INIT_POINTER(proto_tab[proto], cp);
@@ -816,11 +811,8 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
816 811
817 /* create new dev_rcv_lists for this device */ 812 /* create new dev_rcv_lists for this device */
818 d = kzalloc(sizeof(*d), GFP_KERNEL); 813 d = kzalloc(sizeof(*d), GFP_KERNEL);
819 if (!d) { 814 if (!d)
820 printk(KERN_ERR
821 "can: allocation of receive list failed\n");
822 return NOTIFY_DONE; 815 return NOTIFY_DONE;
823 }
824 BUG_ON(dev->ml_priv); 816 BUG_ON(dev->ml_priv);
825 dev->ml_priv = d; 817 dev->ml_priv = d;
826 818
@@ -838,8 +830,8 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
838 dev->ml_priv = NULL; 830 dev->ml_priv = NULL;
839 } 831 }
840 } else 832 } else
841 printk(KERN_ERR "can: notifier: receive list not " 833 pr_err("can: notifier: receive list not found for dev "
842 "found for dev %s\n", dev->name); 834 "%s\n", dev->name);
843 835
844 spin_unlock(&can_rcvlists_lock); 836 spin_unlock(&can_rcvlists_lock);
845 837
@@ -927,7 +919,7 @@ static __exit void can_exit(void)
927 /* remove created dev_rcv_lists from still registered CAN devices */ 919 /* remove created dev_rcv_lists from still registered CAN devices */
928 rcu_read_lock(); 920 rcu_read_lock();
929 for_each_netdev_rcu(&init_net, dev) { 921 for_each_netdev_rcu(&init_net, dev) {
930 if (dev->type == ARPHRD_CAN && dev->ml_priv){ 922 if (dev->type == ARPHRD_CAN && dev->ml_priv) {
931 923
932 struct dev_rcv_lists *d = dev->ml_priv; 924 struct dev_rcv_lists *d = dev->ml_priv;
933 925
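
The af_can.c changes above are mechanical printk(KERN_ERR ...) to pr_err() conversions, plus one WARN() where a backtrace helps. With a pr_fmt() macro the subsystem prefix could even be applied automatically; a hypothetical variant (sketch_report is illustrative):

#define pr_fmt(fmt) "can: " fmt /* must precede the include */
#include <linux/printk.h>

static void sketch_report(int proto)
{
        pr_err("protocol number %d out of range\n", proto);
        /* expands to printk(KERN_ERR "can: protocol number ...") */
}
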
diff --git a/net/can/gw.c b/net/can/gw.c
index 117814a7e73c..3ee690e8c7d3 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -778,8 +778,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
778 return 0; 778 return 0;
779} 779}
780 780
781static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, 781static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
782 void *arg)
783{ 782{
784 struct rtcanmsg *r; 783 struct rtcanmsg *r;
785 struct cgw_job *gwj; 784 struct cgw_job *gwj;
@@ -868,7 +867,7 @@ static void cgw_remove_all_jobs(void)
868 } 867 }
869} 868}
870 869
871static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 870static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
872{ 871{
873 struct cgw_job *gwj = NULL; 872 struct cgw_job *gwj = NULL;
874 struct hlist_node *nx; 873 struct hlist_node *nx;
diff --git a/net/can/raw.c b/net/can/raw.c
index c1764e41ddaf..1085e65f848e 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -711,9 +711,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
711 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 711 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
712 if (err < 0) 712 if (err < 0)
713 goto free_skb; 713 goto free_skb;
714 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); 714
715 if (err < 0) 715 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
716 goto free_skb;
717 716
718 skb->dev = dev; 717 skb->dev = dev;
719 skb->sk = sk; 718 skb->sk = sk;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 368f9c3f9dc6..ebba65d7e0da 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -749,7 +749,9 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
749 749
750 /* exceptional events? */ 750 /* exceptional events? */
751 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 751 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
752 mask |= POLLERR; 752 mask |= POLLERR |
753 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
754
753 if (sk->sk_shutdown & RCV_SHUTDOWN) 755 if (sk->sk_shutdown & RCV_SHUTDOWN)
754 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 756 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
755 if (sk->sk_shutdown == SHUTDOWN_MASK) 757 if (sk->sk_shutdown == SHUTDOWN_MASK)
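
The datagram_poll() change surfaces queued socket errors as POLLPRI when SOCK_SELECT_ERR_QUEUE is set; user space opts in with the SO_SELECT_ERR_QUEUE socket option from the same series. Hypothetical user-space usage, assuming libc headers that define the option (wait_errqueue is illustrative):

#include <poll.h>
#include <sys/socket.h>

static int wait_errqueue(int fd, int timeout_ms)
{
        struct pollfd pfd = { .fd = fd, .events = POLLPRI };
        int on = 1;

        /* ask for POLLPRI on queued errors (e.g. TX timestamps) */
        setsockopt(fd, SOL_SOCKET, SO_SELECT_ERR_QUEUE, &on, sizeof(on));

        if (poll(&pfd, 1, timeout_ms) <= 0)
                return -1;
        return (pfd.revents & POLLPRI) ? 0 : -1;
}
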
diff --git a/net/core/dev.c b/net/core/dev.c
index b24ab0e98eb4..9e26b8d9eafe 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -200,7 +200,7 @@ static inline void rps_unlock(struct softnet_data *sd)
200} 200}
201 201
202/* Device list insertion */ 202/* Device list insertion */
203static int list_netdevice(struct net_device *dev) 203static void list_netdevice(struct net_device *dev)
204{ 204{
205 struct net *net = dev_net(dev); 205 struct net *net = dev_net(dev);
206 206
@@ -214,8 +214,6 @@ static int list_netdevice(struct net_device *dev)
214 write_unlock_bh(&dev_base_lock); 214 write_unlock_bh(&dev_base_lock);
215 215
216 dev_base_seq_inc(net); 216 dev_base_seq_inc(net);
217
218 return 0;
219} 217}
220 218
221/* Device list removal 219/* Device list removal
@@ -2210,30 +2208,40 @@ out:
2210} 2208}
2211EXPORT_SYMBOL(skb_checksum_help); 2209EXPORT_SYMBOL(skb_checksum_help);
2212 2210
2213/** 2211__be16 skb_network_protocol(struct sk_buff *skb)
2214 * skb_mac_gso_segment - mac layer segmentation handler.
2215 * @skb: buffer to segment
2216 * @features: features for the output path (see dev->features)
2217 */
2218struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
2219 netdev_features_t features)
2220{ 2212{
2221 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
2222 struct packet_offload *ptype;
2223 __be16 type = skb->protocol; 2213 __be16 type = skb->protocol;
2224 int vlan_depth = ETH_HLEN; 2214 int vlan_depth = ETH_HLEN;
2225 2215
2226 while (type == htons(ETH_P_8021Q)) { 2216 while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
2227 struct vlan_hdr *vh; 2217 struct vlan_hdr *vh;
2228 2218
2229 if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) 2219 if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
2230 return ERR_PTR(-EINVAL); 2220 return 0;
2231 2221
2232 vh = (struct vlan_hdr *)(skb->data + vlan_depth); 2222 vh = (struct vlan_hdr *)(skb->data + vlan_depth);
2233 type = vh->h_vlan_encapsulated_proto; 2223 type = vh->h_vlan_encapsulated_proto;
2234 vlan_depth += VLAN_HLEN; 2224 vlan_depth += VLAN_HLEN;
2235 } 2225 }
2236 2226
2227 return type;
2228}
2229
2230/**
2231 * skb_mac_gso_segment - mac layer segmentation handler.
2232 * @skb: buffer to segment
2233 * @features: features for the output path (see dev->features)
2234 */
2235struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
2236 netdev_features_t features)
2237{
2238 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
2239 struct packet_offload *ptype;
2240 __be16 type = skb_network_protocol(skb);
2241
2242 if (unlikely(!type))
2243 return ERR_PTR(-EINVAL);
2244
2237 __skb_pull(skb, skb->mac_len); 2245 __skb_pull(skb, skb->mac_len);
2238 2246
2239 rcu_read_lock(); 2247 rcu_read_lock();
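
skb_network_protocol() factors the stacked-tag walk out of the GSO path and teaches it 802.1ad. A user-space rendition of the same walk over a linear frame, illustrative only (inner_ethertype is not a kernel function):

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

static uint16_t inner_ethertype(const uint8_t *frame, size_t len)
{
        size_t off = 12;        /* EtherType field of the outer header */
        uint16_t type;

        for (;;) {
                if (off + 2 > len)
                        return 0;       /* truncated; mirror the error path */
                memcpy(&type, frame + off, 2);
                type = ntohs(type);
                if (type != 0x8100 && type != 0x88A8)   /* 802.1Q / 802.1ad */
                        return type;
                off += 4;       /* skip one VLAN tag (TPID + TCI) */
        }
}
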
@@ -2400,24 +2408,12 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
2400 return 0; 2408 return 0;
2401} 2409}
2402 2410
2403static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
2404{
2405 return ((features & NETIF_F_GEN_CSUM) ||
2406 ((features & NETIF_F_V4_CSUM) &&
2407 protocol == htons(ETH_P_IP)) ||
2408 ((features & NETIF_F_V6_CSUM) &&
2409 protocol == htons(ETH_P_IPV6)) ||
2410 ((features & NETIF_F_FCOE_CRC) &&
2411 protocol == htons(ETH_P_FCOE)));
2412}
2413
2414static netdev_features_t harmonize_features(struct sk_buff *skb, 2411static netdev_features_t harmonize_features(struct sk_buff *skb,
2415 __be16 protocol, netdev_features_t features) 2412 __be16 protocol, netdev_features_t features)
2416{ 2413{
2417 if (skb->ip_summed != CHECKSUM_NONE && 2414 if (skb->ip_summed != CHECKSUM_NONE &&
2418 !can_checksum_protocol(features, protocol)) { 2415 !can_checksum_protocol(features, protocol)) {
2419 features &= ~NETIF_F_ALL_CSUM; 2416 features &= ~NETIF_F_ALL_CSUM;
2420 features &= ~NETIF_F_SG;
2421 } else if (illegal_highdma(skb->dev, skb)) { 2417 } else if (illegal_highdma(skb->dev, skb)) {
2422 features &= ~NETIF_F_SG; 2418 features &= ~NETIF_F_SG;
2423 } 2419 }
@@ -2433,20 +2429,22 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
2433 if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) 2429 if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
2434 features &= ~NETIF_F_GSO_MASK; 2430 features &= ~NETIF_F_GSO_MASK;
2435 2431
2436 if (protocol == htons(ETH_P_8021Q)) { 2432 if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
2437 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; 2433 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
2438 protocol = veh->h_vlan_encapsulated_proto; 2434 protocol = veh->h_vlan_encapsulated_proto;
2439 } else if (!vlan_tx_tag_present(skb)) { 2435 } else if (!vlan_tx_tag_present(skb)) {
2440 return harmonize_features(skb, protocol, features); 2436 return harmonize_features(skb, protocol, features);
2441 } 2437 }
2442 2438
2443 features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX); 2439 features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
2440 NETIF_F_HW_VLAN_STAG_TX);
2444 2441
2445 if (protocol != htons(ETH_P_8021Q)) { 2442 if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) {
2446 return harmonize_features(skb, protocol, features); 2443 return harmonize_features(skb, protocol, features);
2447 } else { 2444 } else {
2448 features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | 2445 features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
2449 NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX; 2446 NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
2447 NETIF_F_HW_VLAN_STAG_TX;
2450 return harmonize_features(skb, protocol, features); 2448 return harmonize_features(skb, protocol, features);
2451 } 2449 }
2452} 2450}
@@ -2487,8 +2485,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2487 features = netif_skb_features(skb); 2485 features = netif_skb_features(skb);
2488 2486
2489 if (vlan_tx_tag_present(skb) && 2487 if (vlan_tx_tag_present(skb) &&
2490 !(features & NETIF_F_HW_VLAN_TX)) { 2488 !vlan_hw_offload_capable(features, skb->vlan_proto)) {
2491 skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); 2489 skb = __vlan_put_tag(skb, skb->vlan_proto,
2490 vlan_tx_tag_get(skb));
2492 if (unlikely(!skb)) 2491 if (unlikely(!skb))
2493 goto out; 2492 goto out;
2494 2493
@@ -2592,6 +2591,7 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
2592 */ 2591 */
2593 if (shinfo->gso_size) { 2592 if (shinfo->gso_size) {
2594 unsigned int hdr_len; 2593 unsigned int hdr_len;
2594 u16 gso_segs = shinfo->gso_segs;
2595 2595
2596 /* mac layer + network layer */ 2596 /* mac layer + network layer */
2597 hdr_len = skb_transport_header(skb) - skb_mac_header(skb); 2597 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
@@ -2601,7 +2601,12 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
2601 hdr_len += tcp_hdrlen(skb); 2601 hdr_len += tcp_hdrlen(skb);
2602 else 2602 else
2603 hdr_len += sizeof(struct udphdr); 2603 hdr_len += sizeof(struct udphdr);
2604 qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len; 2604
2605 if (shinfo->gso_type & SKB_GSO_DODGY)
2606 gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
2607 shinfo->gso_size);
2608
2609 qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
2605 } 2610 }
2606} 2611}
2607 2612
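
For untrusted (SKB_GSO_DODGY) senders the hunk re-derives gso_segs from the payload length instead of trusting the stated count. Worked numbers under typical TSO assumptions (sketch_gso_segs is illustrative):

#include <linux/kernel.h>

static u16 sketch_gso_segs(unsigned int len, unsigned int hdr_len,
                           unsigned int gso_size)
{
        return DIV_ROUND_UP(len - hdr_len, gso_size);
}

/* e.g. a 65226-byte TSO skb with 54 bytes of headers and MSS 1448:
 * DIV_ROUND_UP(65172, 1448) == 46 segments, so the qdisc accounts
 * (46 - 1) * 54 extra header bytes in pkt_len. */
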
@@ -3329,7 +3334,7 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
3329 * netdev_rx_handler_unregister - unregister receive handler 3334 * netdev_rx_handler_unregister - unregister receive handler
3330 * @dev: device to unregister a handler from 3335 * @dev: device to unregister a handler from
3331 * 3336 *
3332 * Unregister a receive hander from a device. 3337 * Unregister a receive handler from a device.
3333 * 3338 *
3334 * The caller must hold the rtnl_mutex. 3339 * The caller must hold the rtnl_mutex.
3335 */ 3340 */
@@ -3358,6 +3363,7 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
3358 case __constant_htons(ETH_P_IP): 3363 case __constant_htons(ETH_P_IP):
3359 case __constant_htons(ETH_P_IPV6): 3364 case __constant_htons(ETH_P_IPV6):
3360 case __constant_htons(ETH_P_8021Q): 3365 case __constant_htons(ETH_P_8021Q):
3366 case __constant_htons(ETH_P_8021AD):
3361 return true; 3367 return true;
3362 default: 3368 default:
3363 return false; 3369 return false;
@@ -3398,7 +3404,8 @@ another_round:
3398 3404
3399 __this_cpu_inc(softnet_data.processed); 3405 __this_cpu_inc(softnet_data.processed);
3400 3406
3401 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { 3407 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
3408 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
3402 skb = vlan_untag(skb); 3409 skb = vlan_untag(skb);
3403 if (unlikely(!skb)) 3410 if (unlikely(!skb))
3404 goto unlock; 3411 goto unlock;
@@ -4066,6 +4073,9 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
4066 napi->gro_list = NULL; 4073 napi->gro_list = NULL;
4067 napi->skb = NULL; 4074 napi->skb = NULL;
4068 napi->poll = poll; 4075 napi->poll = poll;
4076 if (weight > NAPI_POLL_WEIGHT)
4077 pr_err_once("netif_napi_add() called with weight %d on device %s\n",
4078 weight, dev->name);
4069 napi->weight = weight; 4079 napi->weight = weight;
4070 list_add(&napi->dev_list, &dev->napi_list); 4080 list_add(&napi->dev_list, &dev->napi_list);
4071 napi->dev = dev; 4081 napi->dev = dev;
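With the new sanity check, drivers are expected to register with the standard weight or the pr_err_once() above fires at registration time. A hedged usage sketch (my_probe, my_poll, and priv are illustrative names, not from this patch):

static int my_probe(struct net_device *netdev, struct my_priv *priv)
{
	/* NAPI_POLL_WEIGHT (64) is the sanctioned value */
	netif_napi_add(netdev, &priv->napi, my_poll, NAPI_POLL_WEIGHT);
	return 0;
}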
@@ -4927,20 +4937,25 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
4927 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); 4937 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4928 } 4938 }
4929 4939
4930 /* Fix illegal SG+CSUM combinations. */
4931 if ((features & NETIF_F_SG) &&
4932 !(features & NETIF_F_ALL_CSUM)) {
4933 netdev_dbg(dev,
4934 "Dropping NETIF_F_SG since no checksum feature.\n");
4935 features &= ~NETIF_F_SG;
4936 }
4937
4938 /* TSO requires that SG is present as well. */ 4940 /* TSO requires that SG is present as well. */
4939 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { 4941 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
4940 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); 4942 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
4941 features &= ~NETIF_F_ALL_TSO; 4943 features &= ~NETIF_F_ALL_TSO;
4942 } 4944 }
4943 4945
4946 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
4947 !(features & NETIF_F_IP_CSUM)) {
4948 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
4949 features &= ~NETIF_F_TSO;
4950 features &= ~NETIF_F_TSO_ECN;
4951 }
4952
4953 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
4954 !(features & NETIF_F_IPV6_CSUM)) {
4955 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
4956 features &= ~NETIF_F_TSO6;
4957 }
4958
4944 /* TSO ECN requires that TSO is present as well. */ 4959 /* TSO ECN requires that TSO is present as well. */
4945 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) 4960 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
4946 features &= ~NETIF_F_TSO_ECN; 4961 features &= ~NETIF_F_TSO_ECN;
@@ -5171,7 +5186,8 @@ int register_netdevice(struct net_device *dev)
5171 } 5186 }
5172 } 5187 }
5173 5188
5174 if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) && 5189 if (((dev->hw_features | dev->features) &
5190 NETIF_F_HW_VLAN_CTAG_FILTER) &&
5175 (!dev->netdev_ops->ndo_vlan_rx_add_vid || 5191 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
5176 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) { 5192 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
5177 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); 5193 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
@@ -5208,6 +5224,10 @@ int register_netdevice(struct net_device *dev)
5208 */ 5224 */
5209 dev->vlan_features |= NETIF_F_HIGHDMA; 5225 dev->vlan_features |= NETIF_F_HIGHDMA;
5210 5226
 5227 /* Make NETIF_F_SG inheritable by tunnel devices.
5228 */
5229 dev->hw_enc_features |= NETIF_F_SG;
5230
5211 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); 5231 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5212 ret = notifier_to_errno(ret); 5232 ret = notifier_to_errno(ret);
5213 if (ret) 5233 if (ret)
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index abdc9e6ef33e..c013f38482a1 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -22,7 +22,8 @@
22 22
23static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, 23static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
24 const unsigned char *addr, int addr_len, 24 const unsigned char *addr, int addr_len,
25 unsigned char addr_type, bool global) 25 unsigned char addr_type, bool global,
26 bool sync)
26{ 27{
27 struct netdev_hw_addr *ha; 28 struct netdev_hw_addr *ha;
28 int alloc_size; 29 int alloc_size;
@@ -37,7 +38,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
37 ha->type = addr_type; 38 ha->type = addr_type;
38 ha->refcount = 1; 39 ha->refcount = 1;
39 ha->global_use = global; 40 ha->global_use = global;
40 ha->synced = 0; 41 ha->synced = sync;
41 list_add_tail_rcu(&ha->list, &list->list); 42 list_add_tail_rcu(&ha->list, &list->list);
42 list->count++; 43 list->count++;
43 44
@@ -46,7 +47,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
46 47
47static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, 48static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
48 const unsigned char *addr, int addr_len, 49 const unsigned char *addr, int addr_len,
49 unsigned char addr_type, bool global) 50 unsigned char addr_type, bool global, bool sync)
50{ 51{
51 struct netdev_hw_addr *ha; 52 struct netdev_hw_addr *ha;
52 53
@@ -63,43 +64,62 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
63 else 64 else
64 ha->global_use = true; 65 ha->global_use = true;
65 } 66 }
67 if (sync) {
68 if (ha->synced)
69 return 0;
70 else
71 ha->synced = true;
72 }
66 ha->refcount++; 73 ha->refcount++;
67 return 0; 74 return 0;
68 } 75 }
69 } 76 }
70 77
71 return __hw_addr_create_ex(list, addr, addr_len, addr_type, global); 78 return __hw_addr_create_ex(list, addr, addr_len, addr_type, global,
79 sync);
72} 80}
73 81
74static int __hw_addr_add(struct netdev_hw_addr_list *list, 82static int __hw_addr_add(struct netdev_hw_addr_list *list,
75 const unsigned char *addr, int addr_len, 83 const unsigned char *addr, int addr_len,
76 unsigned char addr_type) 84 unsigned char addr_type)
77{ 85{
78 return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); 86 return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false);
87}
88
89static int __hw_addr_del_entry(struct netdev_hw_addr_list *list,
90 struct netdev_hw_addr *ha, bool global,
91 bool sync)
92{
93 if (global && !ha->global_use)
94 return -ENOENT;
95
96 if (sync && !ha->synced)
97 return -ENOENT;
98
99 if (global)
100 ha->global_use = false;
101
102 if (sync)
103 ha->synced = false;
104
105 if (--ha->refcount)
106 return 0;
107 list_del_rcu(&ha->list);
108 kfree_rcu(ha, rcu_head);
109 list->count--;
110 return 0;
79} 111}
80 112
81static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, 113static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
82 const unsigned char *addr, int addr_len, 114 const unsigned char *addr, int addr_len,
83 unsigned char addr_type, bool global) 115 unsigned char addr_type, bool global, bool sync)
84{ 116{
85 struct netdev_hw_addr *ha; 117 struct netdev_hw_addr *ha;
86 118
87 list_for_each_entry(ha, &list->list, list) { 119 list_for_each_entry(ha, &list->list, list) {
88 if (!memcmp(ha->addr, addr, addr_len) && 120 if (!memcmp(ha->addr, addr, addr_len) &&
89 (ha->type == addr_type || !addr_type)) { 121 (ha->type == addr_type || !addr_type))
90 if (global) { 122 return __hw_addr_del_entry(list, ha, global, sync);
91 if (!ha->global_use)
92 break;
93 else
94 ha->global_use = false;
95 }
96 if (--ha->refcount)
97 return 0;
98 list_del_rcu(&ha->list);
99 kfree_rcu(ha, rcu_head);
100 list->count--;
101 return 0;
102 }
103 } 123 }
104 return -ENOENT; 124 return -ENOENT;
105} 125}
@@ -108,7 +128,57 @@ static int __hw_addr_del(struct netdev_hw_addr_list *list,
108 const unsigned char *addr, int addr_len, 128 const unsigned char *addr, int addr_len,
109 unsigned char addr_type) 129 unsigned char addr_type)
110{ 130{
111 return __hw_addr_del_ex(list, addr, addr_len, addr_type, false); 131 return __hw_addr_del_ex(list, addr, addr_len, addr_type, false, false);
132}
133
134static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list,
135 struct netdev_hw_addr *ha,
136 int addr_len)
137{
138 int err;
139
140 err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type,
141 false, true);
142 if (err)
143 return err;
144 ha->sync_cnt++;
145 ha->refcount++;
146
147 return 0;
148}
149
150static void __hw_addr_unsync_one(struct netdev_hw_addr_list *to_list,
151 struct netdev_hw_addr_list *from_list,
152 struct netdev_hw_addr *ha,
153 int addr_len)
154{
155 int err;
156
157 err = __hw_addr_del_ex(to_list, ha->addr, addr_len, ha->type,
158 false, true);
159 if (err)
160 return;
161 ha->sync_cnt--;
162 __hw_addr_del_entry(from_list, ha, false, true);
163}
164
165static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list,
166 struct netdev_hw_addr_list *from_list,
167 int addr_len)
168{
169 int err = 0;
170 struct netdev_hw_addr *ha, *tmp;
171
172 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
173 if (ha->sync_cnt == ha->refcount) {
174 __hw_addr_unsync_one(to_list, from_list, ha, addr_len);
175 } else {
176 err = __hw_addr_sync_one(to_list, ha, addr_len);
177 if (err)
178 break;
179 }
180 }
181 return err;
112} 182}
113 183
114int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, 184int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
@@ -152,6 +222,11 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
152} 222}
153EXPORT_SYMBOL(__hw_addr_del_multiple); 223EXPORT_SYMBOL(__hw_addr_del_multiple);
154 224
225/* This function only works where there is a strict 1-1 relationship
 226 * between source and destination of the sync. If you ever need to
 227 * sync addresses to more than 1 destination, you need to use
228 * __hw_addr_sync_multiple().
229 */
155int __hw_addr_sync(struct netdev_hw_addr_list *to_list, 230int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
156 struct netdev_hw_addr_list *from_list, 231 struct netdev_hw_addr_list *from_list,
157 int addr_len) 232 int addr_len)
@@ -160,17 +235,12 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
160 struct netdev_hw_addr *ha, *tmp; 235 struct netdev_hw_addr *ha, *tmp;
161 236
162 list_for_each_entry_safe(ha, tmp, &from_list->list, list) { 237 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
163 if (!ha->synced) { 238 if (!ha->sync_cnt) {
164 err = __hw_addr_add(to_list, ha->addr, 239 err = __hw_addr_sync_one(to_list, ha, addr_len);
165 addr_len, ha->type);
166 if (err) 240 if (err)
167 break; 241 break;
168 ha->synced++; 242 } else if (ha->refcount == 1)
169 ha->refcount++; 243 __hw_addr_unsync_one(to_list, from_list, ha, addr_len);
170 } else if (ha->refcount == 1) {
171 __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
172 __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
173 }
174 } 244 }
175 return err; 245 return err;
176} 246}
@@ -183,13 +253,8 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
183 struct netdev_hw_addr *ha, *tmp; 253 struct netdev_hw_addr *ha, *tmp;
184 254
185 list_for_each_entry_safe(ha, tmp, &from_list->list, list) { 255 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
186 if (ha->synced) { 256 if (ha->sync_cnt)
187 __hw_addr_del(to_list, ha->addr, 257 __hw_addr_unsync_one(to_list, from_list, ha, addr_len);
188 addr_len, ha->type);
189 ha->synced--;
190 __hw_addr_del(from_list, ha->addr,
191 addr_len, ha->type);
192 }
193 } 258 }
194} 259}
195EXPORT_SYMBOL(__hw_addr_unsync); 260EXPORT_SYMBOL(__hw_addr_unsync);
@@ -406,7 +471,7 @@ int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr)
406 } 471 }
407 } 472 }
408 err = __hw_addr_create_ex(&dev->uc, addr, dev->addr_len, 473 err = __hw_addr_create_ex(&dev->uc, addr, dev->addr_len,
409 NETDEV_HW_ADDR_T_UNICAST, true); 474 NETDEV_HW_ADDR_T_UNICAST, true, false);
410 if (!err) 475 if (!err)
411 __dev_set_rx_mode(dev); 476 __dev_set_rx_mode(dev);
412out: 477out:
@@ -469,7 +534,8 @@ EXPORT_SYMBOL(dev_uc_del);
469 * locked by netif_addr_lock_bh. 534 * locked by netif_addr_lock_bh.
470 * 535 *
471 * This function is intended to be called from the dev->set_rx_mode 536 * This function is intended to be called from the dev->set_rx_mode
472 * function of layered software devices. 537 * function of layered software devices. This function assumes that
538 * addresses will only ever be synced to the @to devices and no other.
473 */ 539 */
474int dev_uc_sync(struct net_device *to, struct net_device *from) 540int dev_uc_sync(struct net_device *to, struct net_device *from)
475{ 541{
@@ -488,6 +554,36 @@ int dev_uc_sync(struct net_device *to, struct net_device *from)
488EXPORT_SYMBOL(dev_uc_sync); 554EXPORT_SYMBOL(dev_uc_sync);
489 555
490/** 556/**
557 * dev_uc_sync_multiple - Synchronize device's unicast list to another
558 * device, but allow for multiple calls to sync to multiple devices.
559 * @to: destination device
560 * @from: source device
561 *
562 * Add newly added addresses to the destination device and release
563 * addresses that have been deleted from the source. The source device
564 * must be locked by netif_addr_lock_bh.
565 *
566 * This function is intended to be called from the dev->set_rx_mode
567 * function of layered software devices. It allows for a single source
568 * device to be synced to multiple destination devices.
569 */
570int dev_uc_sync_multiple(struct net_device *to, struct net_device *from)
571{
572 int err = 0;
573
574 if (to->addr_len != from->addr_len)
575 return -EINVAL;
576
577 netif_addr_lock_nested(to);
578 err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len);
579 if (!err)
580 __dev_set_rx_mode(to);
581 netif_addr_unlock(to);
582 return err;
583}
584EXPORT_SYMBOL(dev_uc_sync_multiple);
585
586/**
491 * dev_uc_unsync - Remove synchronized addresses from the destination device 587 * dev_uc_unsync - Remove synchronized addresses from the destination device
492 * @to: destination device 588 * @to: destination device
493 * @from: source device 589 * @from: source device
@@ -559,7 +655,7 @@ int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr)
559 } 655 }
560 } 656 }
561 err = __hw_addr_create_ex(&dev->mc, addr, dev->addr_len, 657 err = __hw_addr_create_ex(&dev->mc, addr, dev->addr_len,
562 NETDEV_HW_ADDR_T_MULTICAST, true); 658 NETDEV_HW_ADDR_T_MULTICAST, true, false);
563 if (!err) 659 if (!err)
564 __dev_set_rx_mode(dev); 660 __dev_set_rx_mode(dev);
565out: 661out:
@@ -575,7 +671,7 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr,
575 671
576 netif_addr_lock_bh(dev); 672 netif_addr_lock_bh(dev);
577 err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, 673 err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
578 NETDEV_HW_ADDR_T_MULTICAST, global); 674 NETDEV_HW_ADDR_T_MULTICAST, global, false);
579 if (!err) 675 if (!err)
580 __dev_set_rx_mode(dev); 676 __dev_set_rx_mode(dev);
581 netif_addr_unlock_bh(dev); 677 netif_addr_unlock_bh(dev);
@@ -615,7 +711,7 @@ static int __dev_mc_del(struct net_device *dev, const unsigned char *addr,
615 711
616 netif_addr_lock_bh(dev); 712 netif_addr_lock_bh(dev);
617 err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len, 713 err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len,
618 NETDEV_HW_ADDR_T_MULTICAST, global); 714 NETDEV_HW_ADDR_T_MULTICAST, global, false);
619 if (!err) 715 if (!err)
620 __dev_set_rx_mode(dev); 716 __dev_set_rx_mode(dev);
621 netif_addr_unlock_bh(dev); 717 netif_addr_unlock_bh(dev);
@@ -679,6 +775,36 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
679EXPORT_SYMBOL(dev_mc_sync); 775EXPORT_SYMBOL(dev_mc_sync);
680 776
681/** 777/**
 778 * dev_mc_sync_multiple - Synchronize device's multicast list to another
779 * device, but allow for multiple calls to sync to multiple devices.
780 * @to: destination device
781 * @from: source device
782 *
783 * Add newly added addresses to the destination device and release
784 * addresses that have no users left. The source device must be
785 * locked by netif_addr_lock_bh.
786 *
787 * This function is intended to be called from the ndo_set_rx_mode
788 * function of layered software devices. It allows for a single
789 * source device to be synced to multiple destination devices.
790 */
791int dev_mc_sync_multiple(struct net_device *to, struct net_device *from)
792{
793 int err = 0;
794
795 if (to->addr_len != from->addr_len)
796 return -EINVAL;
797
798 netif_addr_lock_nested(to);
 799 err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len);
800 if (!err)
801 __dev_set_rx_mode(to);
802 netif_addr_unlock(to);
803 return err;
804}
805EXPORT_SYMBOL(dev_mc_sync_multiple);
806
807/**
682 * dev_mc_unsync - Remove synchronized addresses from the destination device 808 * dev_mc_unsync - Remove synchronized addresses from the destination device
683 * @to: destination device 809 * @to: destination device
684 * @from: source device 810 * @from: source device
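A hedged sketch of the intended caller of the new *_sync_multiple() helpers: an upper device (team/bond style) whose ndo_set_rx_mode fans its address lists out to several lower devices, which plain dev_uc_sync()/dev_mc_sync() cannot support because an address may then be synced to more than one destination (all names below are illustrative):

static void upper_set_rx_mode(struct net_device *dev)
{
	struct upper_priv *priv = netdev_priv(dev);
	struct lower_port *port;

	rcu_read_lock();
	list_for_each_entry_rcu(port, &priv->port_list, list) {
		/* sync the upper device's lists onto each lower device */
		dev_uc_sync_multiple(port->dev, dev);
		dev_mc_sync_multiple(port->dev, dev);
	}
	rcu_read_unlock();
}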
diff --git a/net/core/dst.c b/net/core/dst.c
index 35fd12f1a69c..df9cc810ec8e 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -320,27 +320,28 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
320EXPORT_SYMBOL(__dst_destroy_metrics_generic); 320EXPORT_SYMBOL(__dst_destroy_metrics_generic);
321 321
322/** 322/**
323 * skb_dst_set_noref - sets skb dst, without a reference 323 * __skb_dst_set_noref - sets skb dst, without a reference
324 * @skb: buffer 324 * @skb: buffer
325 * @dst: dst entry 325 * @dst: dst entry
326 * @force: if force is set, use noref version even for DST_NOCACHE entries
326 * 327 *
327 * Sets skb dst, assuming a reference was not taken on dst 328 * Sets skb dst, assuming a reference was not taken on dst
328 * skb_dst_drop() should not dst_release() this dst 329 * skb_dst_drop() should not dst_release() this dst
329 */ 330 */
330void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) 331void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, bool force)
331{ 332{
332 WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); 333 WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
333 /* If dst not in cache, we must take a reference, because 334 /* If dst not in cache, we must take a reference, because
334 * dst_release() will destroy dst as soon as its refcount becomes zero 335 * dst_release() will destroy dst as soon as its refcount becomes zero
335 */ 336 */
336 if (unlikely(dst->flags & DST_NOCACHE)) { 337 if (unlikely((dst->flags & DST_NOCACHE) && !force)) {
337 dst_hold(dst); 338 dst_hold(dst);
338 skb_dst_set(skb, dst); 339 skb_dst_set(skb, dst);
339 } else { 340 } else {
340 skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; 341 skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
341 } 342 }
342} 343}
343EXPORT_SYMBOL(skb_dst_set_noref); 344EXPORT_SYMBOL(__skb_dst_set_noref);
344 345
345/* Dirty hack. We did it in 2.2 (in __dst_free), 346/* Dirty hack. We did it in 2.2 (in __dst_free),
346 * we have _very_ good reasons not to repeat 347 * we have _very_ good reasons not to repeat
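The old entry point presumably survives as a wrapper; a sketch of the likely pair layered on __skb_dst_set_noref(), inferred from the force parameter added above (assumed, not shown in this diff):

static inline void skb_dst_set_noref(struct sk_buff *skb,
				     struct dst_entry *dst)
{
	__skb_dst_set_noref(skb, dst, false);
}

/* forces the noref path even for DST_NOCACHE entries */
static inline void skb_dst_set_noref_force(struct sk_buff *skb,
					   struct dst_entry *dst)
{
	__skb_dst_set_noref(skb, dst, true);
}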
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 3e9b2c3e30f0..5a934ef90f8b 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -60,10 +60,13 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
60 [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6", 60 [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6",
61 [NETIF_F_HIGHDMA_BIT] = "highdma", 61 [NETIF_F_HIGHDMA_BIT] = "highdma",
62 [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist", 62 [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist",
63 [NETIF_F_HW_VLAN_TX_BIT] = "tx-vlan-hw-insert", 63 [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-ctag-hw-insert",
64 64
65 [NETIF_F_HW_VLAN_RX_BIT] = "rx-vlan-hw-parse", 65 [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-ctag-hw-parse",
66 [NETIF_F_HW_VLAN_FILTER_BIT] = "rx-vlan-filter", 66 [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-ctag-filter",
67 [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert",
68 [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse",
69 [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter",
67 [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged", 70 [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged",
68 [NETIF_F_GSO_BIT] = "tx-generic-segmentation", 71 [NETIF_F_GSO_BIT] = "tx-generic-segmentation",
69 [NETIF_F_LLTX_BIT] = "tx-lockless", 72 [NETIF_F_LLTX_BIT] = "tx-lockless",
@@ -78,6 +81,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
78 [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", 81 [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
79 [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", 82 [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
80 [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", 83 [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
84 [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
81 85
82 [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", 86 [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
83 [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", 87 [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",
@@ -266,18 +270,19 @@ static int ethtool_set_one_feature(struct net_device *dev,
266 270
267#define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \ 271#define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \
268 ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH) 272 ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH)
269#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_RX | \ 273#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_CTAG_RX | \
270 NETIF_F_HW_VLAN_TX | NETIF_F_NTUPLE | NETIF_F_RXHASH) 274 NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_NTUPLE | \
275 NETIF_F_RXHASH)
271 276
272static u32 __ethtool_get_flags(struct net_device *dev) 277static u32 __ethtool_get_flags(struct net_device *dev)
273{ 278{
274 u32 flags = 0; 279 u32 flags = 0;
275 280
276 if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO; 281 if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO;
277 if (dev->features & NETIF_F_HW_VLAN_RX) flags |= ETH_FLAG_RXVLAN; 282 if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) flags |= ETH_FLAG_RXVLAN;
278 if (dev->features & NETIF_F_HW_VLAN_TX) flags |= ETH_FLAG_TXVLAN; 283 if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) flags |= ETH_FLAG_TXVLAN;
279 if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE; 284 if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE;
280 if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH; 285 if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH;
281 286
282 return flags; 287 return flags;
283} 288}
@@ -290,8 +295,8 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
290 return -EINVAL; 295 return -EINVAL;
291 296
292 if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO; 297 if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO;
293 if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_RX; 298 if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_CTAG_RX;
294 if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_TX; 299 if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_CTAG_TX;
295 if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE; 300 if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE;
296 if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH; 301 if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH;
297 302
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 58a4ba27dfe3..d5a9f8ead0d8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -266,7 +266,7 @@ errout:
266 return err; 266 return err;
267} 267}
268 268
269static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 269static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
270{ 270{
271 struct net *net = sock_net(skb->sk); 271 struct net *net = sock_net(skb->sk);
272 struct fib_rule_hdr *frh = nlmsg_data(nlh); 272 struct fib_rule_hdr *frh = nlmsg_data(nlh);
@@ -415,7 +415,7 @@ errout:
415 return err; 415 return err;
416} 416}
417 417
418static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 418static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
419{ 419{
420 struct net *net = sock_net(skb->sk); 420 struct net *net = sock_net(skb->sk);
421 struct fib_rule_hdr *frh = nlmsg_data(nlh); 421 struct fib_rule_hdr *frh = nlmsg_data(nlh);
diff --git a/net/core/filter.c b/net/core/filter.c
index 2e20b55a7830..dad2a178f9f8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -348,6 +348,9 @@ load_b:
348 case BPF_S_ANC_VLAN_TAG_PRESENT: 348 case BPF_S_ANC_VLAN_TAG_PRESENT:
349 A = !!vlan_tx_tag_present(skb); 349 A = !!vlan_tx_tag_present(skb);
350 continue; 350 continue;
351 case BPF_S_ANC_PAY_OFFSET:
352 A = __skb_get_poff(skb);
353 continue;
351 case BPF_S_ANC_NLATTR: { 354 case BPF_S_ANC_NLATTR: {
352 struct nlattr *nla; 355 struct nlattr *nla;
353 356
@@ -612,6 +615,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
612 ANCILLARY(ALU_XOR_X); 615 ANCILLARY(ALU_XOR_X);
613 ANCILLARY(VLAN_TAG); 616 ANCILLARY(VLAN_TAG);
614 ANCILLARY(VLAN_TAG_PRESENT); 617 ANCILLARY(VLAN_TAG_PRESENT);
618 ANCILLARY(PAY_OFFSET);
615 } 619 }
616 620
617 /* ancillary operation unknown or unsupported */ 621 /* ancillary operation unknown or unsupported */
@@ -814,6 +818,7 @@ static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
814 [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, 818 [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
815 [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, 819 [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
816 [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, 820 [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
821 [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS,
817 [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, 822 [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN,
818 [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, 823 [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND,
819 [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, 824 [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND,
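A hedged sketch of a classic BPF filter using the new payload-offset ancillary load to capture headers only (SKF_AD_PAY_OFFSET is the assumed uapi name backing BPF_S_ANC_PAY_OFFSET):

struct sock_filter insns[] = {
	/* A = offset of the payload, i.e. total header length */
	BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
		 SKF_AD_OFF + SKF_AD_PAY_OFFSET),
	/* accept exactly A bytes: the payload is truncated away */
	BPF_STMT(BPF_RET | BPF_A, 0),
};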
diff --git a/net/core/flow.c b/net/core/flow.c
index 2bfd081c59f7..7102f166482d 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -323,6 +323,24 @@ static void flow_cache_flush_tasklet(unsigned long data)
323 complete(&info->completion); 323 complete(&info->completion);
324} 324}
325 325
326/*
 327 * Return whether a cpu's flow cache is empty. Conservatively, we assume
328 * the presence of any entries means the core may require flushing,
329 * since the flow_cache_ops.check() function may assume it's running
330 * on the same core as the per-cpu cache component.
331 */
332static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
333{
334 struct flow_cache_percpu *fcp;
335 int i;
336
337 fcp = per_cpu_ptr(fc->percpu, cpu);
338 for (i = 0; i < flow_cache_hash_size(fc); i++)
339 if (!hlist_empty(&fcp->hash_table[i]))
340 return 0;
341 return 1;
342}
343
326static void flow_cache_flush_per_cpu(void *data) 344static void flow_cache_flush_per_cpu(void *data)
327{ 345{
328 struct flow_flush_info *info = data; 346 struct flow_flush_info *info = data;
@@ -337,22 +355,40 @@ void flow_cache_flush(void)
337{ 355{
338 struct flow_flush_info info; 356 struct flow_flush_info info;
339 static DEFINE_MUTEX(flow_flush_sem); 357 static DEFINE_MUTEX(flow_flush_sem);
358 cpumask_var_t mask;
359 int i, self;
360
361 /* Track which cpus need flushing to avoid disturbing all cores. */
362 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
363 return;
364 cpumask_clear(mask);
340 365
341 /* Don't want cpus going down or up during this. */ 366 /* Don't want cpus going down or up during this. */
342 get_online_cpus(); 367 get_online_cpus();
343 mutex_lock(&flow_flush_sem); 368 mutex_lock(&flow_flush_sem);
344 info.cache = &flow_cache_global; 369 info.cache = &flow_cache_global;
345 atomic_set(&info.cpuleft, num_online_cpus()); 370 for_each_online_cpu(i)
371 if (!flow_cache_percpu_empty(info.cache, i))
372 cpumask_set_cpu(i, mask);
373 atomic_set(&info.cpuleft, cpumask_weight(mask));
374 if (atomic_read(&info.cpuleft) == 0)
375 goto done;
376
346 init_completion(&info.completion); 377 init_completion(&info.completion);
347 378
348 local_bh_disable(); 379 local_bh_disable();
349 smp_call_function(flow_cache_flush_per_cpu, &info, 0); 380 self = cpumask_test_and_clear_cpu(smp_processor_id(), mask);
350 flow_cache_flush_tasklet((unsigned long)&info); 381 on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0);
382 if (self)
383 flow_cache_flush_tasklet((unsigned long)&info);
351 local_bh_enable(); 384 local_bh_enable();
352 385
353 wait_for_completion(&info.completion); 386 wait_for_completion(&info.completion);
387
388done:
354 mutex_unlock(&flow_flush_sem); 389 mutex_unlock(&flow_flush_sem);
355 put_online_cpus(); 390 put_online_cpus();
391 free_cpumask_var(mask);
356} 392}
357 393
358static void flow_cache_flush_task(struct work_struct *work) 394static void flow_cache_flush_task(struct work_struct *work)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index e187bf06d673..00ee068efc1c 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -5,6 +5,10 @@
5#include <linux/if_vlan.h> 5#include <linux/if_vlan.h>
6#include <net/ip.h> 6#include <net/ip.h>
7#include <net/ipv6.h> 7#include <net/ipv6.h>
8#include <linux/igmp.h>
9#include <linux/icmp.h>
10#include <linux/sctp.h>
11#include <linux/dccp.h>
8#include <linux/if_tunnel.h> 12#include <linux/if_tunnel.h>
9#include <linux/if_pppox.h> 13#include <linux/if_pppox.h>
10#include <linux/ppp_defs.h> 14#include <linux/ppp_defs.h>
@@ -119,6 +123,17 @@ ipv6:
119 nhoff += 4; 123 nhoff += 4;
120 if (hdr->flags & GRE_SEQ) 124 if (hdr->flags & GRE_SEQ)
121 nhoff += 4; 125 nhoff += 4;
126 if (proto == htons(ETH_P_TEB)) {
127 const struct ethhdr *eth;
128 struct ethhdr _eth;
129
130 eth = skb_header_pointer(skb, nhoff,
131 sizeof(_eth), &_eth);
132 if (!eth)
133 return false;
134 proto = eth->h_proto;
135 nhoff += sizeof(*eth);
136 }
122 goto again; 137 goto again;
123 } 138 }
124 break; 139 break;
@@ -217,6 +232,59 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
217} 232}
218EXPORT_SYMBOL(__skb_tx_hash); 233EXPORT_SYMBOL(__skb_tx_hash);
219 234
235/* __skb_get_poff() returns the offset to the payload as far as it could
236 * be dissected. The main user is currently BPF, so that we can dynamically
237 * truncate packets without needing to push actual payload to the user
238 * space and can analyze headers only, instead.
239 */
240u32 __skb_get_poff(const struct sk_buff *skb)
241{
242 struct flow_keys keys;
243 u32 poff = 0;
244
245 if (!skb_flow_dissect(skb, &keys))
246 return 0;
247
248 poff += keys.thoff;
249 switch (keys.ip_proto) {
250 case IPPROTO_TCP: {
251 const struct tcphdr *tcph;
252 struct tcphdr _tcph;
253
254 tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph);
255 if (!tcph)
256 return poff;
257
258 poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4);
259 break;
260 }
261 case IPPROTO_UDP:
262 case IPPROTO_UDPLITE:
263 poff += sizeof(struct udphdr);
264 break;
265 /* For the rest, we do not really care about header
 266 * extensions for now.
267 */
268 case IPPROTO_ICMP:
269 poff += sizeof(struct icmphdr);
270 break;
271 case IPPROTO_ICMPV6:
272 poff += sizeof(struct icmp6hdr);
273 break;
274 case IPPROTO_IGMP:
275 poff += sizeof(struct igmphdr);
276 break;
277 case IPPROTO_DCCP:
278 poff += sizeof(struct dccp_hdr);
279 break;
280 case IPPROTO_SCTP:
281 poff += sizeof(struct sctphdr);
282 break;
283 }
284
285 return poff;
286}
287
220static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) 288static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
221{ 289{
222 if (unlikely(queue_index >= dev->real_num_tx_queues)) { 290 if (unlikely(queue_index >= dev->real_num_tx_queues)) {
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 3863b8f639c5..89a3a07d85fb 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -39,21 +39,13 @@
39#include <linux/string.h> 39#include <linux/string.h>
40#include <linux/log2.h> 40#include <linux/log2.h>
41 41
42#define DEBUG
42#define NEIGH_DEBUG 1 43#define NEIGH_DEBUG 1
43 44#define neigh_dbg(level, fmt, ...) \
44#define NEIGH_PRINTK(x...) printk(x) 45do { \
45#define NEIGH_NOPRINTK(x...) do { ; } while(0) 46 if (level <= NEIGH_DEBUG) \
46#define NEIGH_PRINTK1 NEIGH_NOPRINTK 47 pr_debug(fmt, ##__VA_ARGS__); \
47#define NEIGH_PRINTK2 NEIGH_NOPRINTK 48} while (0)
48
49#if NEIGH_DEBUG >= 1
50#undef NEIGH_PRINTK1
51#define NEIGH_PRINTK1 NEIGH_PRINTK
52#endif
53#if NEIGH_DEBUG >= 2
54#undef NEIGH_PRINTK2
55#define NEIGH_PRINTK2 NEIGH_PRINTK
56#endif
57 49
58#define PNEIGH_HASHMASK 0xF 50#define PNEIGH_HASHMASK 0xF
59 51
@@ -246,7 +238,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
246 n->nud_state = NUD_NOARP; 238 n->nud_state = NUD_NOARP;
247 else 239 else
248 n->nud_state = NUD_NONE; 240 n->nud_state = NUD_NONE;
249 NEIGH_PRINTK2("neigh %p is stray.\n", n); 241 neigh_dbg(2, "neigh %p is stray\n", n);
250 } 242 }
251 write_unlock(&n->lock); 243 write_unlock(&n->lock);
252 neigh_cleanup_and_release(n); 244 neigh_cleanup_and_release(n);
@@ -542,7 +534,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
542 lockdep_is_held(&tbl->lock))); 534 lockdep_is_held(&tbl->lock)));
543 rcu_assign_pointer(nht->hash_buckets[hash_val], n); 535 rcu_assign_pointer(nht->hash_buckets[hash_val], n);
544 write_unlock_bh(&tbl->lock); 536 write_unlock_bh(&tbl->lock);
545 NEIGH_PRINTK2("neigh %p is created.\n", n); 537 neigh_dbg(2, "neigh %p is created\n", n);
546 rc = n; 538 rc = n;
547out: 539out:
548 return rc; 540 return rc;
@@ -725,7 +717,7 @@ void neigh_destroy(struct neighbour *neigh)
725 dev_put(dev); 717 dev_put(dev);
726 neigh_parms_put(neigh->parms); 718 neigh_parms_put(neigh->parms);
727 719
728 NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); 720 neigh_dbg(2, "neigh %p is destroyed\n", neigh);
729 721
730 atomic_dec(&neigh->tbl->entries); 722 atomic_dec(&neigh->tbl->entries);
731 kfree_rcu(neigh, rcu); 723 kfree_rcu(neigh, rcu);
@@ -739,7 +731,7 @@ EXPORT_SYMBOL(neigh_destroy);
739 */ 731 */
740static void neigh_suspect(struct neighbour *neigh) 732static void neigh_suspect(struct neighbour *neigh)
741{ 733{
742 NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); 734 neigh_dbg(2, "neigh %p is suspected\n", neigh);
743 735
744 neigh->output = neigh->ops->output; 736 neigh->output = neigh->ops->output;
745} 737}
@@ -751,7 +743,7 @@ static void neigh_suspect(struct neighbour *neigh)
751 */ 743 */
752static void neigh_connect(struct neighbour *neigh) 744static void neigh_connect(struct neighbour *neigh)
753{ 745{
754 NEIGH_PRINTK2("neigh %p is connected.\n", neigh); 746 neigh_dbg(2, "neigh %p is connected\n", neigh);
755 747
756 neigh->output = neigh->ops->connected_output; 748 neigh->output = neigh->ops->connected_output;
757} 749}
@@ -852,7 +844,7 @@ static void neigh_invalidate(struct neighbour *neigh)
852 struct sk_buff *skb; 844 struct sk_buff *skb;
853 845
854 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); 846 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
855 NEIGH_PRINTK2("neigh %p is failed.\n", neigh); 847 neigh_dbg(2, "neigh %p is failed\n", neigh);
856 neigh->updated = jiffies; 848 neigh->updated = jiffies;
857 849
858 /* It is very thin place. report_unreachable is very complicated 850 /* It is very thin place. report_unreachable is very complicated
@@ -904,17 +896,17 @@ static void neigh_timer_handler(unsigned long arg)
904 if (state & NUD_REACHABLE) { 896 if (state & NUD_REACHABLE) {
905 if (time_before_eq(now, 897 if (time_before_eq(now,
906 neigh->confirmed + neigh->parms->reachable_time)) { 898 neigh->confirmed + neigh->parms->reachable_time)) {
907 NEIGH_PRINTK2("neigh %p is still alive.\n", neigh); 899 neigh_dbg(2, "neigh %p is still alive\n", neigh);
908 next = neigh->confirmed + neigh->parms->reachable_time; 900 next = neigh->confirmed + neigh->parms->reachable_time;
909 } else if (time_before_eq(now, 901 } else if (time_before_eq(now,
910 neigh->used + neigh->parms->delay_probe_time)) { 902 neigh->used + neigh->parms->delay_probe_time)) {
911 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); 903 neigh_dbg(2, "neigh %p is delayed\n", neigh);
912 neigh->nud_state = NUD_DELAY; 904 neigh->nud_state = NUD_DELAY;
913 neigh->updated = jiffies; 905 neigh->updated = jiffies;
914 neigh_suspect(neigh); 906 neigh_suspect(neigh);
915 next = now + neigh->parms->delay_probe_time; 907 next = now + neigh->parms->delay_probe_time;
916 } else { 908 } else {
917 NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); 909 neigh_dbg(2, "neigh %p is suspected\n", neigh);
918 neigh->nud_state = NUD_STALE; 910 neigh->nud_state = NUD_STALE;
919 neigh->updated = jiffies; 911 neigh->updated = jiffies;
920 neigh_suspect(neigh); 912 neigh_suspect(neigh);
@@ -923,14 +915,14 @@ static void neigh_timer_handler(unsigned long arg)
923 } else if (state & NUD_DELAY) { 915 } else if (state & NUD_DELAY) {
924 if (time_before_eq(now, 916 if (time_before_eq(now,
925 neigh->confirmed + neigh->parms->delay_probe_time)) { 917 neigh->confirmed + neigh->parms->delay_probe_time)) {
926 NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh); 918 neigh_dbg(2, "neigh %p is now reachable\n", neigh);
927 neigh->nud_state = NUD_REACHABLE; 919 neigh->nud_state = NUD_REACHABLE;
928 neigh->updated = jiffies; 920 neigh->updated = jiffies;
929 neigh_connect(neigh); 921 neigh_connect(neigh);
930 notify = 1; 922 notify = 1;
931 next = neigh->confirmed + neigh->parms->reachable_time; 923 next = neigh->confirmed + neigh->parms->reachable_time;
932 } else { 924 } else {
933 NEIGH_PRINTK2("neigh %p is probed.\n", neigh); 925 neigh_dbg(2, "neigh %p is probed\n", neigh);
934 neigh->nud_state = NUD_PROBE; 926 neigh->nud_state = NUD_PROBE;
935 neigh->updated = jiffies; 927 neigh->updated = jiffies;
936 atomic_set(&neigh->probes, 0); 928 atomic_set(&neigh->probes, 0);
@@ -997,7 +989,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
997 return 1; 989 return 1;
998 } 990 }
999 } else if (neigh->nud_state & NUD_STALE) { 991 } else if (neigh->nud_state & NUD_STALE) {
1000 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); 992 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1001 neigh->nud_state = NUD_DELAY; 993 neigh->nud_state = NUD_DELAY;
1002 neigh->updated = jiffies; 994 neigh->updated = jiffies;
1003 neigh_add_timer(neigh, 995 neigh_add_timer(neigh,
@@ -1320,8 +1312,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1320out: 1312out:
1321 return rc; 1313 return rc;
1322discard: 1314discard:
1323 NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", 1315 neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
1324 dst, neigh);
1325out_kfree_skb: 1316out_kfree_skb:
1326 rc = -EINVAL; 1317 rc = -EINVAL;
1327 kfree_skb(skb); 1318 kfree_skb(skb);
@@ -1498,7 +1489,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1498 } 1489 }
1499 } 1490 }
1500 write_unlock_bh(&tbl->lock); 1491 write_unlock_bh(&tbl->lock);
1501 NEIGH_PRINTK1("neigh_parms_release: not found\n"); 1492 neigh_dbg(1, "%s: not found\n", __func__);
1502} 1493}
1503EXPORT_SYMBOL(neigh_parms_release); 1494EXPORT_SYMBOL(neigh_parms_release);
1504 1495
@@ -1613,7 +1604,7 @@ int neigh_table_clear(struct neigh_table *tbl)
1613} 1604}
1614EXPORT_SYMBOL(neigh_table_clear); 1605EXPORT_SYMBOL(neigh_table_clear);
1615 1606
1616static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1607static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
1617{ 1608{
1618 struct net *net = sock_net(skb->sk); 1609 struct net *net = sock_net(skb->sk);
1619 struct ndmsg *ndm; 1610 struct ndmsg *ndm;
@@ -1677,7 +1668,7 @@ out:
1677 return err; 1668 return err;
1678} 1669}
1679 1670
1680static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1671static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
1681{ 1672{
1682 struct net *net = sock_net(skb->sk); 1673 struct net *net = sock_net(skb->sk);
1683 struct ndmsg *ndm; 1674 struct ndmsg *ndm;
@@ -1955,7 +1946,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1955 [NDTPA_LOCKTIME] = { .type = NLA_U64 }, 1946 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
1956}; 1947};
1957 1948
1958static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1949static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
1959{ 1950{
1960 struct net *net = sock_net(skb->sk); 1951 struct net *net = sock_net(skb->sk);
1961 struct neigh_table *tbl; 1952 struct neigh_table *tbl;
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 3174f1998ee6..569d355fec3e 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -271,7 +271,7 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
271 else 271 else
272 seq_printf(seq, "%04x", ntohs(pt->type)); 272 seq_printf(seq, "%04x", ntohs(pt->type));
273 273
274 seq_printf(seq, " %-8s %pF\n", 274 seq_printf(seq, " %-8s %pf\n",
275 pt->dev ? pt->dev->name : "", pt->func); 275 pt->dev ? pt->dev->name : "", pt->func);
276 } 276 }
277 277
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index fa32899006a2..209d84253dd5 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -47,7 +47,7 @@ static struct sk_buff_head skb_pool;
47 47
48static atomic_t trapped; 48static atomic_t trapped;
49 49
50static struct srcu_struct netpoll_srcu; 50DEFINE_STATIC_SRCU(netpoll_srcu);
51 51
52#define USEC_PER_POLL 50 52#define USEC_PER_POLL 50
53#define NETPOLL_RX_ENABLED 1 53#define NETPOLL_RX_ENABLED 1
@@ -383,8 +383,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
383 if (__netif_tx_trylock(txq)) { 383 if (__netif_tx_trylock(txq)) {
384 if (!netif_xmit_stopped(txq)) { 384 if (!netif_xmit_stopped(txq)) {
385 if (vlan_tx_tag_present(skb) && 385 if (vlan_tx_tag_present(skb) &&
386 !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) { 386 !vlan_hw_offload_capable(netif_skb_features(skb),
387 skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); 387 skb->vlan_proto)) {
388 skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
388 if (unlikely(!skb)) 389 if (unlikely(!skb))
389 break; 390 break;
390 skb->vlan_tci = 0; 391 skb->vlan_tci = 0;
@@ -1212,7 +1213,6 @@ EXPORT_SYMBOL(netpoll_setup);
1212static int __init netpoll_init(void) 1213static int __init netpoll_init(void)
1213{ 1214{
1214 skb_queue_head_init(&skb_pool); 1215 skb_queue_head_init(&skb_pool);
1215 init_srcu_struct(&netpoll_srcu);
1216 return 0; 1216 return 0;
1217} 1217}
1218core_initcall(netpoll_init); 1218core_initcall(netpoll_init);
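The conversion above moves SRCU initialization to compile time, which is why the init_srcu_struct() call can be dropped from netpoll_init(). A minimal sketch of the pattern (names illustrative):

DEFINE_STATIC_SRCU(my_srcu);	/* fully initialized, no init call needed */

static void my_reader(void)
{
	int idx = srcu_read_lock(&my_srcu);
	/* ... dereference SRCU-protected state ... */
	srcu_read_unlock(&my_srcu, idx);
}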
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 23854b51a259..18af08a73f0a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -517,32 +517,6 @@ out:
517 return err; 517 return err;
518} 518}
519 519
520static const int rtm_min[RTM_NR_FAMILIES] =
521{
522 [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
523 [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
524 [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)),
525 [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)),
526 [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
527 [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
528 [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
529 [RTM_FAM(RTM_NEWACTION)] = NLMSG_LENGTH(sizeof(struct tcamsg)),
530 [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
531 [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
532};
533
534static const int rta_max[RTM_NR_FAMILIES] =
535{
536 [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX,
537 [RTM_FAM(RTM_NEWADDR)] = IFA_MAX,
538 [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX,
539 [RTM_FAM(RTM_NEWRULE)] = FRA_MAX,
540 [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX,
541 [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX,
542 [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX,
543 [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX,
544};
545
546int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo) 520int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo)
547{ 521{
548 struct sock *rtnl = net->rtnl; 522 struct sock *rtnl = net->rtnl;
@@ -1539,7 +1513,7 @@ errout:
1539 return err; 1513 return err;
1540} 1514}
1541 1515
1542static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1516static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
1543{ 1517{
1544 struct net *net = sock_net(skb->sk); 1518 struct net *net = sock_net(skb->sk);
1545 struct ifinfomsg *ifm; 1519 struct ifinfomsg *ifm;
@@ -1580,7 +1554,7 @@ errout:
1580 return err; 1554 return err;
1581} 1555}
1582 1556
1583static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1557static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
1584{ 1558{
1585 struct net *net = sock_net(skb->sk); 1559 struct net *net = sock_net(skb->sk);
1586 const struct rtnl_link_ops *ops; 1560 const struct rtnl_link_ops *ops;
@@ -1711,7 +1685,7 @@ static int rtnl_group_changelink(struct net *net, int group,
1711 return 0; 1685 return 0;
1712} 1686}
1713 1687
1714static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1688static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh)
1715{ 1689{
1716 struct net *net = sock_net(skb->sk); 1690 struct net *net = sock_net(skb->sk);
1717 const struct rtnl_link_ops *ops; 1691 const struct rtnl_link_ops *ops;
@@ -1866,7 +1840,7 @@ out:
1866 } 1840 }
1867} 1841}
1868 1842
1869static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 1843static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh)
1870{ 1844{
1871 struct net *net = sock_net(skb->sk); 1845 struct net *net = sock_net(skb->sk);
1872 struct ifinfomsg *ifm; 1846 struct ifinfomsg *ifm;
@@ -1957,8 +1931,11 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
1957 if (rtnl_msg_handlers[idx] == NULL || 1931 if (rtnl_msg_handlers[idx] == NULL ||
1958 rtnl_msg_handlers[idx][type].dumpit == NULL) 1932 rtnl_msg_handlers[idx][type].dumpit == NULL)
1959 continue; 1933 continue;
1960 if (idx > s_idx) 1934 if (idx > s_idx) {
1961 memset(&cb->args[0], 0, sizeof(cb->args)); 1935 memset(&cb->args[0], 0, sizeof(cb->args));
1936 cb->prev_seq = 0;
1937 cb->seq = 0;
1938 }
1962 if (rtnl_msg_handlers[idx][type].dumpit(skb, cb)) 1939 if (rtnl_msg_handlers[idx][type].dumpit(skb, cb))
1963 break; 1940 break;
1964 } 1941 }
@@ -2051,7 +2028,39 @@ errout:
2051 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); 2028 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2052} 2029}
2053 2030
2054static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 2031/**
2032 * ndo_dflt_fdb_add - default netdevice operation to add an FDB entry
2033 */
2034int ndo_dflt_fdb_add(struct ndmsg *ndm,
2035 struct nlattr *tb[],
2036 struct net_device *dev,
2037 const unsigned char *addr,
2038 u16 flags)
2039{
2040 int err = -EINVAL;
2041
 2042 /* If the device supports aging addresses, it will need to
2043 * implement its own handler for this.
2044 */
2045 if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
2046 pr_info("%s: FDB only supports static addresses\n", dev->name);
2047 return err;
2048 }
2049
2050 if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
2051 err = dev_uc_add_excl(dev, addr);
2052 else if (is_multicast_ether_addr(addr))
2053 err = dev_mc_add_excl(dev, addr);
2054
2055 /* Only return duplicate errors if NLM_F_EXCL is set */
2056 if (err == -EEXIST && !(flags & NLM_F_EXCL))
2057 err = 0;
2058
2059 return err;
2060}
2061EXPORT_SYMBOL(ndo_dflt_fdb_add);
2062
2063static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
2055{ 2064{
2056 struct net *net = sock_net(skb->sk); 2065 struct net *net = sock_net(skb->sk);
2057 struct ndmsg *ndm; 2066 struct ndmsg *ndm;
@@ -2082,7 +2091,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2082 } 2091 }
2083 2092
2084 addr = nla_data(tb[NDA_LLADDR]); 2093 addr = nla_data(tb[NDA_LLADDR]);
2085 if (!is_valid_ether_addr(addr)) { 2094 if (is_zero_ether_addr(addr)) {
2086 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n"); 2095 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n");
2087 return -EINVAL; 2096 return -EINVAL;
2088 } 2097 }
@@ -2103,10 +2112,13 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2103 } 2112 }
2104 2113
2105 /* Embedded bridge, macvlan, and any other device support */ 2114 /* Embedded bridge, macvlan, and any other device support */
2106 if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) { 2115 if ((ndm->ndm_flags & NTF_SELF)) {
2107 err = dev->netdev_ops->ndo_fdb_add(ndm, tb, 2116 if (dev->netdev_ops->ndo_fdb_add)
2108 dev, addr, 2117 err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr,
2109 nlh->nlmsg_flags); 2118 nlh->nlmsg_flags);
2119 else
2120 err = ndo_dflt_fdb_add(ndm, tb, dev, addr,
2121 nlh->nlmsg_flags);
2110 2122
2111 if (!err) { 2123 if (!err) {
2112 rtnl_fdb_notify(dev, addr, RTM_NEWNEIGH); 2124 rtnl_fdb_notify(dev, addr, RTM_NEWNEIGH);
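With the default in place, a driver only needs its own ndo_fdb_add when it has extra constraints, and it can still fall back to the helper. A hedged sketch (the driver name and capacity check are illustrative, not from this patch):

static int my_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
			  struct net_device *dev,
			  const unsigned char *addr, u16 flags)
{
	if (my_hw_fdb_full(dev))	/* hypothetical capacity check */
		return -ENOSPC;

	return ndo_dflt_fdb_add(ndm, tb, dev, addr, flags);
}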
@@ -2117,7 +2129,36 @@ out:
2117 return err; 2129 return err;
2118} 2130}
2119 2131
2120static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 2132/**
2133 * ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry
2134 */
2135int ndo_dflt_fdb_del(struct ndmsg *ndm,
2136 struct nlattr *tb[],
2137 struct net_device *dev,
2138 const unsigned char *addr)
2139{
2140 int err = -EOPNOTSUPP;
2141
 2142 /* If the device supports aging addresses, it will need to
2143 * implement its own handler for this.
2144 */
2145 if (ndm->ndm_state & NUD_PERMANENT) {
2146 pr_info("%s: FDB only supports static addresses\n", dev->name);
2147 return -EINVAL;
2148 }
2149
2150 if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
2151 err = dev_uc_del(dev, addr);
2152 else if (is_multicast_ether_addr(addr))
2153 err = dev_mc_del(dev, addr);
2154 else
2155 err = -EINVAL;
2156
2157 return err;
2158}
2159EXPORT_SYMBOL(ndo_dflt_fdb_del);
2160
2161static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
2121{ 2162{
2122 struct net *net = sock_net(skb->sk); 2163 struct net *net = sock_net(skb->sk);
2123 struct ndmsg *ndm; 2164 struct ndmsg *ndm;
@@ -2174,8 +2215,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2174 } 2215 }
2175 2216
2176 /* Embedded bridge, macvlan, and any other device support */ 2217 /* Embedded bridge, macvlan, and any other device support */
2177 if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { 2218 if (ndm->ndm_flags & NTF_SELF) {
2178 err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); 2219 if (dev->netdev_ops->ndo_fdb_del)
2220 err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr);
2221 else
2222 err = ndo_dflt_fdb_del(ndm, tb, dev, addr);
2179 2223
2180 if (!err) { 2224 if (!err) {
2181 rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); 2225 rtnl_fdb_notify(dev, addr, RTM_DELNEIGH);
@@ -2220,7 +2264,7 @@ skip:
2220 * @dev: netdevice 2264 * @dev: netdevice
2221 * 2265 *
2222 * Default netdevice operation to dump the existing unicast address list. 2266 * Default netdevice operation to dump the existing unicast address list.
 2223 * Returns zero on success. 2267 * Returns the number of addresses from the list put into the skb.
2224 */ 2268 */
2225int ndo_dflt_fdb_dump(struct sk_buff *skb, 2269int ndo_dflt_fdb_dump(struct sk_buff *skb,
2226 struct netlink_callback *cb, 2270 struct netlink_callback *cb,
@@ -2260,6 +2304,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
2260 2304
2261 if (dev->netdev_ops->ndo_fdb_dump) 2305 if (dev->netdev_ops->ndo_fdb_dump)
2262 idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx); 2306 idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx);
2307 else
2308 idx = ndo_dflt_fdb_dump(skb, cb, dev, idx);
2263 } 2309 }
2264 rcu_read_unlock(); 2310 rcu_read_unlock();
2265 2311
@@ -2411,8 +2457,7 @@ errout:
2411 return err; 2457 return err;
2412} 2458}
2413 2459
2414static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, 2460static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
2415 void *arg)
2416{ 2461{
2417 struct net *net = sock_net(skb->sk); 2462 struct net *net = sock_net(skb->sk);
2418 struct ifinfomsg *ifm; 2463 struct ifinfomsg *ifm;
@@ -2482,8 +2527,7 @@ out:
2482 return err; 2527 return err;
2483} 2528}
2484 2529
2485static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, 2530static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
2486 void *arg)
2487{ 2531{
2488 struct net *net = sock_net(skb->sk); 2532 struct net *net = sock_net(skb->sk);
2489 struct ifinfomsg *ifm; 2533 struct ifinfomsg *ifm;
@@ -2553,10 +2597,6 @@ out:
2553 return err; 2597 return err;
2554} 2598}
2555 2599
2556/* Protected by RTNL sempahore. */
2557static struct rtattr **rta_buf;
2558static int rtattr_max;
2559
2560/* Process one rtnetlink message. */ 2600/* Process one rtnetlink message. */
2561 2601
2562static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) 2602static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -2564,7 +2604,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
2564 struct net *net = sock_net(skb->sk); 2604 struct net *net = sock_net(skb->sk);
2565 rtnl_doit_func doit; 2605 rtnl_doit_func doit;
2566 int sz_idx, kind; 2606 int sz_idx, kind;
2567 int min_len;
2568 int family; 2607 int family;
2569 int type; 2608 int type;
2570 int err; 2609 int err;
@@ -2576,10 +2615,10 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
2576 type -= RTM_BASE; 2615 type -= RTM_BASE;
2577 2616
2578 /* All the messages must have at least 1 byte length */ 2617 /* All the messages must have at least 1 byte length */
2579 if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg))) 2618 if (nlmsg_len(nlh) < sizeof(struct rtgenmsg))
2580 return 0; 2619 return 0;
2581 2620
2582 family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family; 2621 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2583 sz_idx = type>>2; 2622 sz_idx = type>>2;
2584 kind = type&3; 2623 kind = type&3;
2585 2624
@@ -2612,32 +2651,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
2612 return err; 2651 return err;
2613 } 2652 }
2614 2653
2615 memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
2616
2617 min_len = rtm_min[sz_idx];
2618 if (nlh->nlmsg_len < min_len)
2619 return -EINVAL;
2620
2621 if (nlh->nlmsg_len > min_len) {
2622 int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
2623 struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len);
2624
2625 while (RTA_OK(attr, attrlen)) {
2626 unsigned int flavor = attr->rta_type & NLA_TYPE_MASK;
2627 if (flavor) {
2628 if (flavor > rta_max[sz_idx])
2629 return -EINVAL;
2630 rta_buf[flavor-1] = attr;
2631 }
2632 attr = RTA_NEXT(attr, attrlen);
2633 }
2634 }
2635
2636 doit = rtnl_get_doit(family, type); 2654 doit = rtnl_get_doit(family, type);
2637 if (doit == NULL) 2655 if (doit == NULL)
2638 return -EOPNOTSUPP; 2656 return -EOPNOTSUPP;
2639 2657
2640 return doit(skb, nlh, (void *)&rta_buf[0]); 2658 return doit(skb, nlh);
2641} 2659}
2642 2660
2643static void rtnetlink_rcv(struct sk_buff *skb) 2661static void rtnetlink_rcv(struct sk_buff *skb)
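All doit handlers in this series drop their third argument along with the rta_buf machinery, so the handler typedef shrinks accordingly; the implied updated form (assumed to live in net/rtnetlink.h):

typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *);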
@@ -2707,16 +2725,6 @@ static struct pernet_operations rtnetlink_net_ops = {
2707 2725
2708void __init rtnetlink_init(void) 2726void __init rtnetlink_init(void)
2709{ 2727{
2710 int i;
2711
2712 rtattr_max = 0;
2713 for (i = 0; i < ARRAY_SIZE(rta_max); i++)
2714 if (rta_max[i] > rtattr_max)
2715 rtattr_max = rta_max[i];
2716 rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL);
2717 if (!rta_buf)
2718 panic("rtnetlink_init: cannot allocate rta_buf\n");
2719
2720 if (register_pernet_subsys(&rtnetlink_net_ops)) 2728 if (register_pernet_subsys(&rtnetlink_net_ops))
2721 panic("rtnetlink_init: cannot initialize rtnetlink\n"); 2729 panic("rtnetlink_init: cannot initialize rtnetlink\n");
2722 2730
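
With the rta_buf pre-parsing removed, each rtnetlink doit handler now parses its own attributes. A minimal sketch of the resulting handler shape, assuming a hypothetical policy and handler name (not part of this patch):

    static const struct nla_policy example_policy[IFLA_MAX + 1] = {
            [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
    };

    /* doit handlers take only (skb, nlh) and parse attributes themselves */
    static int example_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
    {
            struct nlattr *tb[IFLA_MAX + 1];
            int err;

            err = nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb,
                              IFLA_MAX, example_policy);
            if (err < 0)
                    return err;

            if (tb[IFLA_IFNAME])
                    pr_debug("ifname: %s\n",
                             (char *)nla_data(tb[IFLA_IFNAME]));
            return 0;
    }
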
diff --git a/net/core/scm.c b/net/core/scm.c
index 2dc6cdaaae8a..03795d0147f2 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -187,22 +187,6 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
187 187
188 p->creds.uid = uid; 188 p->creds.uid = uid;
189 p->creds.gid = gid; 189 p->creds.gid = gid;
190
191 if (!p->cred ||
192 !uid_eq(p->cred->euid, uid) ||
193 !gid_eq(p->cred->egid, gid)) {
194 struct cred *cred;
195 err = -ENOMEM;
196 cred = prepare_creds();
197 if (!cred)
198 goto error;
199
200 cred->uid = cred->euid = uid;
201 cred->gid = cred->egid = gid;
202 if (p->cred)
203 put_cred(p->cred);
204 p->cred = cred;
205 }
206 break; 190 break;
207 } 191 }
208 default: 192 default:
@@ -306,8 +290,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
306 /* Bump the usage count and install the file. */ 290 /* Bump the usage count and install the file. */
307 sock = sock_from_file(fp[i], &err); 291 sock = sock_from_file(fp[i], &err);
308 if (sock) { 292 if (sock) {
309 sock_update_netprioidx(sock->sk, current); 293 sock_update_netprioidx(sock->sk);
310 sock_update_classid(sock->sk, current); 294 sock_update_classid(sock->sk);
311 } 295 }
312 fd_install(new_fd, get_file(fp[i])); 296 fd_install(new_fd, get_file(fp[i]));
313 } 297 }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 33245ef54c3b..898cf5c566f9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -179,6 +179,33 @@ out:
179 * 179 *
180 */ 180 */
181 181
182struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
183{
184 struct sk_buff *skb;
185
186 /* Get the HEAD */
187 skb = kmem_cache_alloc_node(skbuff_head_cache,
188 gfp_mask & ~__GFP_DMA, node);
189 if (!skb)
190 goto out;
191
192 /*
193 * Only clear those fields we need to clear, not those that we will
194 * actually initialise below. Hence, don't put any more fields after
195 * the tail pointer in struct sk_buff!
196 */
197 memset(skb, 0, offsetof(struct sk_buff, tail));
198 skb->data = NULL;
199 skb->truesize = sizeof(struct sk_buff);
200 atomic_set(&skb->users, 1);
201
202#ifdef NET_SKBUFF_DATA_USES_OFFSET
203 skb->mac_header = ~0U;
204#endif
205out:
206 return skb;
207}
208
182/** 209/**
183 * __alloc_skb - allocate a network buffer 210 * __alloc_skb - allocate a network buffer
184 * @size: size to allocate 211 * @size: size to allocate
@@ -584,7 +611,8 @@ static void skb_release_head_state(struct sk_buff *skb)
584static void skb_release_all(struct sk_buff *skb) 611static void skb_release_all(struct sk_buff *skb)
585{ 612{
586 skb_release_head_state(skb); 613 skb_release_head_state(skb);
587 skb_release_data(skb); 614 if (likely(skb->data))
615 skb_release_data(skb);
588} 616}
589 617
590/** 618/**
@@ -673,6 +701,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
673 new->mac_header = old->mac_header; 701 new->mac_header = old->mac_header;
674 new->inner_transport_header = old->inner_transport_header; 702 new->inner_transport_header = old->inner_transport_header;
675 new->inner_network_header = old->inner_network_header; 703 new->inner_network_header = old->inner_network_header;
704 new->inner_mac_header = old->inner_mac_header;
676 skb_dst_copy(new, old); 705 skb_dst_copy(new, old);
677 new->rxhash = old->rxhash; 706 new->rxhash = old->rxhash;
678 new->ooo_okay = old->ooo_okay; 707 new->ooo_okay = old->ooo_okay;
@@ -706,6 +735,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
706 new->tc_verd = old->tc_verd; 735 new->tc_verd = old->tc_verd;
707#endif 736#endif
708#endif 737#endif
738 new->vlan_proto = old->vlan_proto;
709 new->vlan_tci = old->vlan_tci; 739 new->vlan_tci = old->vlan_tci;
710 740
711 skb_copy_secmark(new, old); 741 skb_copy_secmark(new, old);
@@ -867,6 +897,18 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
867} 897}
868EXPORT_SYMBOL(skb_clone); 898EXPORT_SYMBOL(skb_clone);
869 899
900static void skb_headers_offset_update(struct sk_buff *skb, int off)
901{
902 /* {transport,network,mac}_header and tail are relative to skb->head */
903 skb->transport_header += off;
904 skb->network_header += off;
905 if (skb_mac_header_was_set(skb))
906 skb->mac_header += off;
907 skb->inner_transport_header += off;
908 skb->inner_network_header += off;
909 skb->inner_mac_header += off;
910}
911
870static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) 912static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
871{ 913{
872#ifndef NET_SKBUFF_DATA_USES_OFFSET 914#ifndef NET_SKBUFF_DATA_USES_OFFSET
@@ -879,13 +921,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
879 __copy_skb_header(new, old); 921 __copy_skb_header(new, old);
880 922
881#ifndef NET_SKBUFF_DATA_USES_OFFSET 923#ifndef NET_SKBUFF_DATA_USES_OFFSET
882 /* {transport,network,mac}_header are relative to skb->head */ 924 skb_headers_offset_update(new, offset);
883 new->transport_header += offset;
884 new->network_header += offset;
885 if (skb_mac_header_was_set(new))
886 new->mac_header += offset;
887 new->inner_transport_header += offset;
888 new->inner_network_header += offset;
889#endif 925#endif
890 skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; 926 skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
891 skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; 927 skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
@@ -1077,14 +1113,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
1077#else 1113#else
1078 skb->end = skb->head + size; 1114 skb->end = skb->head + size;
1079#endif 1115#endif
1080 /* {transport,network,mac}_header and tail are relative to skb->head */
1081 skb->tail += off; 1116 skb->tail += off;
1082 skb->transport_header += off; 1117 skb_headers_offset_update(skb, off);
1083 skb->network_header += off;
1084 if (skb_mac_header_was_set(skb))
1085 skb->mac_header += off;
1086 skb->inner_transport_header += off;
1087 skb->inner_network_header += off;
1088 /* Only adjust this if it actually is csum_start rather than csum */ 1118 /* Only adjust this if it actually is csum_start rather than csum */
1089 if (skb->ip_summed == CHECKSUM_PARTIAL) 1119 if (skb->ip_summed == CHECKSUM_PARTIAL)
1090 skb->csum_start += nhead; 1120 skb->csum_start += nhead;
@@ -1180,12 +1210,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
1180 if (n->ip_summed == CHECKSUM_PARTIAL) 1210 if (n->ip_summed == CHECKSUM_PARTIAL)
1181 n->csum_start += off; 1211 n->csum_start += off;
1182#ifdef NET_SKBUFF_DATA_USES_OFFSET 1212#ifdef NET_SKBUFF_DATA_USES_OFFSET
1183 n->transport_header += off; 1213 skb_headers_offset_update(n, off);
1184 n->network_header += off;
1185 if (skb_mac_header_was_set(skb))
1186 n->mac_header += off;
1187 n->inner_transport_header += off;
1188 n->inner_network_header += off;
1189#endif 1214#endif
1190 1215
1191 return n; 1216 return n;
@@ -2741,12 +2766,19 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2741 unsigned int tnl_hlen = skb_tnl_header_len(skb); 2766 unsigned int tnl_hlen = skb_tnl_header_len(skb);
2742 unsigned int headroom; 2767 unsigned int headroom;
2743 unsigned int len; 2768 unsigned int len;
2769 __be16 proto;
2770 bool csum;
2744 int sg = !!(features & NETIF_F_SG); 2771 int sg = !!(features & NETIF_F_SG);
2745 int nfrags = skb_shinfo(skb)->nr_frags; 2772 int nfrags = skb_shinfo(skb)->nr_frags;
2746 int err = -ENOMEM; 2773 int err = -ENOMEM;
2747 int i = 0; 2774 int i = 0;
2748 int pos; 2775 int pos;
2749 2776
2777 proto = skb_network_protocol(skb);
2778 if (unlikely(!proto))
2779 return ERR_PTR(-EINVAL);
2780
2781 csum = !!can_checksum_protocol(features, proto);
2750 __skb_push(skb, doffset); 2782 __skb_push(skb, doffset);
2751 headroom = skb_headroom(skb); 2783 headroom = skb_headroom(skb);
2752 pos = skb_headlen(skb); 2784 pos = skb_headlen(skb);
@@ -2884,6 +2916,12 @@ skip_fraglist:
2884 nskb->data_len = len - hsize; 2916 nskb->data_len = len - hsize;
2885 nskb->len += nskb->data_len; 2917 nskb->len += nskb->data_len;
2886 nskb->truesize += nskb->data_len; 2918 nskb->truesize += nskb->data_len;
2919
2920 if (!csum) {
2921 nskb->csum = skb_checksum(nskb, doffset,
2922 nskb->len - doffset, 0);
2923 nskb->ip_summed = CHECKSUM_NONE;
2924 }
2887 } while ((offset += len) < skb->len); 2925 } while ((offset += len) < skb->len);
2888 2926
2889 return segs; 2927 return segs;
@@ -3361,6 +3399,7 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
3361 skb->ip_summed = CHECKSUM_PARTIAL; 3399 skb->ip_summed = CHECKSUM_PARTIAL;
3362 skb->csum_start = skb_headroom(skb) + start; 3400 skb->csum_start = skb_headroom(skb) + start;
3363 skb->csum_offset = off; 3401 skb->csum_offset = off;
3402 skb_set_transport_header(skb, start);
3364 return true; 3403 return true;
3365} 3404}
3366EXPORT_SYMBOL_GPL(skb_partial_csum_set); 3405EXPORT_SYMBOL_GPL(skb_partial_csum_set);
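
__alloc_skb_head() returns a header-only sk_buff with skb->data left NULL, which is why skb_release_all() above now checks skb->data before calling skb_release_data(). A hedged usage sketch (not taken from this patch):

    struct sk_buff *skb = __alloc_skb_head(GFP_KERNEL, NUMA_NO_NODE);

    if (skb) {
            /* no data buffer attached; truesize covers only the head */
            WARN_ON(skb->data != NULL);
            kfree_skb(skb);         /* safe: the release path skips data */
    }
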
diff --git a/net/core/sock.c b/net/core/sock.c
index b261a7977746..d4f4cea726e7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -907,6 +907,10 @@ set_rcvbuf:
907 sock_valbool_flag(sk, SOCK_NOFCS, valbool); 907 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
908 break; 908 break;
909 909
910 case SO_SELECT_ERR_QUEUE:
911 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
912 break;
913
910 default: 914 default:
911 ret = -ENOPROTOOPT; 915 ret = -ENOPROTOOPT;
912 break; 916 break;
@@ -1160,6 +1164,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
1160 v.val = sock_flag(sk, SOCK_FILTER_LOCKED); 1164 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1161 break; 1165 break;
1162 1166
1167 case SO_SELECT_ERR_QUEUE:
1168 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1169 break;
1170
1163 default: 1171 default:
1164 return -ENOPROTOOPT; 1172 return -ENOPROTOOPT;
1165 } 1173 }
@@ -1298,13 +1306,12 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
1298 module_put(owner); 1306 module_put(owner);
1299} 1307}
1300 1308
1301#ifdef CONFIG_CGROUPS
1302#if IS_ENABLED(CONFIG_NET_CLS_CGROUP) 1309#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
1303void sock_update_classid(struct sock *sk, struct task_struct *task) 1310void sock_update_classid(struct sock *sk)
1304{ 1311{
1305 u32 classid; 1312 u32 classid;
1306 1313
1307 classid = task_cls_classid(task); 1314 classid = task_cls_classid(current);
1308 if (classid != sk->sk_classid) 1315 if (classid != sk->sk_classid)
1309 sk->sk_classid = classid; 1316 sk->sk_classid = classid;
1310} 1317}
@@ -1312,16 +1319,15 @@ EXPORT_SYMBOL(sock_update_classid);
1312#endif 1319#endif
1313 1320
1314#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) 1321#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
1315void sock_update_netprioidx(struct sock *sk, struct task_struct *task) 1322void sock_update_netprioidx(struct sock *sk)
1316{ 1323{
1317 if (in_interrupt()) 1324 if (in_interrupt())
1318 return; 1325 return;
1319 1326
1320 sk->sk_cgrp_prioidx = task_netprioidx(task); 1327 sk->sk_cgrp_prioidx = task_netprioidx(current);
1321} 1328}
1322EXPORT_SYMBOL_GPL(sock_update_netprioidx); 1329EXPORT_SYMBOL_GPL(sock_update_netprioidx);
1323#endif 1330#endif
1324#endif
1325 1331
1326/** 1332/**
1327 * sk_alloc - All socket objects are allocated here 1333 * sk_alloc - All socket objects are allocated here
@@ -1347,8 +1353,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1347 sock_net_set(sk, get_net(net)); 1353 sock_net_set(sk, get_net(net));
1348 atomic_set(&sk->sk_wmem_alloc, 1); 1354 atomic_set(&sk->sk_wmem_alloc, 1);
1349 1355
1350 sock_update_classid(sk, current); 1356 sock_update_classid(sk);
1351 sock_update_netprioidx(sk, current); 1357 sock_update_netprioidx(sk);
1352 } 1358 }
1353 1359
1354 return sk; 1360 return sk;
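
From userspace, SO_SELECT_ERR_QUEUE is set like any other boolean SOL_SOCKET option. A sketch, assuming the constant comes from this kernel's exported headers (how the wakeup is reported still depends on the per-protocol poll handler):

    #include <sys/socket.h>

    /* opt a socket into error-queue wakeups for select()/poll() */
    static int enable_err_queue_select(int fd)
    {
            int on = 1;

            return setsockopt(fd, SOL_SOCKET, SO_SELECT_ERR_QUEUE,
                              &on, sizeof(on));
    }
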
diff --git a/net/core/utils.c b/net/core/utils.c
index e3487e461939..3c7f5b51b979 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -17,6 +17,7 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/jiffies.h> 18#include <linux/jiffies.h>
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/ctype.h>
20#include <linux/inet.h> 21#include <linux/inet.h>
21#include <linux/mm.h> 22#include <linux/mm.h>
22#include <linux/net.h> 23#include <linux/net.h>
@@ -348,9 +349,7 @@ int mac_pton(const char *s, u8 *mac)
348 349
349 /* Don't dirty result unless string is valid MAC. */ 350 /* Don't dirty result unless string is valid MAC. */
350 for (i = 0; i < ETH_ALEN; i++) { 351 for (i = 0; i < ETH_ALEN; i++) {
351 if (!strchr("0123456789abcdefABCDEF", s[i * 3])) 352 if (!isxdigit(s[i * 3]) || !isxdigit(s[i * 3 + 1]))
352 return 0;
353 if (!strchr("0123456789abcdefABCDEF", s[i * 3 + 1]))
354 return 0; 353 return 0;
355 if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':') 354 if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':')
356 return 0; 355 return 0;
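
mac_pton() keeps its contract: it accepts exactly the colon-separated "xx:xx:xx:xx:xx:xx" form and writes the result only on success. A minimal caller sketch:

    u8 mac[ETH_ALEN];

    /* returns nonzero only for a fully valid MAC string */
    if (mac_pton("00:11:22:aa:bb:cc", mac))
            pr_info("parsed MAC %pM\n", mac);
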
diff --git a/net/dcb/dcbevent.c b/net/dcb/dcbevent.c
index 1d9eb7c60a68..4f72fc40bf02 100644
--- a/net/dcb/dcbevent.c
+++ b/net/dcb/dcbevent.c
@@ -20,6 +20,7 @@
20#include <linux/rtnetlink.h> 20#include <linux/rtnetlink.h>
21#include <linux/notifier.h> 21#include <linux/notifier.h>
22#include <linux/export.h> 22#include <linux/export.h>
23#include <net/dcbevent.h>
23 24
24static ATOMIC_NOTIFIER_HEAD(dcbevent_notif_chain); 25static ATOMIC_NOTIFIER_HEAD(dcbevent_notif_chain);
25 26
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 21291f1abcd6..40d5829ed36a 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1658,7 +1658,7 @@ static const struct reply_func reply_funcs[DCB_CMD_MAX+1] = {
1658 [DCB_CMD_CEE_GET] = { RTM_GETDCB, dcbnl_cee_get }, 1658 [DCB_CMD_CEE_GET] = { RTM_GETDCB, dcbnl_cee_get },
1659}; 1659};
1660 1660
1661static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1661static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
1662{ 1662{
1663 struct net *net = sock_net(skb->sk); 1663 struct net *net = sock_net(skb->sk);
1664 struct net_device *netdev; 1664 struct net_device *netdev;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 4f9f5eb478f1..ebc54fef85a5 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -500,8 +500,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
500 return &rt->dst; 500 return &rt->dst;
501} 501}
502 502
503static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, 503static int dccp_v4_send_response(struct sock *sk, struct request_sock *req)
504 struct request_values *rv_unused)
505{ 504{
506 int err = -1; 505 int err = -1;
507 struct sk_buff *skb; 506 struct sk_buff *skb;
@@ -658,7 +657,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
658 dreq->dreq_gss = dreq->dreq_iss; 657 dreq->dreq_gss = dreq->dreq_iss;
659 dreq->dreq_service = service; 658 dreq->dreq_service = service;
660 659
661 if (dccp_v4_send_response(sk, req, NULL)) 660 if (dccp_v4_send_response(sk, req))
662 goto drop_and_free; 661 goto drop_and_free;
663 662
664 inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); 663 inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6e05981f271e..9c61f9c02fdb 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -213,8 +213,7 @@ out:
213} 213}
214 214
215 215
216static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, 216static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
217 struct request_values *rv_unused)
218{ 217{
219 struct inet6_request_sock *ireq6 = inet6_rsk(req); 218 struct inet6_request_sock *ireq6 = inet6_rsk(req);
220 struct ipv6_pinfo *np = inet6_sk(sk); 219 struct ipv6_pinfo *np = inet6_sk(sk);
@@ -428,7 +427,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
428 dreq->dreq_gss = dreq->dreq_iss; 427 dreq->dreq_gss = dreq->dreq_iss;
429 dreq->dreq_service = service; 428 dreq->dreq_service = service;
430 429
431 if (dccp_v6_send_response(sk, req, NULL)) 430 if (dccp_v6_send_response(sk, req))
432 goto drop_and_free; 431 goto drop_and_free;
433 432
434 inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); 433 inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index c8da116d84a4..7d9197063ebb 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -563,7 +563,7 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = {
563 .len = IFNAMSIZ - 1 }, 563 .len = IFNAMSIZ - 1 },
564}; 564};
565 565
566static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 566static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
567{ 567{
568 struct net *net = sock_net(skb->sk); 568 struct net *net = sock_net(skb->sk);
569 struct nlattr *tb[IFA_MAX+1]; 569 struct nlattr *tb[IFA_MAX+1];
@@ -607,7 +607,7 @@ errout:
607 return err; 607 return err;
608} 608}
609 609
610static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 610static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
611{ 611{
612 struct net *net = sock_net(skb->sk); 612 struct net *net = sock_net(skb->sk);
613 struct nlattr *tb[IFA_MAX+1]; 613 struct nlattr *tb[IFA_MAX+1];
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index e36614eccc04..57dc159245ec 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -145,22 +145,10 @@ static inline struct dn_fib_info *dn_fib_find_info(const struct dn_fib_info *nfi
145 return NULL; 145 return NULL;
146} 146}
147 147
148__le16 dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type) 148static int dn_fib_count_nhs(const struct nlattr *attr)
149{ 149{
150 while(RTA_OK(attr,attrlen)) { 150 struct rtnexthop *nhp = nla_data(attr);
151 if (attr->rta_type == type) 151 int nhs = 0, nhlen = nla_len(attr);
152 return *(__le16*)RTA_DATA(attr);
153 attr = RTA_NEXT(attr, attrlen);
154 }
155
156 return 0;
157}
158
159static int dn_fib_count_nhs(struct rtattr *rta)
160{
161 int nhs = 0;
162 struct rtnexthop *nhp = RTA_DATA(rta);
163 int nhlen = RTA_PAYLOAD(rta);
164 152
165 while(nhlen >= (int)sizeof(struct rtnexthop)) { 153 while(nhlen >= (int)sizeof(struct rtnexthop)) {
166 if ((nhlen -= nhp->rtnh_len) < 0) 154 if ((nhlen -= nhp->rtnh_len) < 0)
@@ -172,10 +160,11 @@ static int dn_fib_count_nhs(struct rtattr *rta)
172 return nhs; 160 return nhs;
173} 161}
174 162
175static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) 163static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr,
164 const struct rtmsg *r)
176{ 165{
177 struct rtnexthop *nhp = RTA_DATA(rta); 166 struct rtnexthop *nhp = nla_data(attr);
178 int nhlen = RTA_PAYLOAD(rta); 167 int nhlen = nla_len(attr);
179 168
180 change_nexthops(fi) { 169 change_nexthops(fi) {
181 int attrlen = nhlen - sizeof(struct rtnexthop); 170 int attrlen = nhlen - sizeof(struct rtnexthop);
@@ -187,7 +176,10 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, cons
187 nh->nh_weight = nhp->rtnh_hops + 1; 176 nh->nh_weight = nhp->rtnh_hops + 1;
188 177
189 if (attrlen) { 178 if (attrlen) {
190 nh->nh_gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); 179 struct nlattr *gw_attr;
180
181 gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
182 nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0;
191 } 183 }
192 nhp = RTNH_NEXT(nhp); 184 nhp = RTNH_NEXT(nhp);
193 } endfor_nexthops(fi); 185 } endfor_nexthops(fi);
@@ -268,7 +260,8 @@ out:
268} 260}
269 261
270 262
271struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta *rta, const struct nlmsghdr *nlh, int *errp) 263struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *attrs[],
264 const struct nlmsghdr *nlh, int *errp)
272{ 265{
273 int err; 266 int err;
274 struct dn_fib_info *fi = NULL; 267 struct dn_fib_info *fi = NULL;
@@ -281,11 +274,9 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta
281 if (dn_fib_props[r->rtm_type].scope > r->rtm_scope) 274 if (dn_fib_props[r->rtm_type].scope > r->rtm_scope)
282 goto err_inval; 275 goto err_inval;
283 276
284 if (rta->rta_mp) { 277 if (attrs[RTA_MULTIPATH] &&
285 nhs = dn_fib_count_nhs(rta->rta_mp); 278 (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0)
286 if (nhs == 0) 279 goto err_inval;
287 goto err_inval;
288 }
289 280
290 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL); 281 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL);
291 err = -ENOBUFS; 282 err = -ENOBUFS;
@@ -295,53 +286,65 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta
295 fi->fib_protocol = r->rtm_protocol; 286 fi->fib_protocol = r->rtm_protocol;
296 fi->fib_nhs = nhs; 287 fi->fib_nhs = nhs;
297 fi->fib_flags = r->rtm_flags; 288 fi->fib_flags = r->rtm_flags;
298 if (rta->rta_priority)
299 fi->fib_priority = *rta->rta_priority;
300 if (rta->rta_mx) {
301 int attrlen = RTA_PAYLOAD(rta->rta_mx);
302 struct rtattr *attr = RTA_DATA(rta->rta_mx);
303 289
304 while(RTA_OK(attr, attrlen)) { 290 if (attrs[RTA_PRIORITY])
305 unsigned int flavour = attr->rta_type; 291 fi->fib_priority = nla_get_u32(attrs[RTA_PRIORITY]);
292
293 if (attrs[RTA_METRICS]) {
294 struct nlattr *attr;
295 int rem;
306 296
307 if (flavour) { 297 nla_for_each_nested(attr, attrs[RTA_METRICS], rem) {
308 if (flavour > RTAX_MAX) 298 int type = nla_type(attr);
299
300 if (type) {
301 if (type > RTAX_MAX || nla_len(attr) < 4)
309 goto err_inval; 302 goto err_inval;
310 fi->fib_metrics[flavour-1] = *(unsigned int *)RTA_DATA(attr); 303
304 fi->fib_metrics[type-1] = nla_get_u32(attr);
311 } 305 }
312 attr = RTA_NEXT(attr, attrlen);
313 } 306 }
314 } 307 }
315 if (rta->rta_prefsrc)
316 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 2);
317 308
318 if (rta->rta_mp) { 309 if (attrs[RTA_PREFSRC])
319 if ((err = dn_fib_get_nhs(fi, rta->rta_mp, r)) != 0) 310 fi->fib_prefsrc = nla_get_le16(attrs[RTA_PREFSRC]);
311
312 if (attrs[RTA_MULTIPATH]) {
313 if ((err = dn_fib_get_nhs(fi, attrs[RTA_MULTIPATH], r)) != 0)
320 goto failure; 314 goto failure;
321 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) 315
316 if (attrs[RTA_OIF] &&
317 fi->fib_nh->nh_oif != nla_get_u32(attrs[RTA_OIF]))
322 goto err_inval; 318 goto err_inval;
323 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 2)) 319
320 if (attrs[RTA_GATEWAY] &&
321 fi->fib_nh->nh_gw != nla_get_le16(attrs[RTA_GATEWAY]))
324 goto err_inval; 322 goto err_inval;
325 } else { 323 } else {
326 struct dn_fib_nh *nh = fi->fib_nh; 324 struct dn_fib_nh *nh = fi->fib_nh;
327 if (rta->rta_oif) 325
328 nh->nh_oif = *rta->rta_oif; 326 if (attrs[RTA_OIF])
329 if (rta->rta_gw) 327 nh->nh_oif = nla_get_u32(attrs[RTA_OIF]);
330 memcpy(&nh->nh_gw, rta->rta_gw, 2); 328
329 if (attrs[RTA_GATEWAY])
330 nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]);
331
331 nh->nh_flags = r->rtm_flags; 332 nh->nh_flags = r->rtm_flags;
332 nh->nh_weight = 1; 333 nh->nh_weight = 1;
333 } 334 }
334 335
335 if (r->rtm_type == RTN_NAT) { 336 if (r->rtm_type == RTN_NAT) {
336 if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif) 337 if (!attrs[RTA_GATEWAY] || nhs != 1 || attrs[RTA_OIF])
337 goto err_inval; 338 goto err_inval;
338 memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 2); 339
340 fi->fib_nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]);
339 goto link_it; 341 goto link_it;
340 } 342 }
341 343
342 if (dn_fib_props[r->rtm_type].error) { 344 if (dn_fib_props[r->rtm_type].error) {
343 if (rta->rta_gw || rta->rta_oif || rta->rta_mp) 345 if (attrs[RTA_GATEWAY] || attrs[RTA_OIF] || attrs[RTA_MULTIPATH])
344 goto err_inval; 346 goto err_inval;
347
345 goto link_it; 348 goto link_it;
346 } 349 }
347 350
@@ -367,8 +370,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta
367 } 370 }
368 371
369 if (fi->fib_prefsrc) { 372 if (fi->fib_prefsrc) {
370 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || 373 if (r->rtm_type != RTN_LOCAL || !attrs[RTA_DST] ||
371 memcmp(&fi->fib_prefsrc, rta->rta_dst, 2)) 374 fi->fib_prefsrc != nla_get_le16(attrs[RTA_DST]))
372 if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) 375 if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
373 goto err_inval; 376 goto err_inval;
374 } 377 }
@@ -486,39 +489,21 @@ void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res)
486 spin_unlock_bh(&dn_fib_multipath_lock); 489 spin_unlock_bh(&dn_fib_multipath_lock);
487} 490}
488 491
489 492static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table)
490static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta)
491{
492 int i;
493
494 for(i = 1; i <= RTA_MAX; i++) {
495 struct rtattr *attr = rta[i-1];
496 if (attr) {
497 if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2)
498 return -EINVAL;
499 if (i != RTA_MULTIPATH && i != RTA_METRICS &&
500 i != RTA_TABLE)
501 rta[i-1] = (struct rtattr *)RTA_DATA(attr);
502 }
503 }
504
505 return 0;
506}
507
508static inline u32 rtm_get_table(struct rtattr **rta, u8 table)
509{ 493{
510 if (rta[RTA_TABLE - 1]) 494 if (attrs[RTA_TABLE])
511 table = nla_get_u32((struct nlattr *) rta[RTA_TABLE - 1]); 495 table = nla_get_u32(attrs[RTA_TABLE]);
512 496
513 return table; 497 return table;
514} 498}
515 499
516static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 500static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
517{ 501{
518 struct net *net = sock_net(skb->sk); 502 struct net *net = sock_net(skb->sk);
519 struct dn_fib_table *tb; 503 struct dn_fib_table *tb;
520 struct rtattr **rta = arg; 504 struct rtmsg *r = nlmsg_data(nlh);
521 struct rtmsg *r = NLMSG_DATA(nlh); 505 struct nlattr *attrs[RTA_MAX+1];
506 int err;
522 507
523 if (!capable(CAP_NET_ADMIN)) 508 if (!capable(CAP_NET_ADMIN))
524 return -EPERM; 509 return -EPERM;
@@ -526,22 +511,24 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *
526 if (!net_eq(net, &init_net)) 511 if (!net_eq(net, &init_net))
527 return -EINVAL; 512 return -EINVAL;
528 513
529 if (dn_fib_check_attr(r, rta)) 514 err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy);
530 return -EINVAL; 515 if (err < 0)
516 return err;
531 517
532 tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0); 518 tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 0);
533 if (tb) 519 if (!tb)
534 return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); 520 return -ESRCH;
535 521
536 return -ESRCH; 522 return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb));
537} 523}
538 524
539static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 525static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
540{ 526{
541 struct net *net = sock_net(skb->sk); 527 struct net *net = sock_net(skb->sk);
542 struct dn_fib_table *tb; 528 struct dn_fib_table *tb;
543 struct rtattr **rta = arg; 529 struct rtmsg *r = nlmsg_data(nlh);
544 struct rtmsg *r = NLMSG_DATA(nlh); 530 struct nlattr *attrs[RTA_MAX+1];
531 int err;
545 532
546 if (!capable(CAP_NET_ADMIN)) 533 if (!capable(CAP_NET_ADMIN))
547 return -EPERM; 534 return -EPERM;
@@ -549,14 +536,15 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *
549 if (!net_eq(net, &init_net)) 536 if (!net_eq(net, &init_net))
550 return -EINVAL; 537 return -EINVAL;
551 538
552 if (dn_fib_check_attr(r, rta)) 539 err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy);
553 return -EINVAL; 540 if (err < 0)
541 return err;
554 542
555 tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1); 543 tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 1);
556 if (tb) 544 if (!tb)
557 return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); 545 return -ENOBUFS;
558 546
559 return -ENOBUFS; 547 return tb->insert(tb, r, attrs, nlh, &NETLINK_CB(skb));
560} 548}
561 549
562static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa) 550static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa)
@@ -566,10 +554,31 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad
566 struct nlmsghdr nlh; 554 struct nlmsghdr nlh;
567 struct rtmsg rtm; 555 struct rtmsg rtm;
568 } req; 556 } req;
569 struct dn_kern_rta rta; 557 struct {
558 struct nlattr hdr;
559 __le16 dst;
560 } dst_attr = {
561 .dst = dst,
562 };
563 struct {
564 struct nlattr hdr;
565 __le16 prefsrc;
566 } prefsrc_attr = {
567 .prefsrc = ifa->ifa_local,
568 };
569 struct {
570 struct nlattr hdr;
571 u32 oif;
572 } oif_attr = {
573 .oif = ifa->ifa_dev->dev->ifindex,
574 };
575 struct nlattr *attrs[RTA_MAX+1] = {
576 [RTA_DST] = (struct nlattr *) &dst_attr,
577 [RTA_PREFSRC] = (struct nlattr * ) &prefsrc_attr,
578 [RTA_OIF] = (struct nlattr *) &oif_attr,
579 };
570 580
571 memset(&req.rtm, 0, sizeof(req.rtm)); 581 memset(&req.rtm, 0, sizeof(req.rtm));
572 memset(&rta, 0, sizeof(rta));
573 582
574 if (type == RTN_UNICAST) 583 if (type == RTN_UNICAST)
575 tb = dn_fib_get_table(RT_MIN_TABLE, 1); 584 tb = dn_fib_get_table(RT_MIN_TABLE, 1);
@@ -591,14 +600,10 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad
591 req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); 600 req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
592 req.rtm.rtm_type = type; 601 req.rtm.rtm_type = type;
593 602
594 rta.rta_dst = &dst;
595 rta.rta_prefsrc = &ifa->ifa_local;
596 rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
597
598 if (cmd == RTM_NEWROUTE) 603 if (cmd == RTM_NEWROUTE)
599 tb->insert(tb, &req.rtm, &rta, &req.nlh, NULL); 604 tb->insert(tb, &req.rtm, attrs, &req.nlh, NULL);
600 else 605 else
601 tb->delete(tb, &req.rtm, &rta, &req.nlh, NULL); 606 tb->delete(tb, &req.rtm, attrs, &req.nlh, NULL);
602} 607}
603 608
604static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa) 609static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa)
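
The RTA_METRICS handling above is the standard nla_for_each_nested() idiom. The same loop as a standalone sketch (function name hypothetical):

    static int example_parse_metrics(struct nlattr *mx, u32 *metrics)
    {
            struct nlattr *attr;
            int rem;

            nla_for_each_nested(attr, mx, rem) {
                    int type = nla_type(attr);

                    if (!type)
                            continue;
                    if (type > RTAX_MAX || nla_len(attr) < 4)
                            return -EINVAL;
                    metrics[type - 1] = nla_get_u32(attr);
            }
            return 0;
    }
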
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 5ac0e153ef83..fe32388ea24f 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1613,23 +1613,41 @@ errout:
1613 return -EMSGSIZE; 1613 return -EMSGSIZE;
1614} 1614}
1615 1615
1616const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = {
1617 [RTA_DST] = { .type = NLA_U16 },
1618 [RTA_SRC] = { .type = NLA_U16 },
1619 [RTA_IIF] = { .type = NLA_U32 },
1620 [RTA_OIF] = { .type = NLA_U32 },
1621 [RTA_GATEWAY] = { .type = NLA_U16 },
1622 [RTA_PRIORITY] = { .type = NLA_U32 },
1623 [RTA_PREFSRC] = { .type = NLA_U16 },
1624 [RTA_METRICS] = { .type = NLA_NESTED },
1625 [RTA_MULTIPATH] = { .type = NLA_NESTED },
1626 [RTA_TABLE] = { .type = NLA_U32 },
1627 [RTA_MARK] = { .type = NLA_U32 },
1628};
1629
1616/* 1630/*
1617 * This is called by both endnodes and routers now. 1631 * This is called by both endnodes and routers now.
1618 */ 1632 */
1619static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) 1633static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1620{ 1634{
1621 struct net *net = sock_net(in_skb->sk); 1635 struct net *net = sock_net(in_skb->sk);
1622 struct rtattr **rta = arg;
1623 struct rtmsg *rtm = nlmsg_data(nlh); 1636 struct rtmsg *rtm = nlmsg_data(nlh);
1624 struct dn_route *rt = NULL; 1637 struct dn_route *rt = NULL;
1625 struct dn_skb_cb *cb; 1638 struct dn_skb_cb *cb;
1626 int err; 1639 int err;
1627 struct sk_buff *skb; 1640 struct sk_buff *skb;
1628 struct flowidn fld; 1641 struct flowidn fld;
1642 struct nlattr *tb[RTA_MAX+1];
1629 1643
1630 if (!net_eq(net, &init_net)) 1644 if (!net_eq(net, &init_net))
1631 return -EINVAL; 1645 return -EINVAL;
1632 1646
1647 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy);
1648 if (err < 0)
1649 return err;
1650
1633 memset(&fld, 0, sizeof(fld)); 1651 memset(&fld, 0, sizeof(fld));
1634 fld.flowidn_proto = DNPROTO_NSP; 1652 fld.flowidn_proto = DNPROTO_NSP;
1635 1653
@@ -1639,12 +1657,14 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
1639 skb_reset_mac_header(skb); 1657 skb_reset_mac_header(skb);
1640 cb = DN_SKB_CB(skb); 1658 cb = DN_SKB_CB(skb);
1641 1659
1642 if (rta[RTA_SRC-1]) 1660 if (tb[RTA_SRC])
1643 memcpy(&fld.saddr, RTA_DATA(rta[RTA_SRC-1]), 2); 1661 fld.saddr = nla_get_le16(tb[RTA_SRC]);
1644 if (rta[RTA_DST-1]) 1662
1645 memcpy(&fld.daddr, RTA_DATA(rta[RTA_DST-1]), 2); 1663 if (tb[RTA_DST])
1646 if (rta[RTA_IIF-1]) 1664 fld.daddr = nla_get_le16(tb[RTA_DST]);
1647 memcpy(&fld.flowidn_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); 1665
1666 if (tb[RTA_IIF])
1667 fld.flowidn_iif = nla_get_u32(tb[RTA_IIF]);
1648 1668
1649 if (fld.flowidn_iif) { 1669 if (fld.flowidn_iif) {
1650 struct net_device *dev; 1670 struct net_device *dev;
@@ -1669,10 +1689,9 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
1669 if (!err && -rt->dst.error) 1689 if (!err && -rt->dst.error)
1670 err = rt->dst.error; 1690 err = rt->dst.error;
1671 } else { 1691 } else {
1672 int oif = 0; 1692 if (tb[RTA_OIF])
1673 if (rta[RTA_OIF - 1]) 1693 fld.flowidn_oif = nla_get_u32(tb[RTA_OIF]);
1674 memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); 1694
1675 fld.flowidn_oif = oif;
1676 err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0); 1695 err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0);
1677 } 1696 }
1678 1697
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 6c2445bcaba1..86e3807052e9 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -19,7 +19,6 @@
19#include <linux/sockios.h> 19#include <linux/sockios.h>
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/skbuff.h> 21#include <linux/skbuff.h>
22#include <linux/netlink.h>
23#include <linux/rtnetlink.h> 22#include <linux/rtnetlink.h>
24#include <linux/proc_fs.h> 23#include <linux/proc_fs.h>
25#include <linux/netdevice.h> 24#include <linux/netdevice.h>
@@ -224,26 +223,27 @@ static struct dn_zone *dn_new_zone(struct dn_hash *table, int z)
224} 223}
225 224
226 225
227static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern_rta *rta, struct dn_fib_info *fi) 226static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct nlattr *attrs[], struct dn_fib_info *fi)
228{ 227{
229 struct rtnexthop *nhp; 228 struct rtnexthop *nhp;
230 int nhlen; 229 int nhlen;
231 230
232 if (rta->rta_priority && *rta->rta_priority != fi->fib_priority) 231 if (attrs[RTA_PRIORITY] &&
232 nla_get_u32(attrs[RTA_PRIORITY]) != fi->fib_priority)
233 return 1; 233 return 1;
234 234
235 if (rta->rta_oif || rta->rta_gw) { 235 if (attrs[RTA_OIF] || attrs[RTA_GATEWAY]) {
236 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && 236 if ((!attrs[RTA_OIF] || nla_get_u32(attrs[RTA_OIF]) == fi->fib_nh->nh_oif) &&
237 (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 2) == 0)) 237 (!attrs[RTA_GATEWAY] || nla_get_le16(attrs[RTA_GATEWAY]) != fi->fib_nh->nh_gw))
238 return 0; 238 return 0;
239 return 1; 239 return 1;
240 } 240 }
241 241
242 if (rta->rta_mp == NULL) 242 if (!attrs[RTA_MULTIPATH])
243 return 0; 243 return 0;
244 244
245 nhp = RTA_DATA(rta->rta_mp); 245 nhp = nla_data(attrs[RTA_MULTIPATH]);
246 nhlen = RTA_PAYLOAD(rta->rta_mp); 246 nhlen = nla_len(attrs[RTA_MULTIPATH]);
247 247
248 for_nexthops(fi) { 248 for_nexthops(fi) {
249 int attrlen = nhlen - sizeof(struct rtnexthop); 249 int attrlen = nhlen - sizeof(struct rtnexthop);
@@ -254,7 +254,10 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern
254 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) 254 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
255 return 1; 255 return 1;
256 if (attrlen) { 256 if (attrlen) {
257 gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); 257 struct nlattr *gw_attr;
258
259 gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
260 gw = gw_attr ? nla_get_le16(gw_attr) : 0;
258 261
259 if (gw && gw != nh->nh_gw) 262 if (gw && gw != nh->nh_gw)
260 return 1; 263 return 1;
@@ -488,7 +491,7 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
488 if (!net_eq(net, &init_net)) 491 if (!net_eq(net, &init_net))
489 return 0; 492 return 0;
490 493
491 if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && 494 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
492 ((struct rtmsg *)nlmsg_data(cb->nlh))->rtm_flags&RTM_F_CLONED) 495 ((struct rtmsg *)nlmsg_data(cb->nlh))->rtm_flags&RTM_F_CLONED)
493 return dn_cache_dump(skb, cb); 496 return dn_cache_dump(skb, cb);
494 497
@@ -517,7 +520,8 @@ out:
517 return skb->len; 520 return skb->len;
518} 521}
519 522
520static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) 523static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[],
524 struct nlmsghdr *n, struct netlink_skb_parms *req)
521{ 525{
522 struct dn_hash *table = (struct dn_hash *)tb->data; 526 struct dn_hash *table = (struct dn_hash *)tb->data;
523 struct dn_fib_node *new_f, *f, **fp, **del_fp; 527 struct dn_fib_node *new_f, *f, **fp, **del_fp;
@@ -536,15 +540,14 @@ static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct
536 return -ENOBUFS; 540 return -ENOBUFS;
537 541
538 dz_key_0(key); 542 dz_key_0(key);
539 if (rta->rta_dst) { 543 if (attrs[RTA_DST]) {
540 __le16 dst; 544 __le16 dst = nla_get_le16(attrs[RTA_DST]);
541 memcpy(&dst, rta->rta_dst, 2);
542 if (dst & ~DZ_MASK(dz)) 545 if (dst & ~DZ_MASK(dz))
543 return -EINVAL; 546 return -EINVAL;
544 key = dz_key(dst, dz); 547 key = dz_key(dst, dz);
545 } 548 }
546 549
547 if ((fi = dn_fib_create_info(r, rta, n, &err)) == NULL) 550 if ((fi = dn_fib_create_info(r, attrs, n, &err)) == NULL)
548 return err; 551 return err;
549 552
550 if (dz->dz_nent > (dz->dz_divisor << 2) && 553 if (dz->dz_nent > (dz->dz_divisor << 2) &&
@@ -654,7 +657,8 @@ out:
654} 657}
655 658
656 659
657static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) 660static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[],
661 struct nlmsghdr *n, struct netlink_skb_parms *req)
658{ 662{
659 struct dn_hash *table = (struct dn_hash*)tb->data; 663 struct dn_hash *table = (struct dn_hash*)tb->data;
660 struct dn_fib_node **fp, **del_fp, *f; 664 struct dn_fib_node **fp, **del_fp, *f;
@@ -671,9 +675,8 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct
671 return -ESRCH; 675 return -ESRCH;
672 676
673 dz_key_0(key); 677 dz_key_0(key);
674 if (rta->rta_dst) { 678 if (attrs[RTA_DST]) {
675 __le16 dst; 679 __le16 dst = nla_get_le16(attrs[RTA_DST]);
676 memcpy(&dst, rta->rta_dst, 2);
677 if (dst & ~DZ_MASK(dz)) 680 if (dst & ~DZ_MASK(dz))
678 return -EINVAL; 681 return -EINVAL;
679 key = dz_key(dst, dz); 682 key = dz_key(dst, dz);
@@ -703,7 +706,7 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct
703 (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) && 706 (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) &&
704 (!r->rtm_protocol || 707 (!r->rtm_protocol ||
705 fi->fib_protocol == r->rtm_protocol) && 708 fi->fib_protocol == r->rtm_protocol) &&
706 dn_fib_nh_match(r, n, rta, fi) == 0) 709 dn_fib_nh_match(r, n, attrs, fi) == 0)
707 del_fp = fp; 710 del_fp = fp;
708 } 711 }
709 712
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index dfe42012a044..2a7efe388344 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -19,7 +19,7 @@
19#include <linux/netdevice.h> 19#include <linux/netdevice.h>
20#include <linux/netfilter.h> 20#include <linux/netfilter.h>
21#include <linux/spinlock.h> 21#include <linux/spinlock.h>
22#include <linux/netlink.h> 22#include <net/netlink.h>
23#include <linux/netfilter_decnet.h> 23#include <linux/netfilter_decnet.h>
24 24
25#include <net/sock.h> 25#include <net/sock.h>
@@ -39,21 +39,21 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
39 unsigned char *ptr; 39 unsigned char *ptr;
40 struct nf_dn_rtmsg *rtm; 40 struct nf_dn_rtmsg *rtm;
41 41
42 size = NLMSG_SPACE(rt_skb->len); 42 size = NLMSG_ALIGN(rt_skb->len) +
43 size += NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)); 43 NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg));
44 skb = alloc_skb(size, GFP_ATOMIC); 44 skb = nlmsg_new(size, GFP_ATOMIC);
45 if (!skb) { 45 if (!skb) {
46 *errp = -ENOMEM; 46 *errp = -ENOMEM;
47 return NULL; 47 return NULL;
48 } 48 }
49 old_tail = skb->tail; 49 old_tail = skb->tail;
50 nlh = nlmsg_put(skb, 0, 0, 0, size - sizeof(*nlh), 0); 50 nlh = nlmsg_put(skb, 0, 0, 0, size, 0);
51 if (!nlh) { 51 if (!nlh) {
52 kfree_skb(skb); 52 kfree_skb(skb);
53 *errp = -ENOMEM; 53 *errp = -ENOMEM;
54 return NULL; 54 return NULL;
55 } 55 }
56 rtm = (struct nf_dn_rtmsg *)NLMSG_DATA(nlh); 56 rtm = (struct nf_dn_rtmsg *)nlmsg_data(nlh);
57 rtm->nfdn_ifindex = rt_skb->dev->ifindex; 57 rtm->nfdn_ifindex = rt_skb->dev->ifindex;
58 ptr = NFDN_RTMSG(rtm); 58 ptr = NFDN_RTMSG(rtm);
59 skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len); 59 skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len);
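
The sizing rule in the conversion above is worth spelling out: alloc_skb() with NLMSG_SPACE() accounted for the netlink header explicitly, while nlmsg_new() takes only the payload length and reserves the header itself. A sketch (helper name hypothetical):

    static struct sk_buff *example_rtmsg_skb(unsigned int rt_len)
    {
            size_t payload = NLMSG_ALIGN(rt_len) +
                             NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg));
            struct sk_buff *skb = nlmsg_new(payload, GFP_ATOMIC);

            /* nlmsg_put() consumes payload bytes; NLMSG_HDRLEN was
             * already reserved by nlmsg_new() */
            if (skb && !nlmsg_put(skb, 0, 0, 0, payload, 0)) {
                    kfree_skb(skb);
                    return NULL;
            }
            return skb;
    }
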
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 2bc62ea857c8..0eb5d5e76dfb 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * net/dsa/dsa.c - Hardware switch handling 2 * net/dsa/dsa.c - Hardware switch handling
3 * Copyright (c) 2008-2009 Marvell Semiconductor 3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>
4 * 5 *
5 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
@@ -14,6 +15,9 @@
14#include <linux/slab.h> 15#include <linux/slab.h>
15#include <linux/module.h> 16#include <linux/module.h>
16#include <net/dsa.h> 17#include <net/dsa.h>
18#include <linux/of.h>
19#include <linux/of_mdio.h>
20#include <linux/of_platform.h>
17#include "dsa_priv.h" 21#include "dsa_priv.h"
18 22
19char dsa_driver_version[] = "0.1"; 23char dsa_driver_version[] = "0.1";
@@ -287,34 +291,239 @@ static struct net_device *dev_to_net_device(struct device *dev)
287 return NULL; 291 return NULL;
288} 292}
289 293
294#ifdef CONFIG_OF
295static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
296 struct dsa_chip_data *cd,
297 int chip_index,
298 struct device_node *link)
299{
300 int ret;
301 const __be32 *reg;
302 int link_port_addr;
303 int link_sw_addr;
304 struct device_node *parent_sw;
305 int len;
306
307 parent_sw = of_get_parent(link);
308 if (!parent_sw)
309 return -EINVAL;
310
311 reg = of_get_property(parent_sw, "reg", &len);
312 if (!reg || (len != sizeof(*reg) * 2))
313 return -EINVAL;
314
315 link_sw_addr = be32_to_cpup(reg + 1);
316
317 if (link_sw_addr >= pd->nr_chips)
318 return -EINVAL;
319
320 /* First time routing table allocation */
321 if (!cd->rtable) {
322 cd->rtable = kmalloc(pd->nr_chips * sizeof(s8), GFP_KERNEL);
323 if (!cd->rtable)
324 return -ENOMEM;
325
326 /* default to no valid uplink/downlink */
327 memset(cd->rtable, -1, pd->nr_chips * sizeof(s8));
328 }
329
330 reg = of_get_property(link, "reg", NULL);
331 if (!reg) {
332 ret = -EINVAL;
333 goto out;
334 }
335
336 link_port_addr = be32_to_cpup(reg);
337
338 cd->rtable[link_sw_addr] = link_port_addr;
339
340 return 0;
341out:
342 kfree(cd->rtable);
343 return ret;
344}
345
346static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
347{
348 int i;
349 int port_index;
350
351 for (i = 0; i < pd->nr_chips; i++) {
352 port_index = 0;
353 while (port_index < DSA_MAX_PORTS) {
354 if (pd->chip[i].port_names[port_index])
355 kfree(pd->chip[i].port_names[port_index]);
356 port_index++;
357 }
358 kfree(pd->chip[i].rtable);
359 }
360 kfree(pd->chip);
361}
362
363static int dsa_of_probe(struct platform_device *pdev)
364{
365 struct device_node *np = pdev->dev.of_node;
366 struct device_node *child, *mdio, *ethernet, *port, *link;
367 struct mii_bus *mdio_bus;
368 struct platform_device *ethernet_dev;
369 struct dsa_platform_data *pd;
370 struct dsa_chip_data *cd;
371 const char *port_name;
372 int chip_index, port_index;
373 const unsigned int *sw_addr, *port_reg;
374 int ret;
375
376 mdio = of_parse_phandle(np, "dsa,mii-bus", 0);
377 if (!mdio)
378 return -EINVAL;
379
380 mdio_bus = of_mdio_find_bus(mdio);
381 if (!mdio_bus)
382 return -EINVAL;
383
384 ethernet = of_parse_phandle(np, "dsa,ethernet", 0);
385 if (!ethernet)
386 return -EINVAL;
387
388 ethernet_dev = of_find_device_by_node(ethernet);
389 if (!ethernet_dev)
390 return -ENODEV;
391
392 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
393 if (!pd)
394 return -ENOMEM;
395
396 pdev->dev.platform_data = pd;
397 pd->netdev = &ethernet_dev->dev;
398 pd->nr_chips = of_get_child_count(np);
399 if (pd->nr_chips > DSA_MAX_SWITCHES)
400 pd->nr_chips = DSA_MAX_SWITCHES;
401
402 pd->chip = kzalloc(pd->nr_chips * sizeof(struct dsa_chip_data),
403 GFP_KERNEL);
404 if (!pd->chip) {
405 ret = -ENOMEM;
406 goto out_free;
407 }
408
409 chip_index = 0;
410 for_each_available_child_of_node(np, child) {
411 cd = &pd->chip[chip_index];
412
413 cd->mii_bus = &mdio_bus->dev;
414
415 sw_addr = of_get_property(child, "reg", NULL);
416 if (!sw_addr)
417 continue;
418
419 cd->sw_addr = be32_to_cpup(sw_addr);
420 if (cd->sw_addr > PHY_MAX_ADDR)
421 continue;
422
423 for_each_available_child_of_node(child, port) {
424 port_reg = of_get_property(port, "reg", NULL);
425 if (!port_reg)
426 continue;
427
428 port_index = be32_to_cpup(port_reg);
429
430 port_name = of_get_property(port, "label", NULL);
431 if (!port_name)
432 continue;
433
434 cd->port_names[port_index] = kstrdup(port_name,
435 GFP_KERNEL);
436 if (!cd->port_names[port_index]) {
437 ret = -ENOMEM;
438 goto out_free_chip;
439 }
440
441 link = of_parse_phandle(port, "link", 0);
442
443 if (!strcmp(port_name, "dsa") && link &&
444 pd->nr_chips > 1) {
445 ret = dsa_of_setup_routing_table(pd, cd,
446 chip_index, link);
447 if (ret)
448 goto out_free_chip;
449 }
450
451 if (port_index == DSA_MAX_PORTS)
452 break;
453 }
454 }
455
456 return 0;
457
458out_free_chip:
459 dsa_of_free_platform_data(pd);
460out_free:
461 kfree(pd);
462 pdev->dev.platform_data = NULL;
463 return ret;
464}
465
466static void dsa_of_remove(struct platform_device *pdev)
467{
468 struct dsa_platform_data *pd = pdev->dev.platform_data;
469
470 if (!pdev->dev.of_node)
471 return;
472
473 dsa_of_free_platform_data(pd);
474 kfree(pd);
475}
476#else
477static inline int dsa_of_probe(struct platform_device *pdev)
478{
479 return 0;
480}
481
482static inline void dsa_of_remove(struct platform_device *pdev)
483{
484}
485#endif
486
290static int dsa_probe(struct platform_device *pdev) 487static int dsa_probe(struct platform_device *pdev)
291{ 488{
292 static int dsa_version_printed; 489 static int dsa_version_printed;
293 struct dsa_platform_data *pd = pdev->dev.platform_data; 490 struct dsa_platform_data *pd = pdev->dev.platform_data;
294 struct net_device *dev; 491 struct net_device *dev;
295 struct dsa_switch_tree *dst; 492 struct dsa_switch_tree *dst;
296 int i; 493 int i, ret;
297 494
298 if (!dsa_version_printed++) 495 if (!dsa_version_printed++)
299 printk(KERN_NOTICE "Distributed Switch Architecture " 496 printk(KERN_NOTICE "Distributed Switch Architecture "
300 "driver version %s\n", dsa_driver_version); 497 "driver version %s\n", dsa_driver_version);
301 498
499 if (pdev->dev.of_node) {
500 ret = dsa_of_probe(pdev);
501 if (ret)
502 return ret;
503
504 pd = pdev->dev.platform_data;
505 }
506
302 if (pd == NULL || pd->netdev == NULL) 507 if (pd == NULL || pd->netdev == NULL)
303 return -EINVAL; 508 return -EINVAL;
304 509
305 dev = dev_to_net_device(pd->netdev); 510 dev = dev_to_net_device(pd->netdev);
306 if (dev == NULL) 511 if (dev == NULL) {
307 return -EINVAL; 512 ret = -EINVAL;
513 goto out;
514 }
308 515
309 if (dev->dsa_ptr != NULL) { 516 if (dev->dsa_ptr != NULL) {
310 dev_put(dev); 517 dev_put(dev);
311 return -EEXIST; 518 ret = -EEXIST;
519 goto out;
312 } 520 }
313 521
314 dst = kzalloc(sizeof(*dst), GFP_KERNEL); 522 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
315 if (dst == NULL) { 523 if (dst == NULL) {
316 dev_put(dev); 524 dev_put(dev);
317 return -ENOMEM; 525 ret = -ENOMEM;
526 goto out;
318 } 527 }
319 528
320 platform_set_drvdata(pdev, dst); 529 platform_set_drvdata(pdev, dst);
@@ -366,6 +575,11 @@ static int dsa_probe(struct platform_device *pdev)
366 } 575 }
367 576
368 return 0; 577 return 0;
578
579out:
580 dsa_of_remove(pdev);
581
582 return ret;
369} 583}
370 584
371static int dsa_remove(struct platform_device *pdev) 585static int dsa_remove(struct platform_device *pdev)
@@ -385,6 +599,8 @@ static int dsa_remove(struct platform_device *pdev)
385 dsa_switch_destroy(ds); 599 dsa_switch_destroy(ds);
386 } 600 }
387 601
602 dsa_of_remove(pdev);
603
388 return 0; 604 return 0;
389} 605}
390 606
@@ -392,6 +608,12 @@ static void dsa_shutdown(struct platform_device *pdev)
392{ 608{
393} 609}
394 610
611static const struct of_device_id dsa_of_match_table[] = {
612 { .compatible = "marvell,dsa", },
613 {}
614};
615MODULE_DEVICE_TABLE(of, dsa_of_match_table);
616
395static struct platform_driver dsa_driver = { 617static struct platform_driver dsa_driver = {
396 .probe = dsa_probe, 618 .probe = dsa_probe,
397 .remove = dsa_remove, 619 .remove = dsa_remove,
@@ -399,6 +621,7 @@ static struct platform_driver dsa_driver = {
399 .driver = { 621 .driver = {
400 .name = "dsa", 622 .name = "dsa",
401 .owner = THIS_MODULE, 623 .owner = THIS_MODULE,
624 .of_match_table = dsa_of_match_table,
402 }, 625 },
403}; 626};
404 627
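
dsa_of_setup_routing_table() keys the routing table off the second cell of the parent switch node's "reg" property, i.e. <mii-addr sw-index>. A hedged helper sketch of that convention:

    static int example_sw_index(struct device_node *sw)
    {
            int len;
            const __be32 *reg = of_get_property(sw, "reg", &len);

            /* expect exactly two cells: MII address, then switch index */
            if (!reg || len != 2 * sizeof(*reg))
                    return -EINVAL;
            return be32_to_cpup(reg + 1);
    }
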
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index a36c85eab5b4..5359560926bc 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -195,7 +195,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
195 if (netdev_uses_trailer_tags(dev)) 195 if (netdev_uses_trailer_tags(dev))
196 return htons(ETH_P_TRAILER); 196 return htons(ETH_P_TRAILER);
197 197
198 if (ntohs(eth->h_proto) >= 1536) 198 if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
199 return eth->h_proto; 199 return eth->h_proto;
200 200
201 /* 201 /*
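
ETH_P_802_3_MIN names the long-standing 802.3 rule: an h_proto field below 0x0600 (1536) is a frame length rather than an EtherType. A sketch of the rule the constant encodes:

    /* true when h_proto is an EtherType, false when it is a length */
    static bool example_is_ethertype(__be16 h_proto)
    {
            return ntohs(h_proto) >= ETH_P_802_3_MIN;       /* 0x0600 */
    }
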
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 43b95ca61114..55e1fd5b3e56 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -104,6 +104,7 @@ static const u8 lowpan_llprefix[] = {0xfe, 0x80};
104struct lowpan_dev_info { 104struct lowpan_dev_info {
105 struct net_device *real_dev; /* real WPAN device ptr */ 105 struct net_device *real_dev; /* real WPAN device ptr */
106 struct mutex dev_list_mtx; /* mutex for list ops */ 106 struct mutex dev_list_mtx; /* mutex for list ops */
107 unsigned short fragment_tag;
107}; 108};
108 109
109struct lowpan_dev_record { 110struct lowpan_dev_record {
@@ -120,7 +121,6 @@ struct lowpan_fragment {
120 struct list_head list; /* fragments list */ 121 struct list_head list; /* fragments list */
121}; 122};
122 123
123static unsigned short fragment_tag;
124static LIST_HEAD(lowpan_fragments); 124static LIST_HEAD(lowpan_fragments);
125static DEFINE_SPINLOCK(flist_lock); 125static DEFINE_SPINLOCK(flist_lock);
126 126
@@ -284,6 +284,9 @@ lowpan_compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb)
284 /* checksum is always inline */ 284 /* checksum is always inline */
285 memcpy(*hc06_ptr, &uh->check, 2); 285 memcpy(*hc06_ptr, &uh->check, 2);
286 *hc06_ptr += 2; 286 *hc06_ptr += 2;
287
288 /* skip the UDP header */
289 skb_pull(skb, sizeof(struct udphdr));
287} 290}
288 291
289static inline int lowpan_fetch_skb_u8(struct sk_buff *skb, u8 *val) 292static inline int lowpan_fetch_skb_u8(struct sk_buff *skb, u8 *val)
@@ -309,9 +312,8 @@ static inline int lowpan_fetch_skb_u16(struct sk_buff *skb, u16 *val)
309} 312}
310 313
311static int 314static int
312lowpan_uncompress_udp_header(struct sk_buff *skb) 315lowpan_uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh)
313{ 316{
314 struct udphdr *uh = udp_hdr(skb);
315 u8 tmp; 317 u8 tmp;
316 318
317 if (!uh) 319 if (!uh)
@@ -358,6 +360,14 @@ lowpan_uncompress_udp_header(struct sk_buff *skb)
358 /* copy checksum */ 360 /* copy checksum */
359 memcpy(&uh->check, &skb->data[0], 2); 361 memcpy(&uh->check, &skb->data[0], 2);
360 skb_pull(skb, 2); 362 skb_pull(skb, 2);
363
364 /*
 365	 * UDP length needs to be inferred from the lower layers
366 * here, we obtain the hint from the remaining size of the
367 * frame
368 */
369 uh->len = htons(skb->len + sizeof(struct udphdr));
370 pr_debug("uncompressed UDP length: src = %d", uh->len);
361 } else { 371 } else {
362 pr_debug("ERROR: unsupported NH format\n"); 372 pr_debug("ERROR: unsupported NH format\n");
363 goto err; 373 goto err;
@@ -572,17 +582,31 @@ static int lowpan_header_create(struct sk_buff *skb,
572 * this isn't implemented in mainline yet, so currently we assign 0xff 582 * this isn't implemented in mainline yet, so currently we assign 0xff
573 */ 583 */
574 { 584 {
585 mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
586 mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev);
587
575 /* prepare wpan address data */ 588 /* prepare wpan address data */
576 sa.addr_type = IEEE802154_ADDR_LONG; 589 sa.addr_type = IEEE802154_ADDR_LONG;
577 sa.pan_id = 0xff; 590 sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
578
579 da.addr_type = IEEE802154_ADDR_LONG;
580 da.pan_id = 0xff;
581 591
582 memcpy(&(da.hwaddr), daddr, 8);
583 memcpy(&(sa.hwaddr), saddr, 8); 592 memcpy(&(sa.hwaddr), saddr, 8);
593 /* intra-PAN communications */
594 da.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
584 595
585 mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; 596 /*
597 * if the destination address is the broadcast address, use the
598 * corresponding short address
599 */
600 if (lowpan_is_addr_broadcast(daddr)) {
601 da.addr_type = IEEE802154_ADDR_SHORT;
602 da.short_addr = IEEE802154_ADDR_BROADCAST;
603 } else {
604 da.addr_type = IEEE802154_ADDR_LONG;
605 memcpy(&(da.hwaddr), daddr, IEEE802154_ADDR_LEN);
606
607 /* request acknowledgment */
608 mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ;
609 }
586 610
587 return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, 611 return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
588 type, (void *)&da, (void *)&sa, skb->len); 612 type, (void *)&da, (void *)&sa, skb->len);
@@ -650,7 +674,7 @@ static void lowpan_fragment_timer_expired(unsigned long entry_addr)
650} 674}
651 675
652static struct lowpan_fragment * 676static struct lowpan_fragment *
653lowpan_alloc_new_frame(struct sk_buff *skb, u8 len, u16 tag) 677lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag)
654{ 678{
655 struct lowpan_fragment *frame; 679 struct lowpan_fragment *frame;
656 680
@@ -720,7 +744,7 @@ lowpan_process_data(struct sk_buff *skb)
720 { 744 {
721 struct lowpan_fragment *frame; 745 struct lowpan_fragment *frame;
722 /* slen stores the rightmost 8 bits of the 11 bits length */ 746 /* slen stores the rightmost 8 bits of the 11 bits length */
723 u8 slen, offset; 747 u8 slen, offset = 0;
724 u16 len, tag; 748 u16 len, tag;
725 bool found = false; 749 bool found = false;
726 750
@@ -731,6 +755,18 @@ lowpan_process_data(struct sk_buff *skb)
731 /* adds the 3 MSB to the 8 LSB to retrieve the 11 bits length */ 755 /* adds the 3 MSB to the 8 LSB to retrieve the 11 bits length */
732 len = ((iphc0 & 7) << 8) | slen; 756 len = ((iphc0 & 7) << 8) | slen;
733 757
758 if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1) {
759 pr_debug("%s received a FRAG1 packet (tag: %d, "
760 "size of the entire IP packet: %d)",
761 __func__, tag, len);
762 } else { /* FRAGN */
763 if (lowpan_fetch_skb_u8(skb, &offset))
764 goto unlock_and_drop;
765 pr_debug("%s received a FRAGN packet (tag: %d, "
766 "size of the entire IP packet: %d, "
767 "offset: %d)", __func__, tag, len, offset * 8);
768 }
769
734 /* 770 /*
735 * check if frame assembling with the same tag is 771 * check if frame assembling with the same tag is
736 * already in progress 772 * already in progress
@@ -745,17 +781,13 @@ lowpan_process_data(struct sk_buff *skb)
745 781
746 /* alloc new frame structure */ 782 /* alloc new frame structure */
747 if (!found) { 783 if (!found) {
784 pr_debug("%s first fragment received for tag %d, "
785 "begin packet reassembly", __func__, tag);
748 frame = lowpan_alloc_new_frame(skb, len, tag); 786 frame = lowpan_alloc_new_frame(skb, len, tag);
749 if (!frame) 787 if (!frame)
750 goto unlock_and_drop; 788 goto unlock_and_drop;
751 } 789 }
752 790
753 if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1)
754 goto unlock_and_drop;
755
756 if (lowpan_fetch_skb_u8(skb, &offset)) /* fetch offset */
757 goto unlock_and_drop;
758
759 /* if payload fits buffer, copy it */ 791 /* if payload fits buffer, copy it */
760 if (likely((offset * 8 + skb->len) <= frame->length)) 792 if (likely((offset * 8 + skb->len) <= frame->length))
761 skb_copy_to_linear_data_offset(frame->skb, offset * 8, 793 skb_copy_to_linear_data_offset(frame->skb, offset * 8,
@@ -773,6 +805,9 @@ lowpan_process_data(struct sk_buff *skb)
773 list_del(&frame->list); 805 list_del(&frame->list);
774 spin_unlock_bh(&flist_lock); 806 spin_unlock_bh(&flist_lock);
775 807
808 pr_debug("%s successfully reassembled fragment "
809 "(tag %d)", __func__, tag);
810
776 dev_kfree_skb(skb); 811 dev_kfree_skb(skb);
777 skb = frame->skb; 812 skb = frame->skb;
778 kfree(frame); 813 kfree(frame);
@@ -918,10 +953,35 @@ lowpan_process_data(struct sk_buff *skb)
918 } 953 }
919 954
920 /* UDP data uncompression */ 955 /* UDP data uncompression */
921 if (iphc0 & LOWPAN_IPHC_NH_C) 956 if (iphc0 & LOWPAN_IPHC_NH_C) {
922 if (lowpan_uncompress_udp_header(skb)) 957 struct udphdr uh;
958 struct sk_buff *new;
959 if (lowpan_uncompress_udp_header(skb, &uh))
923 goto drop; 960 goto drop;
924 961
962 /*
963 * replace the compressed UDP header with the uncompressed UDP
964 * header
965 */
966 new = skb_copy_expand(skb, sizeof(struct udphdr),
967 skb_tailroom(skb), GFP_ATOMIC);
968 kfree_skb(skb);
969
970 if (!new)
971 return -ENOMEM;
972
973 skb = new;
974
975 skb_push(skb, sizeof(struct udphdr));
976 skb_reset_transport_header(skb);
977 skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr));
978
979 lowpan_raw_dump_table(__func__, "raw UDP header dump",
980 (u8 *)&uh, sizeof(uh));
981
982 hdr.nexthdr = UIP_PROTO_UDP;
983 }
984
925 /* Not fragmented packet */ 985 /* Not fragmented packet */
926 hdr.payload_len = htons(skb->len); 986 hdr.payload_len = htons(skb->len);
927 987
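
skb_copy_expand() above buys sizeof(struct udphdr) of extra headroom so the decompressed header can be pushed in front of the payload. Stripped of the skb mechanics, the same operation is plain buffer surgery; a sketch with a simplified 8-byte UDP header and illustrative field values:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct udp_hdr {                /* classic 8-byte UDP header layout */
	uint16_t source, dest;
	uint16_t len, check;
};

/* Prepend an uncompressed UDP header to the payload; buf must have
 * sizeof(struct udp_hdr) bytes of spare room beyond *plen bytes of data. */
static void push_udp_header(uint8_t *buf, size_t *plen,
			    const struct udp_hdr *uh)
{
	memmove(buf + sizeof(*uh), buf, *plen);   /* make headroom */
	memcpy(buf, uh, sizeof(*uh));             /* write the header */
	*plen += sizeof(*uh);
}

int main(void)
{
	uint8_t buf[64] = { 0xde, 0xad, 0xbe, 0xef };  /* 4 payload bytes */
	size_t len = 4;
	struct udp_hdr uh = { .len = 12 };

	push_udp_header(buf, &len, &uh);
	printf("total length %zu\n", len);             /* prints 12 */
	return 0;
}
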
@@ -969,13 +1029,13 @@ static int lowpan_get_mac_header_length(struct sk_buff *skb)
969 1029
970static int 1030static int
971lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, 1031lowpan_fragment_xmit(struct sk_buff *skb, u8 *head,
972 int mlen, int plen, int offset) 1032 int mlen, int plen, int offset, int type)
973{ 1033{
974 struct sk_buff *frag; 1034 struct sk_buff *frag;
975 int hlen, ret; 1035 int hlen, ret;
976 1036
977 /* if payload length is zero, therefore it's a first fragment */ 1037 hlen = (type == LOWPAN_DISPATCH_FRAG1) ?
978 hlen = (plen == 0 ? LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE); 1038 LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE;
979 1039
980 lowpan_raw_dump_inline(__func__, "6lowpan fragment header", head, hlen); 1040 lowpan_raw_dump_inline(__func__, "6lowpan fragment header", head, hlen);
981 1041
@@ -1003,14 +1063,14 @@ lowpan_fragment_xmit(struct sk_buff *skb, u8 *head,
1003} 1063}
1004 1064
1005static int 1065static int
1006lowpan_skb_fragmentation(struct sk_buff *skb) 1066lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev)
1007{ 1067{
1008 int err, header_length, payload_length, tag, offset = 0; 1068 int err, header_length, payload_length, tag, offset = 0;
1009 u8 head[5]; 1069 u8 head[5];
1010 1070
1011 header_length = lowpan_get_mac_header_length(skb); 1071 header_length = lowpan_get_mac_header_length(skb);
1012 payload_length = skb->len - header_length; 1072 payload_length = skb->len - header_length;
1013 tag = fragment_tag++; 1073 tag = lowpan_dev_info(dev)->fragment_tag++;
1014 1074
1015 /* first fragment header */ 1075 /* first fragment header */
1016 head[0] = LOWPAN_DISPATCH_FRAG1 | ((payload_length >> 8) & 0x7); 1076 head[0] = LOWPAN_DISPATCH_FRAG1 | ((payload_length >> 8) & 0x7);
@@ -1018,7 +1078,16 @@ lowpan_skb_fragmentation(struct sk_buff *skb)
1018 head[2] = tag >> 8; 1078 head[2] = tag >> 8;
1019 head[3] = tag & 0xff; 1079 head[3] = tag & 0xff;
1020 1080
1021 err = lowpan_fragment_xmit(skb, head, header_length, 0, 0); 1081 err = lowpan_fragment_xmit(skb, head, header_length, LOWPAN_FRAG_SIZE,
1082 0, LOWPAN_DISPATCH_FRAG1);
1083
1084 if (err) {
1085 pr_debug("%s unable to send FRAG1 packet (tag: %d)",
1086 __func__, tag);
1087 goto exit;
1088 }
1089
1090 offset = LOWPAN_FRAG_SIZE;
1022 1091
1023 /* next fragment header */ 1092 /* next fragment header */
1024 head[0] &= ~LOWPAN_DISPATCH_FRAG1; 1093 head[0] &= ~LOWPAN_DISPATCH_FRAG1;
@@ -1033,10 +1102,17 @@ lowpan_skb_fragmentation(struct sk_buff *skb)
1033 len = payload_length - offset; 1102 len = payload_length - offset;
1034 1103
1035 err = lowpan_fragment_xmit(skb, head, header_length, 1104 err = lowpan_fragment_xmit(skb, head, header_length,
1036 len, offset); 1105 len, offset, LOWPAN_DISPATCH_FRAGN);
1106 if (err) {
1107 pr_debug("%s unable to send a subsequent FRAGN packet "
1108 "(tag: %d, offset: %d", __func__, tag, offset);
1109 goto exit;
1110 }
1111
1037 offset += len; 1112 offset += len;
1038 } 1113 }
1039 1114
1115exit:
1040 return err; 1116 return err;
1041} 1117}
1042 1118
@@ -1059,14 +1135,14 @@ static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)
1059 } 1135 }
1060 1136
1061 pr_debug("frame is too big, fragmentation is needed\n"); 1137 pr_debug("frame is too big, fragmentation is needed\n");
1062 err = lowpan_skb_fragmentation(skb); 1138 err = lowpan_skb_fragmentation(skb, dev);
1063error: 1139error:
1064 dev_kfree_skb(skb); 1140 dev_kfree_skb(skb);
1065out: 1141out:
1066 if (err < 0) 1142 if (err)
1067 pr_debug("ERROR: xmit failed\n"); 1143 pr_debug("ERROR: xmit failed\n");
1068 1144
1069 return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK); 1145 return (err < 0) ? NET_XMIT_DROP : err;
1070} 1146}
1071 1147
1072static struct wpan_phy *lowpan_get_phy(const struct net_device *dev) 1148static struct wpan_phy *lowpan_get_phy(const struct net_device *dev)
@@ -1087,6 +1163,12 @@ static u16 lowpan_get_short_addr(const struct net_device *dev)
1087 return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); 1163 return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev);
1088} 1164}
1089 1165
1166static u8 lowpan_get_dsn(const struct net_device *dev)
1167{
1168 struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
1169 return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev);
1170}
1171
1090static struct header_ops lowpan_header_ops = { 1172static struct header_ops lowpan_header_ops = {
1091 .create = lowpan_header_create, 1173 .create = lowpan_header_create,
1092}; 1174};
@@ -1100,6 +1182,7 @@ static struct ieee802154_mlme_ops lowpan_mlme = {
1100 .get_pan_id = lowpan_get_pan_id, 1182 .get_pan_id = lowpan_get_pan_id,
1101 .get_phy = lowpan_get_phy, 1183 .get_phy = lowpan_get_phy,
1102 .get_short_addr = lowpan_get_short_addr, 1184 .get_short_addr = lowpan_get_short_addr,
1185 .get_dsn = lowpan_get_dsn,
1103}; 1186};
1104 1187
1105static void lowpan_setup(struct net_device *dev) 1188static void lowpan_setup(struct net_device *dev)
@@ -1203,6 +1286,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
1203 return -ENODEV; 1286 return -ENODEV;
1204 1287
1205 lowpan_dev_info(dev)->real_dev = real_dev; 1288 lowpan_dev_info(dev)->real_dev = real_dev;
1289 lowpan_dev_info(dev)->fragment_tag = 0;
1206 mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); 1290 mutex_init(&lowpan_dev_info(dev)->dev_list_mtx);
1207 1291
1208 entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL); 1292 entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL);
diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
index bba5f8336317..4b8f917658b5 100644
--- a/net/ieee802154/6lowpan.h
+++ b/net/ieee802154/6lowpan.h
@@ -92,9 +92,10 @@
92 */ 92 */
93#define lowpan_is_iid_16_bit_compressable(a) \ 93#define lowpan_is_iid_16_bit_compressable(a) \
94 ((((a)->s6_addr16[4]) == 0) && \ 94 ((((a)->s6_addr16[4]) == 0) && \
95 (((a)->s6_addr16[5]) == 0) && \ 95 (((a)->s6_addr[10]) == 0) && \
96 (((a)->s6_addr16[6]) == 0) && \ 96 (((a)->s6_addr[11]) == 0xff) && \
97 ((((a)->s6_addr[14]) & 0x80) == 0)) 97 (((a)->s6_addr[12]) == 0xfe) && \
98 (((a)->s6_addr[13]) == 0))
98 99
99/* multicast address */ 100/* multicast address */
100#define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF) 101#define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF)
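
The revised macro accepts only interface identifiers of the form 0000:00ff:fe00:XXXX, i.e. IIDs derived from a 16-bit short address, rather than any IID with the low word free. A worked example of the predicate over the raw address bytes (s6_addr16[4] == 0 above is equivalent to bytes 8 and 9 both being zero):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* IID bytes 8..15 of an IPv6 address are compressible to 16 bits when
 * they have the 0000:00ff:fe00:XXXX shape derived from a short address. */
static bool iid_is_16bit_compressible(const uint8_t a[16])
{
	return a[8] == 0 && a[9] == 0 && a[10] == 0 &&
	       a[11] == 0xff && a[12] == 0xfe && a[13] == 0;
}

int main(void)
{
	uint8_t addr[16] = {
		0xfe, 0x80, 0, 0, 0, 0, 0, 0,        /* fe80:: prefix */
		0, 0, 0, 0xff, 0xfe, 0, 0x12, 0x34   /* ::ff:fe00:1234 */
	};
	printf("%s\n", iid_is_16bit_compressible(addr)
	       ? "carry only bytes 14..15 (0x1234)" : "send full IID");
	return 0;
}
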
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index e0da175f8e5b..581a59504bd5 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -291,6 +291,9 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
291 size_t copied = 0; 291 size_t copied = 0;
292 int err = -EOPNOTSUPP; 292 int err = -EOPNOTSUPP;
293 struct sk_buff *skb; 293 struct sk_buff *skb;
294 struct sockaddr_ieee802154 *saddr;
295
296 saddr = (struct sockaddr_ieee802154 *)msg->msg_name;
294 297
295 skb = skb_recv_datagram(sk, flags, noblock, &err); 298 skb = skb_recv_datagram(sk, flags, noblock, &err);
296 if (!skb) 299 if (!skb)
@@ -309,6 +312,13 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
309 312
310 sock_recv_ts_and_drops(msg, sk, skb); 313 sock_recv_ts_and_drops(msg, sk, skb);
311 314
315 if (saddr) {
316 saddr->family = AF_IEEE802154;
317 saddr->addr = mac_cb(skb)->sa;
318 }
319 if (addr_len)
320 *addr_len = sizeof(*saddr);
321
312 if (flags & MSG_TRUNC) 322 if (flags & MSG_TRUNC)
313 copied = skb->len; 323 copied = skb->len;
314done: 324done:
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 97351e1d07a4..7e49bbcc6967 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -64,8 +64,8 @@ struct sk_buff *ieee802154_nl_create(int flags, u8 req)
64 64
65int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group) 65int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group)
66{ 66{
67 /* XXX: nlh is right at the start of msg */ 67 struct nlmsghdr *nlh = nlmsg_hdr(msg);
68 void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); 68 void *hdr = genlmsg_data(nlmsg_data(nlh));
69 69
70 if (genlmsg_end(msg, hdr) < 0) 70 if (genlmsg_end(msg, hdr) < 0)
71 goto out; 71 goto out;
@@ -97,8 +97,8 @@ struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info,
97 97
98int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info) 98int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info)
99{ 99{
100 /* XXX: nlh is right at the start of msg */ 100 struct nlmsghdr *nlh = nlmsg_hdr(msg);
101 void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); 101 void *hdr = genlmsg_data(nlmsg_data(nlh));
102 102
103 if (genlmsg_end(msg, hdr) < 0) 103 if (genlmsg_end(msg, hdr) < 0)
104 goto out; 104 goto out;
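
Replacing the open-coded NLMSG_DATA(msg->data) chain with nlmsg_hdr()/nlmsg_data() works because of the fixed netlink framing: a 16-byte (4-byte-aligned) nlmsghdr at the start of the buffer, then the 4-byte genetlink header, then the payload. A sketch of that offset arithmetic using simplified local struct definitions rather than the uapi headers:

#include <stdint.h>
#include <stdio.h>

/* Simplified netlink/genetlink framing: each header is padded to a
 * 4-byte boundary and the next layer starts right after it. */
#define NLA_ALIGNTO 4u
#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))

struct nlmsghdr_s { uint32_t len; uint16_t type, flags; uint32_t seq, pid; };
struct genlmsghdr_s { uint8_t cmd, version; uint16_t reserved; };

int main(void)
{
	unsigned char msg[256];
	struct nlmsghdr_s *nlh = (struct nlmsghdr_s *)msg;  /* nlmsg_hdr() */
	void *genl = msg + NLA_ALIGN(sizeof(*nlh));         /* nlmsg_data() */
	void *payload = (unsigned char *)genl +
			NLA_ALIGN(sizeof(struct genlmsghdr_s)); /* genlmsg_data() */

	printf("genl header at +%td, user header at +%td\n",
	       (unsigned char *)genl - msg, (unsigned char *)payload - msg);
	return 0;	/* prints +16 and +20 */
}
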
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 96bb08abece2..b0bdd8c51e9c 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -315,7 +315,7 @@ static int ieee802154_associate_req(struct sk_buff *skb,
315 struct net_device *dev; 315 struct net_device *dev;
316 struct ieee802154_addr addr; 316 struct ieee802154_addr addr;
317 u8 page; 317 u8 page;
318 int ret = -EINVAL; 318 int ret = -EOPNOTSUPP;
319 319
320 if (!info->attrs[IEEE802154_ATTR_CHANNEL] || 320 if (!info->attrs[IEEE802154_ATTR_CHANNEL] ||
321 !info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || 321 !info->attrs[IEEE802154_ATTR_COORD_PAN_ID] ||
@@ -327,6 +327,8 @@ static int ieee802154_associate_req(struct sk_buff *skb,
327 dev = ieee802154_nl_get_dev(info); 327 dev = ieee802154_nl_get_dev(info);
328 if (!dev) 328 if (!dev)
329 return -ENODEV; 329 return -ENODEV;
330 if (!ieee802154_mlme_ops(dev)->assoc_req)
331 goto out;
330 332
331 if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) { 333 if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) {
332 addr.addr_type = IEEE802154_ADDR_LONG; 334 addr.addr_type = IEEE802154_ADDR_LONG;
@@ -350,6 +352,7 @@ static int ieee802154_associate_req(struct sk_buff *skb,
350 page, 352 page,
351 nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY])); 353 nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY]));
352 354
355out:
353 dev_put(dev); 356 dev_put(dev);
354 return ret; 357 return ret;
355} 358}
@@ -359,7 +362,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb,
359{ 362{
360 struct net_device *dev; 363 struct net_device *dev;
361 struct ieee802154_addr addr; 364 struct ieee802154_addr addr;
362 int ret = -EINVAL; 365 int ret = -EOPNOTSUPP;
363 366
364 if (!info->attrs[IEEE802154_ATTR_STATUS] || 367 if (!info->attrs[IEEE802154_ATTR_STATUS] ||
365 !info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] || 368 !info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] ||
@@ -369,6 +372,8 @@ static int ieee802154_associate_resp(struct sk_buff *skb,
369 dev = ieee802154_nl_get_dev(info); 372 dev = ieee802154_nl_get_dev(info);
370 if (!dev) 373 if (!dev)
371 return -ENODEV; 374 return -ENODEV;
375 if (!ieee802154_mlme_ops(dev)->assoc_resp)
376 goto out;
372 377
373 addr.addr_type = IEEE802154_ADDR_LONG; 378 addr.addr_type = IEEE802154_ADDR_LONG;
374 nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR], 379 nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR],
@@ -380,6 +385,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb,
380 nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]), 385 nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]),
381 nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS])); 386 nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS]));
382 387
388out:
383 dev_put(dev); 389 dev_put(dev);
384 return ret; 390 return ret;
385} 391}
@@ -389,7 +395,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb,
389{ 395{
390 struct net_device *dev; 396 struct net_device *dev;
391 struct ieee802154_addr addr; 397 struct ieee802154_addr addr;
392 int ret = -EINVAL; 398 int ret = -EOPNOTSUPP;
393 399
394 if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] && 400 if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] &&
395 !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) || 401 !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) ||
@@ -399,6 +405,8 @@ static int ieee802154_disassociate_req(struct sk_buff *skb,
399 dev = ieee802154_nl_get_dev(info); 405 dev = ieee802154_nl_get_dev(info);
400 if (!dev) 406 if (!dev)
401 return -ENODEV; 407 return -ENODEV;
408 if (!ieee802154_mlme_ops(dev)->disassoc_req)
409 goto out;
402 410
403 if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) { 411 if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) {
404 addr.addr_type = IEEE802154_ADDR_LONG; 412 addr.addr_type = IEEE802154_ADDR_LONG;
@@ -415,6 +423,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb,
415 ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr, 423 ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr,
416 nla_get_u8(info->attrs[IEEE802154_ATTR_REASON])); 424 nla_get_u8(info->attrs[IEEE802154_ATTR_REASON]));
417 425
426out:
418 dev_put(dev); 427 dev_put(dev);
419 return ret; 428 return ret;
420} 429}
@@ -432,7 +441,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
432 u8 channel, bcn_ord, sf_ord; 441 u8 channel, bcn_ord, sf_ord;
433 u8 page; 442 u8 page;
434 int pan_coord, blx, coord_realign; 443 int pan_coord, blx, coord_realign;
435 int ret; 444 int ret = -EOPNOTSUPP;
436 445
437 if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || 446 if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] ||
438 !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] || 447 !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] ||
@@ -448,6 +457,8 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
448 dev = ieee802154_nl_get_dev(info); 457 dev = ieee802154_nl_get_dev(info);
449 if (!dev) 458 if (!dev)
450 return -ENODEV; 459 return -ENODEV;
460 if (!ieee802154_mlme_ops(dev)->start_req)
461 goto out;
451 462
452 addr.addr_type = IEEE802154_ADDR_SHORT; 463 addr.addr_type = IEEE802154_ADDR_SHORT;
453 addr.short_addr = nla_get_u16( 464 addr.short_addr = nla_get_u16(
@@ -476,6 +487,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
476 ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page, 487 ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page,
477 bcn_ord, sf_ord, pan_coord, blx, coord_realign); 488 bcn_ord, sf_ord, pan_coord, blx, coord_realign);
478 489
490out:
479 dev_put(dev); 491 dev_put(dev);
480 return ret; 492 return ret;
481} 493}
@@ -483,7 +495,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
483static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) 495static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)
484{ 496{
485 struct net_device *dev; 497 struct net_device *dev;
486 int ret; 498 int ret = -EOPNOTSUPP;
487 u8 type; 499 u8 type;
488 u32 channels; 500 u32 channels;
489 u8 duration; 501 u8 duration;
@@ -497,6 +509,8 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)
497 dev = ieee802154_nl_get_dev(info); 509 dev = ieee802154_nl_get_dev(info);
498 if (!dev) 510 if (!dev)
499 return -ENODEV; 511 return -ENODEV;
512 if (!ieee802154_mlme_ops(dev)->scan_req)
513 goto out;
500 514
501 type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]); 515 type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]);
502 channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]); 516 channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]);
@@ -511,6 +525,7 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)
511 ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page, 525 ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page,
512 duration); 526 duration);
513 527
528out:
514 dev_put(dev); 529 dev_put(dev);
515 return ret; 530 return ret;
516} 531}
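
Every handler above now initializes ret to -EOPNOTSUPP and checks the MLME callback pointer before dereferencing it, funnelling through a common label so the dev_put() still runs on every path. The shape in isolation, with hypothetical ops/request names:

#include <errno.h>
#include <stdio.h>

struct ops { int (*request)(void); };	/* stand-in for the mlme ops */

static int do_request(const struct ops *ops)
{
	int ret = -EOPNOTSUPP;

	if (!ops->request)
		goto out;	/* optional callback not implemented */

	ret = ops->request();
out:
	/* common cleanup (dev_put() in the original) runs on every path */
	return ret;
}

int main(void)
{
	struct ops none = { 0 };
	printf("%d\n", do_request(&none));	/* -EOPNOTSUPP */
	return 0;
}
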
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 7944df768454..8603ca827104 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -166,6 +166,7 @@ config IP_PNP_RARP
166config NET_IPIP 166config NET_IPIP
167 tristate "IP: tunneling" 167 tristate "IP: tunneling"
168 select INET_TUNNEL 168 select INET_TUNNEL
169 select NET_IP_TUNNEL
169 ---help--- 170 ---help---
170 Tunneling means encapsulating data of one protocol type within 171 Tunneling means encapsulating data of one protocol type within
171 another protocol and sending it over a channel that understands the 172 another protocol and sending it over a channel that understands the
@@ -186,9 +187,14 @@ config NET_IPGRE_DEMUX
186 This is a helper module to demultiplex GRE packets on GRE version field criteria. 187 This is a helper module to demultiplex GRE packets on GRE version field criteria.
187 Required by ip_gre and pptp modules. 188 Required by ip_gre and pptp modules.
188 189
190config NET_IP_TUNNEL
191 tristate
192 default n
193
189config NET_IPGRE 194config NET_IPGRE
190 tristate "IP: GRE tunnels over IP" 195 tristate "IP: GRE tunnels over IP"
191 depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX 196 depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX
197 select NET_IP_TUNNEL
192 help 198 help
193 Tunneling means encapsulating data of one protocol type within 199 Tunneling means encapsulating data of one protocol type within
194 another protocol and sending it over a channel that understands the 200 another protocol and sending it over a channel that understands the
@@ -313,6 +319,7 @@ config SYN_COOKIES
313config NET_IPVTI 319config NET_IPVTI
314 tristate "Virtual (secure) IP: tunneling" 320 tristate "Virtual (secure) IP: tunneling"
315 select INET_TUNNEL 321 select INET_TUNNEL
322 select NET_IP_TUNNEL
316 depends on INET_XFRM_MODE_TUNNEL 323 depends on INET_XFRM_MODE_TUNNEL
317 ---help--- 324 ---help---
318 Tunneling means encapsulating data of one protocol type within 325 Tunneling means encapsulating data of one protocol type within
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 15ca63ec604e..089cb9f36387 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -13,6 +13,7 @@ obj-y := route.o inetpeer.o protocol.o \
13 fib_frontend.o fib_semantics.o fib_trie.o \ 13 fib_frontend.o fib_semantics.o fib_trie.o \
14 inet_fragment.o ping.o 14 inet_fragment.o ping.o
15 15
16obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
16obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o 17obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
17obj-$(CONFIG_PROC_FS) += proc.o 18obj-$(CONFIG_PROC_FS) += proc.o
18obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o 19obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c929d9c1c4b6..93824c57b108 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -111,7 +111,6 @@
111#include <net/sock.h> 111#include <net/sock.h>
112#include <net/raw.h> 112#include <net/raw.h>
113#include <net/icmp.h> 113#include <net/icmp.h>
114#include <net/ipip.h>
115#include <net/inet_common.h> 114#include <net/inet_common.h>
116#include <net/xfrm.h> 115#include <net/xfrm.h>
117#include <net/net_namespace.h> 116#include <net/net_namespace.h>
@@ -1283,9 +1282,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1283 int ihl; 1282 int ihl;
1284 int id; 1283 int id;
1285 unsigned int offset = 0; 1284 unsigned int offset = 0;
1286 1285 bool tunnel;
1287 if (!(features & NETIF_F_V4_CSUM))
1288 features &= ~NETIF_F_SG;
1289 1286
1290 if (unlikely(skb_shinfo(skb)->gso_type & 1287 if (unlikely(skb_shinfo(skb)->gso_type &
1291 ~(SKB_GSO_TCPV4 | 1288 ~(SKB_GSO_TCPV4 |
@@ -1293,6 +1290,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1293 SKB_GSO_DODGY | 1290 SKB_GSO_DODGY |
1294 SKB_GSO_TCP_ECN | 1291 SKB_GSO_TCP_ECN |
1295 SKB_GSO_GRE | 1292 SKB_GSO_GRE |
1293 SKB_GSO_UDP_TUNNEL |
1296 0))) 1294 0)))
1297 goto out; 1295 goto out;
1298 1296
@@ -1307,6 +1305,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1307 if (unlikely(!pskb_may_pull(skb, ihl))) 1305 if (unlikely(!pskb_may_pull(skb, ihl)))
1308 goto out; 1306 goto out;
1309 1307
1308 tunnel = !!skb->encapsulation;
1309
1310 __skb_pull(skb, ihl); 1310 __skb_pull(skb, ihl);
1311 skb_reset_transport_header(skb); 1311 skb_reset_transport_header(skb);
1312 iph = ip_hdr(skb); 1312 iph = ip_hdr(skb);
@@ -1326,7 +1326,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1326 skb = segs; 1326 skb = segs;
1327 do { 1327 do {
1328 iph = ip_hdr(skb); 1328 iph = ip_hdr(skb);
1329 if (proto == IPPROTO_UDP) { 1329 if (!tunnel && proto == IPPROTO_UDP) {
1330 iph->id = htons(id); 1330 iph->id = htons(id);
1331 iph->frag_off = htons(offset >> 3); 1331 iph->frag_off = htons(offset >> 3);
1332 if (skb->next != NULL) 1332 if (skb->next != NULL)
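
The GSO change keys the IP ID policy on whether the skb is encapsulated: plain UDP fragmentation reuses one ID and advances frag_off in 8-byte units (with MF set on all but the last piece), while the TCP and tunnel paths increment the ID per segment. A sketch of the per-segment stamping with made-up values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* How the segmentation loop stamps each output packet: UDP fragmentation
 * keeps one IP ID and advances frag_off; TCP segmentation bumps the ID.
 * For tunneled (encapsulated) packets the outer UDP branch is skipped. */
static void stamp_segments(bool udp, bool tunnel, int nsegs, int mss)
{
	uint16_t id = 0x1000;
	unsigned int offset = 0;
	int i;

	for (i = 0; i < nsegs; i++) {
		if (!tunnel && udp) {
			bool more = i < nsegs - 1;
			printf("id=%#x frag_off=%u mf=%d\n",
			       id, offset >> 3, more);
			offset += mss;
		} else {
			printf("id=%#x\n", id++);
		}
	}
}

int main(void)
{
	stamp_segments(true, false, 3, 1480);	/* UDP: fixed id, moving offset */
	stamp_segments(false, false, 3, 0);	/* TCP: id increments */
	return 0;
}
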
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index fea4929f6200..247ec1951c35 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -654,11 +654,19 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
654 arp_ptr += dev->addr_len; 654 arp_ptr += dev->addr_len;
655 memcpy(arp_ptr, &src_ip, 4); 655 memcpy(arp_ptr, &src_ip, 4);
656 arp_ptr += 4; 656 arp_ptr += 4;
657 if (target_hw != NULL) 657
658 memcpy(arp_ptr, target_hw, dev->addr_len); 658 switch (dev->type) {
659 else 659#if IS_ENABLED(CONFIG_FIREWIRE_NET)
660 memset(arp_ptr, 0, dev->addr_len); 660 case ARPHRD_IEEE1394:
661 arp_ptr += dev->addr_len; 661 break;
662#endif
663 default:
664 if (target_hw != NULL)
665 memcpy(arp_ptr, target_hw, dev->addr_len);
666 else
667 memset(arp_ptr, 0, dev->addr_len);
668 arp_ptr += dev->addr_len;
669 }
662 memcpy(arp_ptr, &dest_ip, 4); 670 memcpy(arp_ptr, &dest_ip, 4);
663 671
664 return skb; 672 return skb;
@@ -781,7 +789,14 @@ static int arp_process(struct sk_buff *skb)
781 arp_ptr += dev->addr_len; 789 arp_ptr += dev->addr_len;
782 memcpy(&sip, arp_ptr, 4); 790 memcpy(&sip, arp_ptr, 4);
783 arp_ptr += 4; 791 arp_ptr += 4;
784 arp_ptr += dev->addr_len; 792 switch (dev_type) {
793#if IS_ENABLED(CONFIG_FIREWIRE_NET)
794 case ARPHRD_IEEE1394:
795 break;
796#endif
797 default:
798 arp_ptr += dev->addr_len;
799 }
785 memcpy(&tip, arp_ptr, 4); 800 memcpy(&tip, arp_ptr, 4);
786/* 801/*
787 * Check for bad requests for 127.x.x.x and requests for multicast 802 * Check for bad requests for 127.x.x.x and requests for multicast
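
Both arp.c hunks special-case ARPHRD_IEEE1394 because ARP over IEEE 1394 (RFC 2734) carries no target hardware address field, so the parser must not skip addr_len bytes there. A userspace sketch of the pointer walk, assuming a 6-byte hardware address for the default case:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Walk an ARP payload the way arp_process() does: sha, spa, (tha), tpa.
 * For IEEE 1394 the target-hardware-address field is absent, so that
 * skip is suppressed. */
static void parse_arp(const uint8_t *p, int addr_len, int is_ieee1394,
		      uint32_t *sip, uint32_t *tip)
{
	p += addr_len;			/* sender hardware address */
	memcpy(sip, p, 4);		/* sender IP */
	p += 4;
	if (!is_ieee1394)
		p += addr_len;		/* target hardware address */
	memcpy(tip, p, 4);		/* target IP */
}

int main(void)
{
	uint8_t pkt[6 + 4 + 6 + 4] = { 0 };
	uint32_t sip, tip;

	memcpy(pkt + 6, "\x0a\x00\x00\x01", 4);
	memcpy(pkt + 16, "\x0a\x00\x00\x02", 4);
	parse_arp(pkt, 6, 0, &sip, &tip);
	printf("sip=%" PRIx32 " tip=%" PRIx32 "\n", sip, tip);
	return 0;
}
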
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index c6287cd978c2..dfc39d4d48b7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -536,7 +536,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
536 return NULL; 536 return NULL;
537} 537}
538 538
539static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 539static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
540{ 540{
541 struct net *net = sock_net(skb->sk); 541 struct net *net = sock_net(skb->sk);
542 struct nlattr *tb[IFA_MAX+1]; 542 struct nlattr *tb[IFA_MAX+1];
@@ -801,7 +801,7 @@ static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
801 return NULL; 801 return NULL;
802} 802}
803 803
804static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 804static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
805{ 805{
806 struct net *net = sock_net(skb->sk); 806 struct net *net = sock_net(skb->sk);
807 struct in_ifaddr *ifa; 807 struct in_ifaddr *ifa;
@@ -1529,6 +1529,8 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1529 idx = 0; 1529 idx = 0;
1530 head = &net->dev_index_head[h]; 1530 head = &net->dev_index_head[h];
1531 rcu_read_lock(); 1531 rcu_read_lock();
1532 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1533 net->dev_base_seq;
1532 hlist_for_each_entry_rcu(dev, head, index_hlist) { 1534 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1533 if (idx < s_idx) 1535 if (idx < s_idx)
1534 goto cont; 1536 goto cont;
@@ -1549,6 +1551,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1549 rcu_read_unlock(); 1551 rcu_read_unlock();
1550 goto done; 1552 goto done;
1551 } 1553 }
1554 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1552 } 1555 }
1553cont: 1556cont:
1554 idx++; 1557 idx++;
@@ -1760,8 +1763,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1760}; 1763};
1761 1764
1762static int inet_netconf_get_devconf(struct sk_buff *in_skb, 1765static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1763 struct nlmsghdr *nlh, 1766 struct nlmsghdr *nlh)
1764 void *arg)
1765{ 1767{
1766 struct net *net = sock_net(in_skb->sk); 1768 struct net *net = sock_net(in_skb->sk);
1767 struct nlattr *tb[NETCONFA_MAX+1]; 1769 struct nlattr *tb[NETCONFA_MAX+1];
@@ -1821,6 +1823,77 @@ errout:
1821 return err; 1823 return err;
1822} 1824}
1823 1825
1826static int inet_netconf_dump_devconf(struct sk_buff *skb,
1827 struct netlink_callback *cb)
1828{
1829 struct net *net = sock_net(skb->sk);
1830 int h, s_h;
1831 int idx, s_idx;
1832 struct net_device *dev;
1833 struct in_device *in_dev;
1834 struct hlist_head *head;
1835
1836 s_h = cb->args[0];
1837 s_idx = idx = cb->args[1];
1838
1839 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1840 idx = 0;
1841 head = &net->dev_index_head[h];
1842 rcu_read_lock();
1843 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1844 net->dev_base_seq;
1845 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1846 if (idx < s_idx)
1847 goto cont;
1848 in_dev = __in_dev_get_rcu(dev);
1849 if (!in_dev)
1850 goto cont;
1851
1852 if (inet_netconf_fill_devconf(skb, dev->ifindex,
1853 &in_dev->cnf,
1854 NETLINK_CB(cb->skb).portid,
1855 cb->nlh->nlmsg_seq,
1856 RTM_NEWNETCONF,
1857 NLM_F_MULTI,
1858 -1) <= 0) {
1859 rcu_read_unlock();
1860 goto done;
1861 }
1862 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1863cont:
1864 idx++;
1865 }
1866 rcu_read_unlock();
1867 }
1868 if (h == NETDEV_HASHENTRIES) {
1869 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1870 net->ipv4.devconf_all,
1871 NETLINK_CB(cb->skb).portid,
1872 cb->nlh->nlmsg_seq,
1873 RTM_NEWNETCONF, NLM_F_MULTI,
1874 -1) <= 0)
1875 goto done;
1876 else
1877 h++;
1878 }
1879 if (h == NETDEV_HASHENTRIES + 1) {
1880 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1881 net->ipv4.devconf_dflt,
1882 NETLINK_CB(cb->skb).portid,
1883 cb->nlh->nlmsg_seq,
1884 RTM_NEWNETCONF, NLM_F_MULTI,
1885 -1) <= 0)
1886 goto done;
1887 else
1888 h++;
1889 }
1890done:
1891 cb->args[0] = h;
1892 cb->args[1] = idx;
1893
1894 return skb->len;
1895}
1896
1824#ifdef CONFIG_SYSCTL 1897#ifdef CONFIG_SYSCTL
1825 1898
1826static void devinet_copy_dflt_conf(struct net *net, int i) 1899static void devinet_copy_dflt_conf(struct net *net, int i)
@@ -2225,6 +2298,6 @@ void __init devinet_init(void)
2225 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); 2298 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2226 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); 2299 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2227 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, 2300 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2228 NULL, NULL); 2301 inet_netconf_dump_devconf, NULL);
2229} 2302}
2230 2303
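
inet_netconf_dump_devconf() above uses the usual two-level netlink dump resume: cb->args[0] remembers the hash bucket and cb->args[1] the index inside it, so a dump interrupted by a full skb restarts at exactly the next entry. A self-contained sketch of that resume pattern with a toy table and an emission budget in place of the skb size limit:

#include <stdio.h>

#define HASHENTRIES 4
#define BUCKET_LEN  3

/* args[0] stores the bucket (h), args[1] the index within it (idx). */
static int dump(long args[2], int budget)
{
	int h, idx = 0, emitted = 0;
	int s_idx = (int)args[1];

	for (h = (int)args[0]; h < HASHENTRIES; h++, s_idx = 0) {
		for (idx = 0; idx < BUCKET_LEN; idx++) {
			if (idx < s_idx)
				continue;	/* already dumped earlier */
			if (emitted == budget)
				goto done;	/* skb full: stop here */
			printf("entry %d.%d\n", h, idx);
			emitted++;
		}
	}
done:
	args[0] = h;
	args[1] = idx;
	return emitted;
}

int main(void)
{
	long args[2] = { 0, 0 };
	while (dump(args, 5) > 0)
		printf("-- next dump round --\n");
	return 0;
}
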
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index eb4bb12b3eb4..c7629a209f9d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -604,7 +604,7 @@ errout:
604 return err; 604 return err;
605} 605}
606 606
607static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 607static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
608{ 608{
609 struct net *net = sock_net(skb->sk); 609 struct net *net = sock_net(skb->sk);
610 struct fib_config cfg; 610 struct fib_config cfg;
@@ -626,7 +626,7 @@ errout:
626 return err; 626 return err;
627} 627}
628 628
629static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 629static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
630{ 630{
631 struct net *net = sock_net(skb->sk); 631 struct net *net = sock_net(skb->sk);
632 struct fib_config cfg; 632 struct fib_config cfg;
@@ -957,8 +957,8 @@ static void nl_fib_input(struct sk_buff *skb)
957 957
958 net = sock_net(skb->sk); 958 net = sock_net(skb->sk);
959 nlh = nlmsg_hdr(skb); 959 nlh = nlmsg_hdr(skb);
960 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || 960 if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len ||
961 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) 961 nlmsg_len(nlh) < sizeof(*frn))
962 return; 962 return;
963 963
964 skb = skb_clone(skb, GFP_KERNEL); 964 skb = skb_clone(skb, GFP_KERNEL);
@@ -966,7 +966,7 @@ static void nl_fib_input(struct sk_buff *skb)
966 return; 966 return;
967 nlh = nlmsg_hdr(skb); 967 nlh = nlmsg_hdr(skb);
968 968
969 frn = (struct fib_result_nl *) NLMSG_DATA(nlh); 969 frn = (struct fib_result_nl *) nlmsg_data(nlh);
970 tb = fib_get_table(net, frn->tb_id_in); 970 tb = fib_get_table(net, frn->tb_id_in);
971 971
972 nl_fib_lookup(frn, tb); 972 nl_fib_lookup(frn, tb);
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index 7a4c710c4cdd..d2d5a99fba09 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -27,11 +27,6 @@
27 27
28static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; 28static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
29static DEFINE_SPINLOCK(gre_proto_lock); 29static DEFINE_SPINLOCK(gre_proto_lock);
30struct gre_base_hdr {
31 __be16 flags;
32 __be16 protocol;
33};
34#define GRE_HEADER_SECTION 4
35 30
36int gre_add_protocol(const struct gre_protocol *proto, u8 version) 31int gre_add_protocol(const struct gre_protocol *proto, u8 version)
37{ 32{
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 786d97aee751..6acb541c9091 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -559,7 +559,7 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
559 559
560int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) 560int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
561{ 561{
562 int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL); 562 int err = req->rsk_ops->rtx_syn_ack(parent, req);
563 563
564 if (!err) 564 if (!err)
565 req->num_retrans++; 565 req->num_retrans++;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 7afa2c3c788f..5f648751fce2 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -158,7 +158,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
158 158
159#define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) 159#define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ)
160 160
161 if (icsk->icsk_pending == ICSK_TIME_RETRANS) { 161 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
162 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
163 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
162 r->idiag_timer = 1; 164 r->idiag_timer = 1;
163 r->idiag_retrans = icsk->icsk_retransmits; 165 r->idiag_retrans = icsk->icsk_retransmits;
164 r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); 166 r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
@@ -322,7 +324,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s
322 } 324 }
323 325
324 err = sk_diag_fill(sk, rep, req, 326 err = sk_diag_fill(sk, rep, req,
325 sk_user_ns(NETLINK_CB(in_skb).ssk), 327 sk_user_ns(NETLINK_CB(in_skb).sk),
326 NETLINK_CB(in_skb).portid, 328 NETLINK_CB(in_skb).portid,
327 nlh->nlmsg_seq, 0, nlh); 329 nlh->nlmsg_seq, 0, nlh);
328 if (err < 0) { 330 if (err < 0) {
@@ -628,7 +630,7 @@ static int inet_csk_diag_dump(struct sock *sk,
628 return 0; 630 return 0;
629 631
630 return inet_csk_diag_fill(sk, skb, r, 632 return inet_csk_diag_fill(sk, skb, r,
631 sk_user_ns(NETLINK_CB(cb->skb).ssk), 633 sk_user_ns(NETLINK_CB(cb->skb).sk),
632 NETLINK_CB(cb->skb).portid, 634 NETLINK_CB(cb->skb).portid,
633 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); 635 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
634} 636}
@@ -803,7 +805,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
803 } 805 }
804 806
805 err = inet_diag_fill_req(skb, sk, req, 807 err = inet_diag_fill_req(skb, sk, req,
806 sk_user_ns(NETLINK_CB(cb->skb).ssk), 808 sk_user_ns(NETLINK_CB(cb->skb).sk),
807 NETLINK_CB(cb->skb).portid, 809 NETLINK_CB(cb->skb).portid,
808 cb->nlh->nlmsg_seq, cb->nlh); 810 cb->nlh->nlmsg_seq, cb->nlh);
809 if (err < 0) { 811 if (err < 0) {
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index f4fd23de9b13..e97d66a1fdde 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -23,6 +23,28 @@
23 23
24#include <net/sock.h> 24#include <net/sock.h>
25#include <net/inet_frag.h> 25#include <net/inet_frag.h>
26#include <net/inet_ecn.h>
27
28/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
29 * Value : 0xff if frame should be dropped.
30 * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
31 */
32const u8 ip_frag_ecn_table[16] = {
33 /* at least one fragment had CE, and others ECT_0 or ECT_1 */
34 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
35 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
36 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
37
38 /* invalid combinations : drop frame */
39 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
40 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
41 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
42 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
43 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
44 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
45 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
46};
47EXPORT_SYMBOL(ip_frag_ecn_table);
26 48
27static void inet_frag_secret_rebuild(unsigned long dummy) 49static void inet_frag_secret_rebuild(unsigned long dummy)
28{ 50{
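
ip_frag_ecn_table is indexed by the OR of per-fragment ECN indicator bits, one bit per codepoint; mixing Not-ECT with ECT/CE yields the 0xff drop marker, while CE combined with ECT propagates CE into the reassembled header (RFC 3168 5.3). A worked example of the indexing:

#include <stdint.h>
#include <stdio.h>

#define ECN_NOT_ECT 0x01	/* like IPFRAG_ECN_NOT_ECT: tos ECN bits 00 */
#define ECN_ECT_1   0x02	/* tos ECN bits 01 */
#define ECN_ECT_0   0x04	/* tos ECN bits 10 */
#define ECN_CE      0x08	/* tos ECN bits 11 */

/* ip4_frag_ecn(): map the two ECN bits of tos to one bit per codepoint */
static uint8_t frag_ecn(uint8_t tos)
{
	return 1 << (tos & 3);
}

int main(void)
{
	/* OR together what every fragment carried */
	uint8_t seen = frag_ecn(0x02) | frag_ecn(0x03);	/* ECT(0) + CE */

	if (seen == (ECN_ECT_0 | ECN_CE))
		printf("valid: reassembled tos gets CE set\n");
	if ((seen & ECN_NOT_ECT) && (seen & ~ECN_NOT_ECT))
		printf("mixing Not-ECT with ECT/CE: drop (0xff)\n");
	return 0;
}
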
@@ -30,20 +52,27 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
30 unsigned long now = jiffies; 52 unsigned long now = jiffies;
31 int i; 53 int i;
32 54
55 /* Per bucket lock NOT needed here, due to write lock protection */
33 write_lock(&f->lock); 56 write_lock(&f->lock);
57
34 get_random_bytes(&f->rnd, sizeof(u32)); 58 get_random_bytes(&f->rnd, sizeof(u32));
35 for (i = 0; i < INETFRAGS_HASHSZ; i++) { 59 for (i = 0; i < INETFRAGS_HASHSZ; i++) {
60 struct inet_frag_bucket *hb;
36 struct inet_frag_queue *q; 61 struct inet_frag_queue *q;
37 struct hlist_node *n; 62 struct hlist_node *n;
38 63
39 hlist_for_each_entry_safe(q, n, &f->hash[i], list) { 64 hb = &f->hash[i];
65 hlist_for_each_entry_safe(q, n, &hb->chain, list) {
40 unsigned int hval = f->hashfn(q); 66 unsigned int hval = f->hashfn(q);
41 67
42 if (hval != i) { 68 if (hval != i) {
69 struct inet_frag_bucket *hb_dest;
70
43 hlist_del(&q->list); 71 hlist_del(&q->list);
44 72
45 /* Relink to new hash chain. */ 73 /* Relink to new hash chain. */
46 hlist_add_head(&q->list, &f->hash[hval]); 74 hb_dest = &f->hash[hval];
75 hlist_add_head(&q->list, &hb_dest->chain);
47 } 76 }
48 } 77 }
49 } 78 }
@@ -56,9 +85,12 @@ void inet_frags_init(struct inet_frags *f)
56{ 85{
57 int i; 86 int i;
58 87
59 for (i = 0; i < INETFRAGS_HASHSZ; i++) 88 for (i = 0; i < INETFRAGS_HASHSZ; i++) {
60 INIT_HLIST_HEAD(&f->hash[i]); 89 struct inet_frag_bucket *hb = &f->hash[i];
61 90
91 spin_lock_init(&hb->chain_lock);
92 INIT_HLIST_HEAD(&hb->chain);
93 }
62 rwlock_init(&f->lock); 94 rwlock_init(&f->lock);
63 95
64 f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ 96 f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
@@ -100,10 +132,18 @@ EXPORT_SYMBOL(inet_frags_exit_net);
100 132
101static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) 133static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
102{ 134{
103 write_lock(&f->lock); 135 struct inet_frag_bucket *hb;
136 unsigned int hash;
137
138 read_lock(&f->lock);
139 hash = f->hashfn(fq);
140 hb = &f->hash[hash];
141
142 spin_lock(&hb->chain_lock);
104 hlist_del(&fq->list); 143 hlist_del(&fq->list);
105 fq->net->nqueues--; 144 spin_unlock(&hb->chain_lock);
106 write_unlock(&f->lock); 145
146 read_unlock(&f->lock);
107 inet_frag_lru_del(fq); 147 inet_frag_lru_del(fq);
108} 148}
109 149
@@ -182,6 +222,9 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
182 q = list_first_entry(&nf->lru_list, 222 q = list_first_entry(&nf->lru_list,
183 struct inet_frag_queue, lru_list); 223 struct inet_frag_queue, lru_list);
184 atomic_inc(&q->refcnt); 224 atomic_inc(&q->refcnt);
225 /* Remove q from list to avoid several CPUs grabbing it */
226 list_del_init(&q->lru_list);
227
185 spin_unlock(&nf->lru_lock); 228 spin_unlock(&nf->lru_lock);
186 229
187 spin_lock(&q->lock); 230 spin_lock(&q->lock);
@@ -202,27 +245,32 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
202 struct inet_frag_queue *qp_in, struct inet_frags *f, 245 struct inet_frag_queue *qp_in, struct inet_frags *f,
203 void *arg) 246 void *arg)
204{ 247{
248 struct inet_frag_bucket *hb;
205 struct inet_frag_queue *qp; 249 struct inet_frag_queue *qp;
206#ifdef CONFIG_SMP 250#ifdef CONFIG_SMP
207#endif 251#endif
208 unsigned int hash; 252 unsigned int hash;
209 253
210 write_lock(&f->lock); 254 read_lock(&f->lock); /* Protects against hash rebuild */
211 /* 255 /*
212 * While we stayed w/o the lock other CPU could update 256 * While we stayed w/o the lock other CPU could update
213 * the rnd seed, so we need to re-calculate the hash 257 * the rnd seed, so we need to re-calculate the hash
214 * chain. Fortunately the qp_in can be used to get one. 258 * chain. Fortunately the qp_in can be used to get one.
215 */ 259 */
216 hash = f->hashfn(qp_in); 260 hash = f->hashfn(qp_in);
261 hb = &f->hash[hash];
262 spin_lock(&hb->chain_lock);
263
217#ifdef CONFIG_SMP 264#ifdef CONFIG_SMP
218 /* With SMP race we have to recheck hash table, because 265 /* With SMP race we have to recheck hash table, because
219 * such entry could be created on other cpu, while we 266 * such entry could be created on other cpu, while we
220 * promoted read lock to write lock. 267 * released the hash bucket lock.
221 */ 268 */
222 hlist_for_each_entry(qp, &f->hash[hash], list) { 269 hlist_for_each_entry(qp, &hb->chain, list) {
223 if (qp->net == nf && f->match(qp, arg)) { 270 if (qp->net == nf && f->match(qp, arg)) {
224 atomic_inc(&qp->refcnt); 271 atomic_inc(&qp->refcnt);
225 write_unlock(&f->lock); 272 spin_unlock(&hb->chain_lock);
273 read_unlock(&f->lock);
226 qp_in->last_in |= INET_FRAG_COMPLETE; 274 qp_in->last_in |= INET_FRAG_COMPLETE;
227 inet_frag_put(qp_in, f); 275 inet_frag_put(qp_in, f);
228 return qp; 276 return qp;
@@ -234,9 +282,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
234 atomic_inc(&qp->refcnt); 282 atomic_inc(&qp->refcnt);
235 283
236 atomic_inc(&qp->refcnt); 284 atomic_inc(&qp->refcnt);
237 hlist_add_head(&qp->list, &f->hash[hash]); 285 hlist_add_head(&qp->list, &hb->chain);
238 nf->nqueues++; 286 spin_unlock(&hb->chain_lock);
239 write_unlock(&f->lock); 287 read_unlock(&f->lock);
240 inet_frag_lru_add(nf, qp); 288 inet_frag_lru_add(nf, qp);
241 return qp; 289 return qp;
242} 290}
@@ -277,17 +325,23 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
277 struct inet_frags *f, void *key, unsigned int hash) 325 struct inet_frags *f, void *key, unsigned int hash)
278 __releases(&f->lock) 326 __releases(&f->lock)
279{ 327{
328 struct inet_frag_bucket *hb;
280 struct inet_frag_queue *q; 329 struct inet_frag_queue *q;
281 int depth = 0; 330 int depth = 0;
282 331
283 hlist_for_each_entry(q, &f->hash[hash], list) { 332 hb = &f->hash[hash];
333
334 spin_lock(&hb->chain_lock);
335 hlist_for_each_entry(q, &hb->chain, list) {
284 if (q->net == nf && f->match(q, key)) { 336 if (q->net == nf && f->match(q, key)) {
285 atomic_inc(&q->refcnt); 337 atomic_inc(&q->refcnt);
338 spin_unlock(&hb->chain_lock);
286 read_unlock(&f->lock); 339 read_unlock(&f->lock);
287 return q; 340 return q;
288 } 341 }
289 depth++; 342 depth++;
290 } 343 }
344 spin_unlock(&hb->chain_lock);
291 read_unlock(&f->lock); 345 read_unlock(&f->lock);
292 346
293 if (depth <= INETFRAGS_MAXDEPTH) 347 if (depth <= INETFRAGS_MAXDEPTH)
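
The rework above narrows the old table-wide write lock to a read lock (held only so the secret rebuild can still exclude everyone) plus one spinlock per hash bucket, so lookups and insertions on different chains no longer serialize. A pthreads sketch of the same two-level scheme, names illustrative:

#include <pthread.h>
#include <stdio.h>

#define HASHSZ 64

/* A global rwlock guards the hash *layout* (the rebuild takes it for
 * writing); each bucket has its own lock for chain edits. */
struct bucket {
	pthread_mutex_t chain_lock;
	/* chain head would live here */
};

static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct bucket hash[HASHSZ];

static void chain_insert(unsigned int h)
{
	pthread_rwlock_rdlock(&table_lock);	/* layout can't change */
	pthread_mutex_lock(&hash[h % HASHSZ].chain_lock);
	/* ... add entry to the chain ... */
	pthread_mutex_unlock(&hash[h % HASHSZ].chain_lock);
	pthread_rwlock_unlock(&table_lock);
}

int main(void)
{
	for (int i = 0; i < HASHSZ; i++)
		pthread_mutex_init(&hash[i].chain_lock, NULL);
	chain_insert(42);
	printf("inserted under bucket lock\n");
	return 0;
}
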
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index cc280a3f4f96..1975f52933c5 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -29,6 +29,7 @@
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/if_vlan.h> 30#include <linux/if_vlan.h>
31#include <linux/inet_lro.h> 31#include <linux/inet_lro.h>
32#include <net/checksum.h>
32 33
33MODULE_LICENSE("GPL"); 34MODULE_LICENSE("GPL");
34MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>"); 35MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
@@ -114,11 +115,9 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
114 *(p+2) = lro_desc->tcp_rcv_tsecr; 115 *(p+2) = lro_desc->tcp_rcv_tsecr;
115 } 116 }
116 117
118 csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len));
117 iph->tot_len = htons(lro_desc->ip_tot_len); 119 iph->tot_len = htons(lro_desc->ip_tot_len);
118 120
119 iph->check = 0;
120 iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl);
121
122 tcph->check = 0; 121 tcph->check = 0;
123 tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0); 122 tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
124 lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); 123 lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
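
csum_replace2() patches the IP checksum incrementally instead of zeroing and recomputing it: per RFC 1624, HC' = ~(~HC + ~m + m') when one 16-bit word changes from m to m'. A sketch that applies the update to a sample header, treated as host-order words for clarity, and checks it against a full recomputation:

#include <stdint.h>
#include <stdio.h>

/* RFC 1624 eq. 3, the arithmetic behind csum_replace2() */
static uint16_t csum_update(uint16_t check, uint16_t old, uint16_t new_)
{
	uint32_t sum = (uint16_t)~check;
	sum += (uint16_t)~old;
	sum += new_;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

/* Reference: full one's-complement sum over the header words */
static uint16_t csum_full(const uint16_t *w, int n)
{
	uint32_t sum = 0;
	while (n--)
		sum += *w++;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	uint16_t hdr[10] = { 0x4500, 0x0054, 0x1c46, 0x4000, 0x4006,
			     0x0000, 0xac10, 0x0a63, 0xac10, 0x0a0c };
	uint16_t saved;

	hdr[5] = csum_full(hdr, 10);	/* field was 0 while summing */

	hdr[5] = csum_update(hdr[5], hdr[1], 0x05dc);	/* patch tot_len */
	hdr[1] = 0x05dc;

	saved = hdr[5];
	hdr[5] = 0;
	printf("patched %#06x, recomputed %#06x\n", saved, csum_full(hdr, 10));
	return 0;
}

With these sample values both methods agree on 0xac46.
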
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 52c273ea05c3..b66910aaef4d 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -79,40 +79,11 @@ struct ipq {
79 struct inet_peer *peer; 79 struct inet_peer *peer;
80}; 80};
81 81
82/* RFC 3168 support :
83 * We want to check ECN values of all fragments, do detect invalid combinations.
84 * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value.
85 */
86#define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */
87#define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */
88#define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */
89#define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */
90
91static inline u8 ip4_frag_ecn(u8 tos) 82static inline u8 ip4_frag_ecn(u8 tos)
92{ 83{
93 return 1 << (tos & INET_ECN_MASK); 84 return 1 << (tos & INET_ECN_MASK);
94} 85}
95 86
96/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
97 * Value : 0xff if frame should be dropped.
98 * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
99 */
100static const u8 ip4_frag_ecn_table[16] = {
101 /* at least one fragment had CE, and others ECT_0 or ECT_1 */
102 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
103 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
104 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
105
106 /* invalid combinations : drop frame */
107 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
108 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
109 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
110 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
111 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
112 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
113 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
114};
115
116static struct inet_frags ip4_frags; 87static struct inet_frags ip4_frags;
117 88
118int ip_frag_nqueues(struct net *net) 89int ip_frag_nqueues(struct net *net)
@@ -557,7 +528,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
557 528
558 ipq_kill(qp); 529 ipq_kill(qp);
559 530
560 ecn = ip4_frag_ecn_table[qp->ecn]; 531 ecn = ip_frag_ecn_table[qp->ecn];
561 if (unlikely(ecn == 0xff)) { 532 if (unlikely(ecn == 0xff)) {
562 err = -EINVAL; 533 err = -EINVAL;
563 goto out_fail; 534 goto out_fail;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 91d66dbde9c0..987a4e5e07e2 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -37,7 +37,7 @@
37#include <net/ip.h> 37#include <net/ip.h>
38#include <net/icmp.h> 38#include <net/icmp.h>
39#include <net/protocol.h> 39#include <net/protocol.h>
40#include <net/ipip.h> 40#include <net/ip_tunnels.h>
41#include <net/arp.h> 41#include <net/arp.h>
42#include <net/checksum.h> 42#include <net/checksum.h>
43#include <net/dsfield.h> 43#include <net/dsfield.h>
@@ -108,15 +108,6 @@
108 fatal route to network, even if it were you who configured 108 fatal route to network, even if it were you who configured
109 fatal static route: you are innocent. :-) 109 fatal static route: you are innocent. :-)
110 110
111
112
113 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
114 practically identical code. It would be good to glue them
115 together, but it is not very evident, how to make them modular.
116 sit is integral part of IPv6, ipip and gre are naturally modular.
117 We could extract common parts (hash table, ioctl etc)
118 to a separate module (ip_tunnel.c).
119
120 Alexey Kuznetsov. 111 Alexey Kuznetsov.
121 */ 112 */
122 113
@@ -126,400 +117,137 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126 117
127static struct rtnl_link_ops ipgre_link_ops __read_mostly; 118static struct rtnl_link_ops ipgre_link_ops __read_mostly;
128static int ipgre_tunnel_init(struct net_device *dev); 119static int ipgre_tunnel_init(struct net_device *dev);
129static void ipgre_tunnel_setup(struct net_device *dev);
130static int ipgre_tunnel_bind_dev(struct net_device *dev);
131
132/* Fallback tunnel: no source, no destination, no key, no options */
133
134#define HASH_SIZE 16
135 120
136static int ipgre_net_id __read_mostly; 121static int ipgre_net_id __read_mostly;
137struct ipgre_net { 122static int gre_tap_net_id __read_mostly;
138 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
139
140 struct net_device *fb_tunnel_dev;
141};
142
143/* Tunnel hash table */
144
145/*
146 4 hash tables:
147
148 3: (remote,local)
149 2: (remote,*)
150 1: (*,local)
151 0: (*,*)
152 123
153 We require exact key match i.e. if a key is present in packet 124static __sum16 check_checksum(struct sk_buff *skb)
154 it will match only tunnel with the same key; if it is not present, 125{
155 it will match only keyless tunnel. 126 __sum16 csum = 0;
156
157 All keysless packets, if not matched configured keyless tunnels
158 will match fallback tunnel.
159 */
160 127
161#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) 128 switch (skb->ip_summed) {
129 case CHECKSUM_COMPLETE:
130 csum = csum_fold(skb->csum);
162 131
163#define tunnels_r_l tunnels[3] 132 if (!csum)
164#define tunnels_r tunnels[2] 133 break;
165#define tunnels_l tunnels[1] 134 /* Fall through. */
166#define tunnels_wc tunnels[0]
167 135
168static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, 136 case CHECKSUM_NONE:
169 struct rtnl_link_stats64 *tot) 137 skb->csum = 0;
170{ 138 csum = __skb_checksum_complete(skb);
171 int i; 139 skb->ip_summed = CHECKSUM_COMPLETE;
172 140 break;
173 for_each_possible_cpu(i) {
174 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
175 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
176 unsigned int start;
177
178 do {
179 start = u64_stats_fetch_begin_bh(&tstats->syncp);
180 rx_packets = tstats->rx_packets;
181 tx_packets = tstats->tx_packets;
182 rx_bytes = tstats->rx_bytes;
183 tx_bytes = tstats->tx_bytes;
184 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
185
186 tot->rx_packets += rx_packets;
187 tot->tx_packets += tx_packets;
188 tot->rx_bytes += rx_bytes;
189 tot->tx_bytes += tx_bytes;
190 } 141 }
191 142
192 tot->multicast = dev->stats.multicast; 143 return csum;
193 tot->rx_crc_errors = dev->stats.rx_crc_errors;
194 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
195 tot->rx_length_errors = dev->stats.rx_length_errors;
196 tot->rx_frame_errors = dev->stats.rx_frame_errors;
197 tot->rx_errors = dev->stats.rx_errors;
198
199 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
200 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
201 tot->tx_dropped = dev->stats.tx_dropped;
202 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
203 tot->tx_errors = dev->stats.tx_errors;
204
205 return tot;
206} 144}
207 145
208/* Does key in tunnel parameters match packet */ 146static int ip_gre_calc_hlen(__be16 o_flags)
209static bool ipgre_key_match(const struct ip_tunnel_parm *p,
210 __be16 flags, __be32 key)
211{ 147{
212 if (p->i_flags & GRE_KEY) { 148 int addend = 4;
213 if (flags & GRE_KEY)
214 return key == p->i_key;
215 else
216 return false; /* key expected, none present */
217 } else
218 return !(flags & GRE_KEY);
219}
220 149
221/* Given src, dst and key, find appropriate for input tunnel. */ 150 if (o_flags&TUNNEL_CSUM)
151 addend += 4;
152 if (o_flags&TUNNEL_KEY)
153 addend += 4;
154 if (o_flags&TUNNEL_SEQ)
155 addend += 4;
156 return addend;
157}
222 158
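
ip_gre_calc_hlen() encodes the GRE layout of RFC 2784/2890: 4 fixed bytes of flags and protocol, plus one optional 4-byte word each for checksum, key and sequence number. A standalone sketch with illustrative flag values standing in for the kernel's TUNNEL_* bits:

#include <stdint.h>
#include <stdio.h>

#define TUNNEL_CSUM 0x01	/* illustrative flag bits, standing in */
#define TUNNEL_KEY  0x04	/* for the kernel's TUNNEL_* definitions */
#define TUNNEL_SEQ  0x08

/* GRE header size: 4 fixed bytes plus 4 per optional field present */
static int gre_hlen(uint16_t flags)
{
	int addend = 4;

	if (flags & TUNNEL_CSUM)
		addend += 4;
	if (flags & TUNNEL_KEY)
		addend += 4;
	if (flags & TUNNEL_SEQ)
		addend += 4;
	return addend;
}

int main(void)
{
	printf("%d\n", gre_hlen(0));			/* 4  */
	printf("%d\n", gre_hlen(TUNNEL_KEY));		/* 8  */
	printf("%d\n", gre_hlen(TUNNEL_CSUM | TUNNEL_KEY |
				TUNNEL_SEQ));		/* 16 */
	return 0;
}
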
223static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, 159static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
224 __be32 remote, __be32 local, 160 bool *csum_err, int *hdr_len)
225 __be16 flags, __be32 key,
226 __be16 gre_proto)
227{ 161{
228 struct net *net = dev_net(dev); 162 unsigned int ip_hlen = ip_hdrlen(skb);
229 int link = dev->ifindex; 163 const struct gre_base_hdr *greh;
230 unsigned int h0 = HASH(remote); 164 __be32 *options;
231 unsigned int h1 = HASH(key);
232 struct ip_tunnel *t, *cand = NULL;
233 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
234 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
235 ARPHRD_ETHER : ARPHRD_IPGRE;
236 int score, cand_score = 4;
237
238 for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
239 if (local != t->parms.iph.saddr ||
240 remote != t->parms.iph.daddr ||
241 !(t->dev->flags & IFF_UP))
242 continue;
243
244 if (!ipgre_key_match(&t->parms, flags, key))
245 continue;
246
247 if (t->dev->type != ARPHRD_IPGRE &&
248 t->dev->type != dev_type)
249 continue;
250
251 score = 0;
252 if (t->parms.link != link)
253 score |= 1;
254 if (t->dev->type != dev_type)
255 score |= 2;
256 if (score == 0)
257 return t;
258
259 if (score < cand_score) {
260 cand = t;
261 cand_score = score;
262 }
263 }
264 165
265 for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) { 166 if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
266 if (remote != t->parms.iph.daddr || 167 return -EINVAL;
267 !(t->dev->flags & IFF_UP))
268 continue;
269
270 if (!ipgre_key_match(&t->parms, flags, key))
271 continue;
272
273 if (t->dev->type != ARPHRD_IPGRE &&
274 t->dev->type != dev_type)
275 continue;
276
277 score = 0;
278 if (t->parms.link != link)
279 score |= 1;
280 if (t->dev->type != dev_type)
281 score |= 2;
282 if (score == 0)
283 return t;
284
285 if (score < cand_score) {
286 cand = t;
287 cand_score = score;
288 }
289 }
290 168
291 for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) { 169 greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
292 if ((local != t->parms.iph.saddr && 170 if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
293 (local != t->parms.iph.daddr || 171 return -EINVAL;
294 !ipv4_is_multicast(local))) ||
295 !(t->dev->flags & IFF_UP))
296 continue;
297
298 if (!ipgre_key_match(&t->parms, flags, key))
299 continue;
300
301 if (t->dev->type != ARPHRD_IPGRE &&
302 t->dev->type != dev_type)
303 continue;
304
305 score = 0;
306 if (t->parms.link != link)
307 score |= 1;
308 if (t->dev->type != dev_type)
309 score |= 2;
310 if (score == 0)
311 return t;
312
313 if (score < cand_score) {
314 cand = t;
315 cand_score = score;
316 }
317 }
318 172
319 for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) { 173 tpi->flags = gre_flags_to_tnl_flags(greh->flags);
320 if (t->parms.i_key != key || 174 *hdr_len = ip_gre_calc_hlen(tpi->flags);
321 !(t->dev->flags & IFF_UP))
322 continue;
323
324 if (t->dev->type != ARPHRD_IPGRE &&
325 t->dev->type != dev_type)
326 continue;
327
328 score = 0;
329 if (t->parms.link != link)
330 score |= 1;
331 if (t->dev->type != dev_type)
332 score |= 2;
333 if (score == 0)
334 return t;
335
336 if (score < cand_score) {
337 cand = t;
338 cand_score = score;
339 }
340 }
341 175
342 if (cand != NULL) 176 if (!pskb_may_pull(skb, *hdr_len))
343 return cand; 177 return -EINVAL;
344 178
345 dev = ign->fb_tunnel_dev; 179 greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
346 if (dev->flags & IFF_UP)
347 return netdev_priv(dev);
348 180
349 return NULL; 181 tpi->proto = greh->protocol;
350}
351 182
352static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign, 183 options = (__be32 *)(greh + 1);
353 struct ip_tunnel_parm *parms) 184 if (greh->flags & GRE_CSUM) {
354{ 185 if (check_checksum(skb)) {
355 __be32 remote = parms->iph.daddr; 186 *csum_err = true;
356 __be32 local = parms->iph.saddr; 187 return -EINVAL;
357 __be32 key = parms->i_key; 188 }
358 unsigned int h = HASH(key); 189 options++;
359 int prio = 0;
360
361 if (local)
362 prio |= 1;
363 if (remote && !ipv4_is_multicast(remote)) {
364 prio |= 2;
365 h ^= HASH(remote);
366 } 190 }
367 191
368 return &ign->tunnels[prio][h]; 192 if (greh->flags & GRE_KEY) {
369} 193 tpi->key = *options;
370 194 options++;
371static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign, 195 } else
372 struct ip_tunnel *t) 196 tpi->key = 0;
373{
374 return __ipgre_bucket(ign, &t->parms);
375}
376
377static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
378{
379 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
380 197
381 rcu_assign_pointer(t->next, rtnl_dereference(*tp)); 198 if (unlikely(greh->flags & GRE_SEQ)) {
382 rcu_assign_pointer(*tp, t); 199 tpi->seq = *options;
383} 200 options++;
201 } else
202 tpi->seq = 0;
384 203
385static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) 204 /* WCCP version 1 and 2 protocol decoding.
386{ 205 * - Change protocol to IP
387 struct ip_tunnel __rcu **tp; 206 * - When dealing with WCCPv2, skip extra 4 bytes in GRE header
388 struct ip_tunnel *iter; 207 */
389 208 if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
390 for (tp = ipgre_bucket(ign, t); 209 tpi->proto = htons(ETH_P_IP);
391 (iter = rtnl_dereference(*tp)) != NULL; 210 if ((*(u8 *)options & 0xF0) != 0x40) {
392 tp = &iter->next) { 211 *hdr_len += 4;
393 if (t == iter) { 212 if (!pskb_may_pull(skb, *hdr_len))
394 rcu_assign_pointer(*tp, t->next); 213 return -EINVAL;
395 break;
396 } 214 }
397 } 215 }
398}
399
400static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
401 struct ip_tunnel_parm *parms,
402 int type)
403{
404 __be32 remote = parms->iph.daddr;
405 __be32 local = parms->iph.saddr;
406 __be32 key = parms->i_key;
407 int link = parms->link;
408 struct ip_tunnel *t;
409 struct ip_tunnel __rcu **tp;
410 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
411
412 for (tp = __ipgre_bucket(ign, parms);
413 (t = rtnl_dereference(*tp)) != NULL;
414 tp = &t->next)
415 if (local == t->parms.iph.saddr &&
416 remote == t->parms.iph.daddr &&
417 key == t->parms.i_key &&
418 link == t->parms.link &&
419 type == t->dev->type)
420 break;
421
422 return t;
423}
424
425static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
426 struct ip_tunnel_parm *parms, int create)
427{
428 struct ip_tunnel *t, *nt;
429 struct net_device *dev;
430 char name[IFNAMSIZ];
431 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
432
433 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
434 if (t || !create)
435 return t;
436
437 if (parms->name[0])
438 strlcpy(name, parms->name, IFNAMSIZ);
439 else
440 strcpy(name, "gre%d");
441
442 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
443 if (!dev)
444 return NULL;
445
446 dev_net_set(dev, net);
447
448 nt = netdev_priv(dev);
449 nt->parms = *parms;
450 dev->rtnl_link_ops = &ipgre_link_ops;
451
452 dev->mtu = ipgre_tunnel_bind_dev(dev);
453 216
454 if (register_netdevice(dev) < 0) 217 return 0;
455 goto failed_free;
456
457 /* Can use a lockless transmit, unless we generate output sequences */
458 if (!(nt->parms.o_flags & GRE_SEQ))
459 dev->features |= NETIF_F_LLTX;
460
461 dev_hold(dev);
462 ipgre_tunnel_link(ign, nt);
463 return nt;
464
465failed_free:
466 free_netdev(dev);
467 return NULL;
468}
469
470static void ipgre_tunnel_uninit(struct net_device *dev)
471{
472 struct net *net = dev_net(dev);
473 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
474
475 ipgre_tunnel_unlink(ign, netdev_priv(dev));
476 dev_put(dev);
477} 218}
478 219
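
The new parse_gre_header() in the right-hand column computes the expected header length from the flag bits up front (via ip_gre_calc_hlen()), pulls it in one go, then reads the optional checksum, key and sequence words in their on-wire order, with a WCCP special case at the end. A self-contained sketch of the same walk over a plain buffer; it validates lengths incrementally instead of precomputing, skips checksum verification and WCCP handling, and struct gre_info is an invented name:

#include <stdint.h>
#include <stdio.h>

/* GRE flag bits in the first header byte (RFC 2784/2890). */
#define GRE_F_CSUM 0x80
#define GRE_F_KEY  0x20
#define GRE_F_SEQ  0x10

struct gre_info {
	uint16_t proto;
	uint32_t key;
	uint32_t seq;
	int hdr_len;
};

static uint32_t get_be32(const uint8_t *p)
{
	return (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 |
	       (uint32_t)p[2] << 8  | (uint32_t)p[3];
}

/* Walk the mandatory 4-byte base header, then the optional checksum,
 * key and sequence words in their on-wire order. Returns the total
 * header length, or -1 if the buffer is too short. */
static int parse_gre(const uint8_t *buf, int len, struct gre_info *gi)
{
	int off = 4;	/* flags(2) + protocol(2) */

	if (len < off)
		return -1;
	gi->proto = (uint16_t)(buf[2] << 8 | buf[3]);
	gi->key = 0;
	gi->seq = 0;

	if (buf[0] & GRE_F_CSUM) {
		if (len < off + 4)
			return -1;
		off += 4;	/* checksum(2) + reserved(2); not verified here */
	}
	if (buf[0] & GRE_F_KEY) {
		if (len < off + 4)
			return -1;
		gi->key = get_be32(buf + off);
		off += 4;
	}
	if (buf[0] & GRE_F_SEQ) {
		if (len < off + 4)
			return -1;
		gi->seq = get_be32(buf + off);
		off += 4;
	}
	gi->hdr_len = off;
	return off;
}

int main(void)
{
	/* K and S set, protocol 0x0800 (IPv4), key 42, seq 1 */
	const uint8_t pkt[] = { 0x30, 0x00, 0x08, 0x00,
				0x00, 0x00, 0x00, 0x2a,
				0x00, 0x00, 0x00, 0x01 };
	struct gre_info gi;

	if (parse_gre(pkt, sizeof(pkt), &gi) > 0)
		printf("proto=0x%04x key=%u seq=%u hlen=%d\n",
		       gi.proto, gi.key, gi.seq, gi.hdr_len);
	return 0;
}

Feeding it the 12-byte K|S packet in main() prints proto=0x0800 key=42 seq=1 hlen=12.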
479
480static void ipgre_err(struct sk_buff *skb, u32 info) 220static void ipgre_err(struct sk_buff *skb, u32 info)
481{ 221{
482 222
483/* All the routers (except for Linux) return only 223 /* All the routers (except for Linux) return only
484 8 bytes of packet payload. It means, that precise relaying of 224 8 bytes of packet payload. It means, that precise relaying of
485 ICMP in the real Internet is absolutely infeasible. 225 ICMP in the real Internet is absolutely infeasible.
486 226
487 Moreover, Cisco "wise men" put GRE key to the third word 227 Moreover, Cisco "wise men" put GRE key to the third word
488 in GRE header. It makes impossible maintaining even soft state for keyed 228 in GRE header. It makes impossible maintaining even soft
489 GRE tunnels with enabled checksum. Tell them "thank you". 229 state for keyed GRE tunnels with enabled checksum. Tell
490 230 them "thank you".
491 Well, I wonder, rfc1812 was written by Cisco employee,
492 what the hell these idiots break standards established
493 by themselves???
494 */
495 231
232 Well, I wonder, rfc1812 was written by Cisco employee,
233 what the hell these idiots break standards established
234 by themselves???
235 */
236 struct net *net = dev_net(skb->dev);
237 struct ip_tunnel_net *itn;
496 const struct iphdr *iph = (const struct iphdr *)skb->data; 238 const struct iphdr *iph = (const struct iphdr *)skb->data;
497 __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2));
498 int grehlen = (iph->ihl<<2) + 4;
499 const int type = icmp_hdr(skb)->type; 239 const int type = icmp_hdr(skb)->type;
500 const int code = icmp_hdr(skb)->code; 240 const int code = icmp_hdr(skb)->code;
501 struct ip_tunnel *t; 241 struct ip_tunnel *t;
502 __be16 flags; 242 struct tnl_ptk_info tpi;
503 __be32 key = 0; 243 int hdr_len;
244 bool csum_err = false;
504 245
505 flags = p[0]; 246 if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) {
506 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { 247 if (!csum_err) /* ignore csum errors. */
507 if (flags&(GRE_VERSION|GRE_ROUTING))
508 return; 248 return;
509 if (flags&GRE_KEY) {
510 grehlen += 4;
511 if (flags&GRE_CSUM)
512 grehlen += 4;
513 }
514 } 249 }
515 250
516 /* If only 8 bytes returned, keyed message will be dropped here */
517 if (skb_headlen(skb) < grehlen)
518 return;
519
520 if (flags & GRE_KEY)
521 key = *(((__be32 *)p) + (grehlen / 4) - 1);
522
523 switch (type) { 251 switch (type) {
524 default: 252 default:
525 case ICMP_PARAMETERPROB: 253 case ICMP_PARAMETERPROB:
@@ -548,8 +276,13 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
548 break; 276 break;
549 } 277 }
550 278
551 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, 279 if (tpi.proto == htons(ETH_P_TEB))
552 flags, key, p[1]); 280 itn = net_generic(net, gre_tap_net_id);
281 else
282 itn = net_generic(net, ipgre_net_id);
283
284 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
285 iph->daddr, iph->saddr, tpi.key);
553 286
554 if (t == NULL) 287 if (t == NULL)
555 return; 288 return;
@@ -578,158 +311,33 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
578 t->err_time = jiffies; 311 t->err_time = jiffies;
579} 312}
580 313
581static inline u8
582ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
583{
584 u8 inner = 0;
585 if (skb->protocol == htons(ETH_P_IP))
586 inner = old_iph->tos;
587 else if (skb->protocol == htons(ETH_P_IPV6))
588 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
589 return INET_ECN_encapsulate(tos, inner);
590}
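
The helper deleted here, ipgre_ecn_encapsulate(), folds the inner packet's ECN bits into the outer TOS through INET_ECN_encapsulate(): the outer header keeps its own DSCP, inherits the inner ECN codepoint, and inner CE is downgraded to ECT(0) so a congestion mark is not blindly copied outward. An illustrative user-space rendering of that rule, with the inet_ecn.h constants inlined:

#include <stdint.h>
#include <stdio.h>

#define ECN_MASK  0x03
#define ECN_ECT_0 0x02
#define ECN_CE    0x03

/* Sketch of the INET_ECN_encapsulate() rule used by the removed
 * helper: keep the outer DSCP, copy the inner ECN codepoint, and
 * downgrade inner CE to ECT(0). */
static uint8_t ecn_encapsulate(uint8_t outer_tos, uint8_t inner_tos)
{
	uint8_t ecn = inner_tos & ECN_MASK;

	if (ecn == ECN_CE)
		ecn = ECN_ECT_0;
	return (uint8_t)((outer_tos & ~ECN_MASK) | ecn);
}

int main(void)
{
	printf("0x%02x\n", ecn_encapsulate(0xb8, 0x03));	/* EF DSCP + CE -> 0xba */
	return 0;
}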
591
592static int ipgre_rcv(struct sk_buff *skb) 314static int ipgre_rcv(struct sk_buff *skb)
593{ 315{
316 struct net *net = dev_net(skb->dev);
317 struct ip_tunnel_net *itn;
594 const struct iphdr *iph; 318 const struct iphdr *iph;
595 u8 *h;
596 __be16 flags;
597 __sum16 csum = 0;
598 __be32 key = 0;
599 u32 seqno = 0;
600 struct ip_tunnel *tunnel; 319 struct ip_tunnel *tunnel;
601 int offset = 4; 320 struct tnl_ptk_info tpi;
602 __be16 gre_proto; 321 int hdr_len;
603 int err; 322 bool csum_err = false;
604 323
605 if (!pskb_may_pull(skb, 16)) 324 if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0)
606 goto drop; 325 goto drop;
607 326
608 iph = ip_hdr(skb); 327 if (tpi.proto == htons(ETH_P_TEB))
609 h = skb->data; 328 itn = net_generic(net, gre_tap_net_id);
610 flags = *(__be16 *)h; 329 else
611 330 itn = net_generic(net, ipgre_net_id);
612 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
613 /* - Version must be 0.
614 - We do not support routing headers.
615 */
616 if (flags&(GRE_VERSION|GRE_ROUTING))
617 goto drop;
618
619 if (flags&GRE_CSUM) {
620 switch (skb->ip_summed) {
621 case CHECKSUM_COMPLETE:
622 csum = csum_fold(skb->csum);
623 if (!csum)
624 break;
625 /* fall through */
626 case CHECKSUM_NONE:
627 skb->csum = 0;
628 csum = __skb_checksum_complete(skb);
629 skb->ip_summed = CHECKSUM_COMPLETE;
630 }
631 offset += 4;
632 }
633 if (flags&GRE_KEY) {
634 key = *(__be32 *)(h + offset);
635 offset += 4;
636 }
637 if (flags&GRE_SEQ) {
638 seqno = ntohl(*(__be32 *)(h + offset));
639 offset += 4;
640 }
641 }
642 331
643 gre_proto = *(__be16 *)(h + 2); 332 iph = ip_hdr(skb);
333 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
334 iph->saddr, iph->daddr, tpi.key);
644 335
645 tunnel = ipgre_tunnel_lookup(skb->dev,
646 iph->saddr, iph->daddr, flags, key,
647 gre_proto);
648 if (tunnel) { 336 if (tunnel) {
649 struct pcpu_tstats *tstats; 337 ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
650
651 secpath_reset(skb);
652
653 skb->protocol = gre_proto;
654 /* WCCP version 1 and 2 protocol decoding.
655 * - Change protocol to IP
656 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
657 */
658 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
659 skb->protocol = htons(ETH_P_IP);
660 if ((*(h + offset) & 0xF0) != 0x40)
661 offset += 4;
662 }
663
664 skb->mac_header = skb->network_header;
665 __pskb_pull(skb, offset);
666 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
667 skb->pkt_type = PACKET_HOST;
668#ifdef CONFIG_NET_IPGRE_BROADCAST
669 if (ipv4_is_multicast(iph->daddr)) {
670 /* Looped back packet, drop it! */
671 if (rt_is_output_route(skb_rtable(skb)))
672 goto drop;
673 tunnel->dev->stats.multicast++;
674 skb->pkt_type = PACKET_BROADCAST;
675 }
676#endif
677
678 if (((flags&GRE_CSUM) && csum) ||
679 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
680 tunnel->dev->stats.rx_crc_errors++;
681 tunnel->dev->stats.rx_errors++;
682 goto drop;
683 }
684 if (tunnel->parms.i_flags&GRE_SEQ) {
685 if (!(flags&GRE_SEQ) ||
686 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
687 tunnel->dev->stats.rx_fifo_errors++;
688 tunnel->dev->stats.rx_errors++;
689 goto drop;
690 }
691 tunnel->i_seqno = seqno + 1;
692 }
693
694 /* Warning: All skb pointers will be invalidated! */
695 if (tunnel->dev->type == ARPHRD_ETHER) {
696 if (!pskb_may_pull(skb, ETH_HLEN)) {
697 tunnel->dev->stats.rx_length_errors++;
698 tunnel->dev->stats.rx_errors++;
699 goto drop;
700 }
701
702 iph = ip_hdr(skb);
703 skb->protocol = eth_type_trans(skb, tunnel->dev);
704 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
705 }
706
707 __skb_tunnel_rx(skb, tunnel->dev);
708
709 skb_reset_network_header(skb);
710 err = IP_ECN_decapsulate(iph, skb);
711 if (unlikely(err)) {
712 if (log_ecn_error)
713 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
714 &iph->saddr, iph->tos);
715 if (err > 1) {
716 ++tunnel->dev->stats.rx_frame_errors;
717 ++tunnel->dev->stats.rx_errors;
718 goto drop;
719 }
720 }
721
722 tstats = this_cpu_ptr(tunnel->dev->tstats);
723 u64_stats_update_begin(&tstats->syncp);
724 tstats->rx_packets++;
725 tstats->rx_bytes += skb->len;
726 u64_stats_update_end(&tstats->syncp);
727
728 gro_cells_receive(&tunnel->gro_cells, skb);
729 return 0; 338 return 0;
730 } 339 }
731 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 340 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
732
733drop: 341drop:
734 kfree_skb(skb); 342 kfree_skb(skb);
735 return 0; 343 return 0;
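
Both the removed receive path and its ip_tunnel_rcv() replacement guard against replays with (s32)(seqno - tunnel->i_seqno) < 0, i.e. serial-number arithmetic that stays correct across the 2^32 wrap. A small demonstration; the unsigned-to-signed cast is implementation-defined in ISO C, but it is the behaviour the kernel relies on:

#include <stdint.h>
#include <stdio.h>

/* Wraparound-safe "is a before b": unsigned subtraction reinterpreted
 * as signed, so a value just behind another compares negative even
 * across the 32-bit wrap. */
static int seq_before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

int main(void)
{
	printf("%d\n", seq_before(5, 10));			/* 1: stale */
	printf("%d\n", seq_before(10, 5));			/* 0: fresh */
	printf("%d\n", seq_before(0x00000002, 0xfffffffe));	/* 0: wrapped but newer */
	return 0;
}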
@@ -746,7 +354,7 @@ static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff
746 skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; 354 skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
747 return skb; 355 return skb;
748 } else if (skb->ip_summed == CHECKSUM_PARTIAL && 356 } else if (skb->ip_summed == CHECKSUM_PARTIAL &&
749 tunnel->parms.o_flags&GRE_CSUM) { 357 tunnel->parms.o_flags&TUNNEL_CSUM) {
750 err = skb_checksum_help(skb); 358 err = skb_checksum_help(skb);
751 if (unlikely(err)) 359 if (unlikely(err))
752 goto error; 360 goto error;
@@ -760,494 +368,157 @@ error:
760 return ERR_PTR(err); 368 return ERR_PTR(err);
761} 369}
762 370
763static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 371static struct sk_buff *gre_build_header(struct sk_buff *skb,
372 const struct tnl_ptk_info *tpi,
373 int hdr_len)
764{ 374{
765 struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); 375 struct gre_base_hdr *greh;
766 struct ip_tunnel *tunnel = netdev_priv(dev);
767 const struct iphdr *old_iph;
768 const struct iphdr *tiph;
769 struct flowi4 fl4;
770 u8 tos;
771 __be16 df;
772 struct rtable *rt; /* Route to the other host */
773 struct net_device *tdev; /* Device to other host */
774 struct iphdr *iph; /* Our new IP header */
775 unsigned int max_headroom; /* The extra header space needed */
776 int gre_hlen;
777 __be32 dst;
778 int mtu;
779 u8 ttl;
780 int err;
781 int pkt_len;
782
783 skb = handle_offloads(tunnel, skb);
784 if (IS_ERR(skb)) {
785 dev->stats.tx_dropped++;
786 return NETDEV_TX_OK;
787 }
788 376
789 if (!skb->encapsulation) { 377 skb_push(skb, hdr_len);
790 skb_reset_inner_headers(skb);
791 skb->encapsulation = 1;
792 }
793 378
794 old_iph = ip_hdr(skb); 379 greh = (struct gre_base_hdr *)skb->data;
380 greh->flags = tnl_flags_to_gre_flags(tpi->flags);
381 greh->protocol = tpi->proto;
795 382
796 if (dev->type == ARPHRD_ETHER) 383 if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
797 IPCB(skb)->flags = 0; 384 __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
798 385
799 if (dev->header_ops && dev->type == ARPHRD_IPGRE) { 386 if (tpi->flags&TUNNEL_SEQ) {
800 gre_hlen = 0; 387 *ptr = tpi->seq;
801 tiph = (const struct iphdr *)skb->data; 388 ptr--;
802 } else {
803 gre_hlen = tunnel->hlen;
804 tiph = &tunnel->parms.iph;
805 }
806
807 if ((dst = tiph->daddr) == 0) {
808 /* NBMA tunnel */
809
810 if (skb_dst(skb) == NULL) {
811 dev->stats.tx_fifo_errors++;
812 goto tx_error;
813 } 389 }
814 390 if (tpi->flags&TUNNEL_KEY) {
815 if (skb->protocol == htons(ETH_P_IP)) { 391 *ptr = tpi->key;
816 rt = skb_rtable(skb); 392 ptr--;
817 dst = rt_nexthop(rt, old_iph->daddr);
818 } 393 }
819#if IS_ENABLED(CONFIG_IPV6) 394 if (tpi->flags&TUNNEL_CSUM &&
820 else if (skb->protocol == htons(ETH_P_IPV6)) { 395 !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
821 const struct in6_addr *addr6; 396 *(__sum16 *)ptr = 0;
822 struct neighbour *neigh; 397 *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
823 bool do_tx_error_icmp; 398 skb->len, 0));
824 int addr_type;
825
826 neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
827 if (neigh == NULL)
828 goto tx_error;
829
830 addr6 = (const struct in6_addr *)&neigh->primary_key;
831 addr_type = ipv6_addr_type(addr6);
832
833 if (addr_type == IPV6_ADDR_ANY) {
834 addr6 = &ipv6_hdr(skb)->daddr;
835 addr_type = ipv6_addr_type(addr6);
836 }
837
838 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
839 do_tx_error_icmp = true;
840 else {
841 do_tx_error_icmp = false;
842 dst = addr6->s6_addr32[3];
843 }
844 neigh_release(neigh);
845 if (do_tx_error_icmp)
846 goto tx_error_icmp;
847 } 399 }
848#endif
849 else
850 goto tx_error;
851 } 400 }
852 401
853 ttl = tiph->ttl; 402 return skb;
854 tos = tiph->tos; 403}
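
gre_build_header() above fills the optional words from the back of the header: sequence first, then key, then the checksum, which is the 16-bit one's-complement sum over the entire GRE packet with the checksum field zeroed. A self-contained sketch that lays out the same C|K|S header front-to-back and computes the same checksum; csum16() is a user-space stand-in for csum_fold(skb_checksum(...)):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* 16-bit one's-complement sum over buf, folded and inverted. */
static uint16_t csum16(const uint8_t *buf, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)(buf[i] << 8 | buf[i + 1]);
	if (len & 1)
		sum += (uint32_t)buf[len - 1] << 8;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

static void put_be32(uint8_t *p, uint32_t v)
{
	p[0] = (uint8_t)(v >> 24);
	p[1] = (uint8_t)(v >> 16);
	p[2] = (uint8_t)(v >> 8);
	p[3] = (uint8_t)v;
}

/* Build a C|K|S GRE header plus payload; the checksum word covers the
 * whole GRE packet and is computed last, with its field zeroed. */
static int build_gre(uint8_t *out, uint16_t proto, uint32_t key,
		     uint32_t seq, const uint8_t *payload, size_t plen)
{
	uint16_t c;
	int off = 4;

	out[0] = 0x80 | 0x20 | 0x10;	/* C, K, S flag bits */
	out[1] = 0;
	out[2] = (uint8_t)(proto >> 8);
	out[3] = (uint8_t)proto;
	put_be32(out + off, 0);		/* checksum(2) + reserved(2) */
	off += 4;
	put_be32(out + off, key);
	off += 4;
	put_be32(out + off, seq);
	off += 4;
	memcpy(out + off, payload, plen);

	c = csum16(out, off + plen);
	out[4] = (uint8_t)(c >> 8);
	out[5] = (uint8_t)c;
	return off + (int)plen;
}

int main(void)
{
	const uint8_t data[] = { 0xde, 0xad };
	uint8_t pkt[64];
	int n = build_gre(pkt, 0x0800, 42, 1, data, sizeof(data));

	printf("built %d bytes, csum=0x%02x%02x\n", n, pkt[4], pkt[5]);
	return 0;
}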
855 if (tos & 0x1) {
856 tos &= ~0x1;
857 if (skb->protocol == htons(ETH_P_IP))
858 tos = old_iph->tos;
859 else if (skb->protocol == htons(ETH_P_IPV6))
860 tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
861 }
862 404
863 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr, 405static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
864 tunnel->parms.o_key, RT_TOS(tos), 406 const struct iphdr *tnl_params,
865 tunnel->parms.link); 407 __be16 proto)
866 if (IS_ERR(rt)) { 408{
867 dev->stats.tx_carrier_errors++; 409 struct ip_tunnel *tunnel = netdev_priv(dev);
868 goto tx_error; 410 struct tnl_ptk_info tpi;
869 }
870 tdev = rt->dst.dev;
871 411
872 if (tdev == dev) { 412 if (likely(!skb->encapsulation)) {
873 ip_rt_put(rt); 413 skb_reset_inner_headers(skb);
874 dev->stats.collisions++; 414 skb->encapsulation = 1;
875 goto tx_error;
876 } 415 }
877 416
878 df = tiph->frag_off; 417 tpi.flags = tunnel->parms.o_flags;
879 if (df) 418 tpi.proto = proto;
880 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen; 419 tpi.key = tunnel->parms.o_key;
881 else 420 if (tunnel->parms.o_flags & TUNNEL_SEQ)
882 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; 421 tunnel->o_seqno++;
883 422 tpi.seq = htonl(tunnel->o_seqno);
884 if (skb_dst(skb))
885 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
886
887 if (skb->protocol == htons(ETH_P_IP)) {
888 df |= (old_iph->frag_off&htons(IP_DF));
889 423
890 if (!skb_is_gso(skb) && 424 /* Push GRE header. */
891 (old_iph->frag_off&htons(IP_DF)) && 425 skb = gre_build_header(skb, &tpi, tunnel->hlen);
892 mtu < ntohs(old_iph->tot_len)) { 426 if (unlikely(!skb)) {
893 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 427 dev->stats.tx_dropped++;
894 ip_rt_put(rt); 428 return;
895 goto tx_error;
896 }
897 } 429 }
898#if IS_ENABLED(CONFIG_IPV6)
899 else if (skb->protocol == htons(ETH_P_IPV6)) {
900 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
901
902 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
903 if ((tunnel->parms.iph.daddr &&
904 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
905 rt6->rt6i_dst.plen == 128) {
906 rt6->rt6i_flags |= RTF_MODIFIED;
907 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
908 }
909 }
910 430
911 if (!skb_is_gso(skb) && 431 ip_tunnel_xmit(skb, dev, tnl_params);
912 mtu >= IPV6_MIN_MTU && 432}
913 mtu < skb->len - tunnel->hlen + gre_hlen) {
914 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
915 ip_rt_put(rt);
916 goto tx_error;
917 }
918 }
919#endif
920 433
921 if (tunnel->err_count > 0) { 434static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
922 if (time_before(jiffies, 435 struct net_device *dev)
923 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { 436{
924 tunnel->err_count--; 437 struct ip_tunnel *tunnel = netdev_priv(dev);
438 const struct iphdr *tnl_params;
925 439
926 dst_link_failure(skb); 440 skb = handle_offloads(tunnel, skb);
927 } else 441 if (IS_ERR(skb))
928 tunnel->err_count = 0; 442 goto out;
929 }
930 443
931 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len; 444 if (dev->header_ops) {
932 445 /* Need space for new headers */
933 if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| 446 if (skb_cow_head(skb, dev->needed_headroom -
934 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { 447 (tunnel->hlen + sizeof(struct iphdr))))
935 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 448 goto free_skb;
936 if (max_headroom > dev->needed_headroom)
937 dev->needed_headroom = max_headroom;
938 if (!new_skb) {
939 ip_rt_put(rt);
940 dev->stats.tx_dropped++;
941 dev_kfree_skb(skb);
942 return NETDEV_TX_OK;
943 }
944 if (skb->sk)
945 skb_set_owner_w(new_skb, skb->sk);
946 dev_kfree_skb(skb);
947 skb = new_skb;
948 old_iph = ip_hdr(skb);
949 /* Warning : tiph value might point to freed memory */
950 }
951 449
952 skb_push(skb, gre_hlen); 450 tnl_params = (const struct iphdr *)skb->data;
953 skb_reset_network_header(skb);
954 skb_set_transport_header(skb, sizeof(*iph));
955 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
956 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
957 IPSKB_REROUTED);
958 skb_dst_drop(skb);
959 skb_dst_set(skb, &rt->dst);
960
961 /*
962 * Push down and install the IPIP header.
963 */
964 451
965 iph = ip_hdr(skb); 452 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
966 iph->version = 4; 453 * to gre header.
967 iph->ihl = sizeof(struct iphdr) >> 2; 454 */
968 iph->frag_off = df; 455 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
969 iph->protocol = IPPROTO_GRE; 456 } else {
970 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); 457 if (skb_cow_head(skb, dev->needed_headroom))
971 iph->daddr = fl4.daddr; 458 goto free_skb;
972 iph->saddr = fl4.saddr;
973 iph->ttl = ttl;
974
975 tunnel_ip_select_ident(skb, old_iph, &rt->dst);
976
977 if (ttl == 0) {
978 if (skb->protocol == htons(ETH_P_IP))
979 iph->ttl = old_iph->ttl;
980#if IS_ENABLED(CONFIG_IPV6)
981 else if (skb->protocol == htons(ETH_P_IPV6))
982 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
983#endif
984 else
985 iph->ttl = ip4_dst_hoplimit(&rt->dst);
986 }
987
988 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
989 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
990 htons(ETH_P_TEB) : skb->protocol;
991
992 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
993 __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
994 459
995 if (tunnel->parms.o_flags&GRE_SEQ) { 460 tnl_params = &tunnel->parms.iph;
996 ++tunnel->o_seqno;
997 *ptr = htonl(tunnel->o_seqno);
998 ptr--;
999 }
1000 if (tunnel->parms.o_flags&GRE_KEY) {
1001 *ptr = tunnel->parms.o_key;
1002 ptr--;
1003 }
1004 /* Skip GRE checksum if skb is getting offloaded. */
1005 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) &&
1006 (tunnel->parms.o_flags&GRE_CSUM)) {
1007 int offset = skb_transport_offset(skb);
1008
1009 if (skb_has_shared_frag(skb)) {
1010 err = __skb_linearize(skb);
1011 if (err)
1012 goto tx_error;
1013 }
1014
1015 *ptr = 0;
1016 *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset,
1017 skb->len - offset,
1018 0));
1019 }
1020 } 461 }
1021 462
1022 nf_reset(skb); 463 __gre_xmit(skb, dev, tnl_params, skb->protocol);
1023 464
1024 pkt_len = skb->len - skb_transport_offset(skb);
1025 err = ip_local_out(skb);
1026 if (likely(net_xmit_eval(err) == 0)) {
1027 u64_stats_update_begin(&tstats->syncp);
1028 tstats->tx_bytes += pkt_len;
1029 tstats->tx_packets++;
1030 u64_stats_update_end(&tstats->syncp);
1031 } else {
1032 dev->stats.tx_errors++;
1033 dev->stats.tx_aborted_errors++;
1034 }
1035 return NETDEV_TX_OK; 465 return NETDEV_TX_OK;
1036 466
1037#if IS_ENABLED(CONFIG_IPV6) 467free_skb:
1038tx_error_icmp:
1039 dst_link_failure(skb);
1040#endif
1041tx_error:
1042 dev->stats.tx_errors++;
1043 dev_kfree_skb(skb); 468 dev_kfree_skb(skb);
469out:
470 dev->stats.tx_dropped++;
1044 return NETDEV_TX_OK; 471 return NETDEV_TX_OK;
1045} 472}
1046 473
1047static int ipgre_tunnel_bind_dev(struct net_device *dev) 474static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
475 struct net_device *dev)
1048{ 476{
1049 struct net_device *tdev = NULL; 477 struct ip_tunnel *tunnel = netdev_priv(dev);
1050 struct ip_tunnel *tunnel;
1051 const struct iphdr *iph;
1052 int hlen = LL_MAX_HEADER;
1053 int mtu = ETH_DATA_LEN;
1054 int addend = sizeof(struct iphdr) + 4;
1055
1056 tunnel = netdev_priv(dev);
1057 iph = &tunnel->parms.iph;
1058
1059 /* Guess output device to choose reasonable mtu and needed_headroom */
1060
1061 if (iph->daddr) {
1062 struct flowi4 fl4;
1063 struct rtable *rt;
1064
1065 rt = ip_route_output_gre(dev_net(dev), &fl4,
1066 iph->daddr, iph->saddr,
1067 tunnel->parms.o_key,
1068 RT_TOS(iph->tos),
1069 tunnel->parms.link);
1070 if (!IS_ERR(rt)) {
1071 tdev = rt->dst.dev;
1072 ip_rt_put(rt);
1073 }
1074
1075 if (dev->type != ARPHRD_ETHER)
1076 dev->flags |= IFF_POINTOPOINT;
1077 }
1078 478
1079 if (!tdev && tunnel->parms.link) 479 skb = handle_offloads(tunnel, skb);
1080 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); 480 if (IS_ERR(skb))
481 goto out;
1081 482
1082 if (tdev) { 483 if (skb_cow_head(skb, dev->needed_headroom))
1083 hlen = tdev->hard_header_len + tdev->needed_headroom; 484 goto free_skb;
1084 mtu = tdev->mtu;
1085 }
1086 dev->iflink = tunnel->parms.link;
1087
1088 /* Precalculate GRE options length */
1089 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1090 if (tunnel->parms.o_flags&GRE_CSUM)
1091 addend += 4;
1092 if (tunnel->parms.o_flags&GRE_KEY)
1093 addend += 4;
1094 if (tunnel->parms.o_flags&GRE_SEQ)
1095 addend += 4;
1096 }
1097 dev->needed_headroom = addend + hlen;
1098 mtu -= dev->hard_header_len + addend;
1099 485
1100 if (mtu < 68) 486 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
1101 mtu = 68;
1102 487
1103 tunnel->hlen = addend; 488 return NETDEV_TX_OK;
1104 /* TCP offload with GRE SEQ is not supported. */
1105 if (!(tunnel->parms.o_flags & GRE_SEQ)) {
1106 dev->features |= NETIF_F_GSO_SOFTWARE;
1107 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1108 }
1109 489
1110 return mtu; 490free_skb:
491 dev_kfree_skb(skb);
492out:
493 dev->stats.tx_dropped++;
494 return NETDEV_TX_OK;
1111} 495}
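
The removed ipgre_tunnel_bind_dev() precalculates the GRE option length: 4 bytes of base header plus 4 for each of GRE_CSUM, GRE_KEY and GRE_SEQ. This is the same figure the new code obtains from ip_gre_calc_hlen(). As a sketch (the flag values are local placeholders, not the on-wire bits):

#include <stdio.h>

#define F_CSUM 0x1
#define F_KEY  0x2
#define F_SEQ  0x4

/* Option-length precalculation: 4 bytes of base header plus 4 for
 * each optional word enabled in the output flags. */
static int gre_hlen(unsigned int flags)
{
	int addend = 4;

	if (flags & F_CSUM)
		addend += 4;
	if (flags & F_KEY)
		addend += 4;
	if (flags & F_SEQ)
		addend += 4;
	return addend;
}

int main(void)
{
	printf("%d %d\n", gre_hlen(0), gre_hlen(F_CSUM | F_KEY));	/* 4 12 */
	return 0;
}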
1112 496
1113static int 497static int ipgre_tunnel_ioctl(struct net_device *dev,
1114ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 498 struct ifreq *ifr, int cmd)
1115{ 499{
1116 int err = 0; 500 int err = 0;
1117 struct ip_tunnel_parm p; 501 struct ip_tunnel_parm p;
1118 struct ip_tunnel *t;
1119 struct net *net = dev_net(dev);
1120 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1121
1122 switch (cmd) {
1123 case SIOCGETTUNNEL:
1124 t = NULL;
1125 if (dev == ign->fb_tunnel_dev) {
1126 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1127 err = -EFAULT;
1128 break;
1129 }
1130 t = ipgre_tunnel_locate(net, &p, 0);
1131 }
1132 if (t == NULL)
1133 t = netdev_priv(dev);
1134 memcpy(&p, &t->parms, sizeof(p));
1135 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1136 err = -EFAULT;
1137 break;
1138
1139 case SIOCADDTUNNEL:
1140 case SIOCCHGTUNNEL:
1141 err = -EPERM;
1142 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1143 goto done;
1144
1145 err = -EFAULT;
1146 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1147 goto done;
1148
1149 err = -EINVAL;
1150 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1151 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1152 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1153 goto done;
1154 if (p.iph.ttl)
1155 p.iph.frag_off |= htons(IP_DF);
1156
1157 if (!(p.i_flags&GRE_KEY))
1158 p.i_key = 0;
1159 if (!(p.o_flags&GRE_KEY))
1160 p.o_key = 0;
1161
1162 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1163
1164 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1165 if (t != NULL) {
1166 if (t->dev != dev) {
1167 err = -EEXIST;
1168 break;
1169 }
1170 } else {
1171 unsigned int nflags = 0;
1172
1173 t = netdev_priv(dev);
1174
1175 if (ipv4_is_multicast(p.iph.daddr))
1176 nflags = IFF_BROADCAST;
1177 else if (p.iph.daddr)
1178 nflags = IFF_POINTOPOINT;
1179
1180 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1181 err = -EINVAL;
1182 break;
1183 }
1184 ipgre_tunnel_unlink(ign, t);
1185 synchronize_net();
1186 t->parms.iph.saddr = p.iph.saddr;
1187 t->parms.iph.daddr = p.iph.daddr;
1188 t->parms.i_key = p.i_key;
1189 t->parms.o_key = p.o_key;
1190 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1191 memcpy(dev->broadcast, &p.iph.daddr, 4);
1192 ipgre_tunnel_link(ign, t);
1193 netdev_state_change(dev);
1194 }
1195 }
1196
1197 if (t) {
1198 err = 0;
1199 if (cmd == SIOCCHGTUNNEL) {
1200 t->parms.iph.ttl = p.iph.ttl;
1201 t->parms.iph.tos = p.iph.tos;
1202 t->parms.iph.frag_off = p.iph.frag_off;
1203 if (t->parms.link != p.link) {
1204 t->parms.link = p.link;
1205 dev->mtu = ipgre_tunnel_bind_dev(dev);
1206 netdev_state_change(dev);
1207 }
1208 }
1209 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1210 err = -EFAULT;
1211 } else
1212 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1213 break;
1214
1215 case SIOCDELTUNNEL:
1216 err = -EPERM;
1217 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1218 goto done;
1219
1220 if (dev == ign->fb_tunnel_dev) {
1221 err = -EFAULT;
1222 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1223 goto done;
1224 err = -ENOENT;
1225 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1226 goto done;
1227 err = -EPERM;
1228 if (t == netdev_priv(ign->fb_tunnel_dev))
1229 goto done;
1230 dev = t->dev;
1231 }
1232 unregister_netdevice(dev);
1233 err = 0;
1234 break;
1235 502
1236 default: 503 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1237 err = -EINVAL; 504 return -EFAULT;
505 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
506 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
507 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) {
508 return -EINVAL;
1238 } 509 }
510 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
511 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
1239 512
1240done: 513 err = ip_tunnel_ioctl(dev, &p, cmd);
1241 return err; 514 if (err)
1242} 515 return err;
1243 516
1244static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) 517 p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
1245{ 518 p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
1246 struct ip_tunnel *tunnel = netdev_priv(dev); 519
1247 if (new_mtu < 68 || 520 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1248 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) 521 return -EFAULT;
1249 return -EINVAL;
1250 dev->mtu = new_mtu;
1251 return 0; 522 return 0;
1252} 523}
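
The slimmed-down ioctl handler round-trips flags through gre_flags_to_tnl_flags() and tnl_flags_to_gre_flags(), so ip_tunnel_ioctl() only ever sees the internal TUNNEL_* representation. A plausible shape for that translation; the bit values below are illustrative, not the kernel's definitions:

#include <stdint.h>
#include <stdio.h>

/* Illustrative on-wire GRE bits (host order here) and internal bits. */
#define GRE_F_CSUM 0x8000
#define GRE_F_KEY  0x2000
#define GRE_F_SEQ  0x1000

#define TNL_CSUM 0x01
#define TNL_KEY  0x04
#define TNL_SEQ  0x08

static uint16_t gre_to_tnl(uint16_t gre)
{
	uint16_t tnl = 0;

	if (gre & GRE_F_CSUM) tnl |= TNL_CSUM;
	if (gre & GRE_F_KEY)  tnl |= TNL_KEY;
	if (gre & GRE_F_SEQ)  tnl |= TNL_SEQ;
	return tnl;
}

static uint16_t tnl_to_gre(uint16_t tnl)
{
	uint16_t gre = 0;

	if (tnl & TNL_CSUM) gre |= GRE_F_CSUM;
	if (tnl & TNL_KEY)  gre |= GRE_F_KEY;
	if (tnl & TNL_SEQ)  gre |= GRE_F_SEQ;
	return gre;
}

int main(void)
{
	uint16_t t = gre_to_tnl(GRE_F_KEY | GRE_F_SEQ);

	printf("tnl=0x%02x gre=0x%04x\n", t, tnl_to_gre(t));	/* round-trips */
	return 0;
}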
1253 524
@@ -1277,25 +548,23 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1277 ... 548 ...
1278 ftp fec0:6666:6666::193.233.7.65 549 ftp fec0:6666:6666::193.233.7.65
1279 ... 550 ...
1280
1281 */ 551 */
1282
1283static int ipgre_header(struct sk_buff *skb, struct net_device *dev, 552static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1284 unsigned short type, 553 unsigned short type,
1285 const void *daddr, const void *saddr, unsigned int len) 554 const void *daddr, const void *saddr, unsigned int len)
1286{ 555{
1287 struct ip_tunnel *t = netdev_priv(dev); 556 struct ip_tunnel *t = netdev_priv(dev);
1288 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); 557 struct iphdr *iph;
1289 __be16 *p = (__be16 *)(iph+1); 558 struct gre_base_hdr *greh;
1290 559
1291 memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); 560 iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
1292 p[0] = t->parms.o_flags; 561 greh = (struct gre_base_hdr *)(iph+1);
1293 p[1] = htons(type); 562 greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
563 greh->protocol = htons(type);
1294 564
1295 /* 565 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1296 * Set the source hardware address.
1297 */
1298 566
567 /* Set the source hardware address. */
1299 if (saddr) 568 if (saddr)
1300 memcpy(&iph->saddr, saddr, 4); 569 memcpy(&iph->saddr, saddr, 4);
1301 if (daddr) 570 if (daddr)
@@ -1303,7 +572,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1303 if (iph->daddr) 572 if (iph->daddr)
1304 return t->hlen; 573 return t->hlen;
1305 574
1306 return -t->hlen; 575 return -(t->hlen + sizeof(*iph));
1307} 576}
1308 577
1309static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) 578static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
@@ -1357,31 +626,21 @@ static int ipgre_close(struct net_device *dev)
1357 } 626 }
1358 return 0; 627 return 0;
1359} 628}
1360
1361#endif 629#endif
1362 630
1363static const struct net_device_ops ipgre_netdev_ops = { 631static const struct net_device_ops ipgre_netdev_ops = {
1364 .ndo_init = ipgre_tunnel_init, 632 .ndo_init = ipgre_tunnel_init,
1365 .ndo_uninit = ipgre_tunnel_uninit, 633 .ndo_uninit = ip_tunnel_uninit,
1366#ifdef CONFIG_NET_IPGRE_BROADCAST 634#ifdef CONFIG_NET_IPGRE_BROADCAST
1367 .ndo_open = ipgre_open, 635 .ndo_open = ipgre_open,
1368 .ndo_stop = ipgre_close, 636 .ndo_stop = ipgre_close,
1369#endif 637#endif
1370 .ndo_start_xmit = ipgre_tunnel_xmit, 638 .ndo_start_xmit = ipgre_xmit,
1371 .ndo_do_ioctl = ipgre_tunnel_ioctl, 639 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1372 .ndo_change_mtu = ipgre_tunnel_change_mtu, 640 .ndo_change_mtu = ip_tunnel_change_mtu,
1373 .ndo_get_stats64 = ipgre_get_stats64, 641 .ndo_get_stats64 = ip_tunnel_get_stats64,
1374}; 642};
1375 643
1376static void ipgre_dev_free(struct net_device *dev)
1377{
1378 struct ip_tunnel *tunnel = netdev_priv(dev);
1379
1380 gro_cells_destroy(&tunnel->gro_cells);
1381 free_percpu(dev->tstats);
1382 free_netdev(dev);
1383}
1384
1385#define GRE_FEATURES (NETIF_F_SG | \ 644#define GRE_FEATURES (NETIF_F_SG | \
1386 NETIF_F_FRAGLIST | \ 645 NETIF_F_FRAGLIST | \
1387 NETIF_F_HIGHDMA | \ 646 NETIF_F_HIGHDMA | \
@@ -1390,35 +649,48 @@ static void ipgre_dev_free(struct net_device *dev)
1390static void ipgre_tunnel_setup(struct net_device *dev) 649static void ipgre_tunnel_setup(struct net_device *dev)
1391{ 650{
1392 dev->netdev_ops = &ipgre_netdev_ops; 651 dev->netdev_ops = &ipgre_netdev_ops;
1393 dev->destructor = ipgre_dev_free; 652 ip_tunnel_setup(dev, ipgre_net_id);
653}
1394 654
1395 dev->type = ARPHRD_IPGRE; 655static void __gre_tunnel_init(struct net_device *dev)
1396 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; 656{
657 struct ip_tunnel *tunnel;
658
659 tunnel = netdev_priv(dev);
660 tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
661 tunnel->parms.iph.protocol = IPPROTO_GRE;
662
663 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1397 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; 664 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1398 dev->flags = IFF_NOARP;
1399 dev->iflink = 0;
1400 dev->addr_len = 4;
1401 dev->features |= NETIF_F_NETNS_LOCAL;
1402 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1403 665
1404 dev->features |= GRE_FEATURES; 666 dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
1405 dev->hw_features |= GRE_FEATURES; 667 dev->hw_features |= GRE_FEATURES;
668
669 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
670 /* TCP offload with GRE SEQ is not supported. */
671 dev->features |= NETIF_F_GSO_SOFTWARE;
672 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
673 /* Can use a lockless transmit, unless we generate
674 * output sequences
675 */
676 dev->features |= NETIF_F_LLTX;
677 }
1406} 678}
1407 679
1408static int ipgre_tunnel_init(struct net_device *dev) 680static int ipgre_tunnel_init(struct net_device *dev)
1409{ 681{
1410 struct ip_tunnel *tunnel; 682 struct ip_tunnel *tunnel = netdev_priv(dev);
1411 struct iphdr *iph; 683 struct iphdr *iph = &tunnel->parms.iph;
1412 int err;
1413 684
1414 tunnel = netdev_priv(dev); 685 __gre_tunnel_init(dev);
1415 iph = &tunnel->parms.iph;
1416 686
1417 tunnel->dev = dev; 687 memcpy(dev->dev_addr, &iph->saddr, 4);
1418 strcpy(tunnel->parms.name, dev->name); 688 memcpy(dev->broadcast, &iph->daddr, 4);
1419 689
1420 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 690 dev->type = ARPHRD_IPGRE;
1421 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 691 dev->flags = IFF_NOARP;
692 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
693 dev->addr_len = 4;
1422 694
1423 if (iph->daddr) { 695 if (iph->daddr) {
1424#ifdef CONFIG_NET_IPGRE_BROADCAST 696#ifdef CONFIG_NET_IPGRE_BROADCAST
@@ -1432,106 +704,30 @@ static int ipgre_tunnel_init(struct net_device *dev)
1432 } else 704 } else
1433 dev->header_ops = &ipgre_header_ops; 705 dev->header_ops = &ipgre_header_ops;
1434 706
1435 dev->tstats = alloc_percpu(struct pcpu_tstats); 707 return ip_tunnel_init(dev);
1436 if (!dev->tstats)
1437 return -ENOMEM;
1438
1439 err = gro_cells_init(&tunnel->gro_cells, dev);
1440 if (err) {
1441 free_percpu(dev->tstats);
1442 return err;
1443 }
1444
1445 return 0;
1446}
1447
1448static void ipgre_fb_tunnel_init(struct net_device *dev)
1449{
1450 struct ip_tunnel *tunnel = netdev_priv(dev);
1451 struct iphdr *iph = &tunnel->parms.iph;
1452
1453 tunnel->dev = dev;
1454 strcpy(tunnel->parms.name, dev->name);
1455
1456 iph->version = 4;
1457 iph->protocol = IPPROTO_GRE;
1458 iph->ihl = 5;
1459 tunnel->hlen = sizeof(struct iphdr) + 4;
1460
1461 dev_hold(dev);
1462} 708}
1463 709
1464
1465static const struct gre_protocol ipgre_protocol = { 710static const struct gre_protocol ipgre_protocol = {
1466 .handler = ipgre_rcv, 711 .handler = ipgre_rcv,
1467 .err_handler = ipgre_err, 712 .err_handler = ipgre_err,
1468}; 713};
1469 714
1470static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1471{
1472 int prio;
1473
1474 for (prio = 0; prio < 4; prio++) {
1475 int h;
1476 for (h = 0; h < HASH_SIZE; h++) {
1477 struct ip_tunnel *t;
1478
1479 t = rtnl_dereference(ign->tunnels[prio][h]);
1480
1481 while (t != NULL) {
1482 unregister_netdevice_queue(t->dev, head);
1483 t = rtnl_dereference(t->next);
1484 }
1485 }
1486 }
1487}
1488
1489static int __net_init ipgre_init_net(struct net *net) 715static int __net_init ipgre_init_net(struct net *net)
1490{ 716{
1491 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 717 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1492 int err;
1493
1494 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1495 ipgre_tunnel_setup);
1496 if (!ign->fb_tunnel_dev) {
1497 err = -ENOMEM;
1498 goto err_alloc_dev;
1499 }
1500 dev_net_set(ign->fb_tunnel_dev, net);
1501
1502 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1503 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1504
1505 if ((err = register_netdev(ign->fb_tunnel_dev)))
1506 goto err_reg_dev;
1507
1508 rcu_assign_pointer(ign->tunnels_wc[0],
1509 netdev_priv(ign->fb_tunnel_dev));
1510 return 0;
1511
1512err_reg_dev:
1513 ipgre_dev_free(ign->fb_tunnel_dev);
1514err_alloc_dev:
1515 return err;
1516} 718}
1517 719
1518static void __net_exit ipgre_exit_net(struct net *net) 720static void __net_exit ipgre_exit_net(struct net *net)
1519{ 721{
1520 struct ipgre_net *ign; 722 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
1521 LIST_HEAD(list); 723 ip_tunnel_delete_net(itn);
1522
1523 ign = net_generic(net, ipgre_net_id);
1524 rtnl_lock();
1525 ipgre_destroy_tunnels(ign, &list);
1526 unregister_netdevice_many(&list);
1527 rtnl_unlock();
1528} 724}
1529 725
1530static struct pernet_operations ipgre_net_ops = { 726static struct pernet_operations ipgre_net_ops = {
1531 .init = ipgre_init_net, 727 .init = ipgre_init_net,
1532 .exit = ipgre_exit_net, 728 .exit = ipgre_exit_net,
1533 .id = &ipgre_net_id, 729 .id = &ipgre_net_id,
1534 .size = sizeof(struct ipgre_net), 730 .size = sizeof(struct ip_tunnel_net),
1535}; 731};
1536 732
1537static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) 733static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -1576,8 +772,8 @@ out:
1576 return ipgre_tunnel_validate(tb, data); 772 return ipgre_tunnel_validate(tb, data);
1577} 773}
1578 774
1579static void ipgre_netlink_parms(struct nlattr *data[], 775static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
1580 struct ip_tunnel_parm *parms) 776 struct ip_tunnel_parm *parms)
1581{ 777{
1582 memset(parms, 0, sizeof(*parms)); 778 memset(parms, 0, sizeof(*parms));
1583 779
@@ -1590,10 +786,10 @@ static void ipgre_netlink_parms(struct nlattr *data[],
1590 parms->link = nla_get_u32(data[IFLA_GRE_LINK]); 786 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1591 787
1592 if (data[IFLA_GRE_IFLAGS]) 788 if (data[IFLA_GRE_IFLAGS])
1593 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); 789 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1594 790
1595 if (data[IFLA_GRE_OFLAGS]) 791 if (data[IFLA_GRE_OFLAGS])
1596 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); 792 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1597 793
1598 if (data[IFLA_GRE_IKEY]) 794 if (data[IFLA_GRE_IKEY])
1599 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); 795 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
@@ -1617,148 +813,46 @@ static void ipgre_netlink_parms(struct nlattr *data[],
1617 parms->iph.frag_off = htons(IP_DF); 813 parms->iph.frag_off = htons(IP_DF);
1618} 814}
1619 815
1620static int ipgre_tap_init(struct net_device *dev) 816static int gre_tap_init(struct net_device *dev)
1621{ 817{
1622 struct ip_tunnel *tunnel; 818 __gre_tunnel_init(dev);
1623
1624 tunnel = netdev_priv(dev);
1625
1626 tunnel->dev = dev;
1627 strcpy(tunnel->parms.name, dev->name);
1628 819
1629 ipgre_tunnel_bind_dev(dev); 820 return ip_tunnel_init(dev);
1630
1631 dev->tstats = alloc_percpu(struct pcpu_tstats);
1632 if (!dev->tstats)
1633 return -ENOMEM;
1634
1635 return 0;
1636} 821}
1637 822
1638static const struct net_device_ops ipgre_tap_netdev_ops = { 823static const struct net_device_ops gre_tap_netdev_ops = {
1639 .ndo_init = ipgre_tap_init, 824 .ndo_init = gre_tap_init,
1640 .ndo_uninit = ipgre_tunnel_uninit, 825 .ndo_uninit = ip_tunnel_uninit,
1641 .ndo_start_xmit = ipgre_tunnel_xmit, 826 .ndo_start_xmit = gre_tap_xmit,
1642 .ndo_set_mac_address = eth_mac_addr, 827 .ndo_set_mac_address = eth_mac_addr,
1643 .ndo_validate_addr = eth_validate_addr, 828 .ndo_validate_addr = eth_validate_addr,
1644 .ndo_change_mtu = ipgre_tunnel_change_mtu, 829 .ndo_change_mtu = ip_tunnel_change_mtu,
1645 .ndo_get_stats64 = ipgre_get_stats64, 830 .ndo_get_stats64 = ip_tunnel_get_stats64,
1646}; 831};
1647 832
1648static void ipgre_tap_setup(struct net_device *dev) 833static void ipgre_tap_setup(struct net_device *dev)
1649{ 834{
1650
1651 ether_setup(dev); 835 ether_setup(dev);
1652 836 dev->netdev_ops = &gre_tap_netdev_ops;
1653 dev->netdev_ops = &ipgre_tap_netdev_ops; 837 ip_tunnel_setup(dev, gre_tap_net_id);
1654 dev->destructor = ipgre_dev_free;
1655
1656 dev->iflink = 0;
1657 dev->features |= NETIF_F_NETNS_LOCAL;
1658
1659 dev->features |= GRE_FEATURES;
1660 dev->hw_features |= GRE_FEATURES;
1661} 838}
1662 839
1663static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], 840static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1664 struct nlattr *data[]) 841 struct nlattr *tb[], struct nlattr *data[])
1665{ 842{
1666 struct ip_tunnel *nt; 843 struct ip_tunnel_parm p;
1667 struct net *net = dev_net(dev);
1668 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1669 int mtu;
1670 int err;
1671
1672 nt = netdev_priv(dev);
1673 ipgre_netlink_parms(data, &nt->parms);
1674
1675 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1676 return -EEXIST;
1677
1678 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1679 eth_hw_addr_random(dev);
1680
1681 mtu = ipgre_tunnel_bind_dev(dev);
1682 if (!tb[IFLA_MTU])
1683 dev->mtu = mtu;
1684
1685 /* Can use a lockless transmit, unless we generate output sequences */
1686 if (!(nt->parms.o_flags & GRE_SEQ))
1687 dev->features |= NETIF_F_LLTX;
1688
1689 err = register_netdevice(dev);
1690 if (err)
1691 goto out;
1692
1693 dev_hold(dev);
1694 ipgre_tunnel_link(ign, nt);
1695 844
1696out: 845 ipgre_netlink_parms(data, tb, &p);
1697 return err; 846 return ip_tunnel_newlink(dev, tb, &p);
1698} 847}
1699 848
1700static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], 849static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1701 struct nlattr *data[]) 850 struct nlattr *data[])
1702{ 851{
1703 struct ip_tunnel *t, *nt;
1704 struct net *net = dev_net(dev);
1705 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1706 struct ip_tunnel_parm p; 852 struct ip_tunnel_parm p;
1707 int mtu;
1708
1709 if (dev == ign->fb_tunnel_dev)
1710 return -EINVAL;
1711
1712 nt = netdev_priv(dev);
1713 ipgre_netlink_parms(data, &p);
1714
1715 t = ipgre_tunnel_locate(net, &p, 0);
1716
1717 if (t) {
1718 if (t->dev != dev)
1719 return -EEXIST;
1720 } else {
1721 t = nt;
1722
1723 if (dev->type != ARPHRD_ETHER) {
1724 unsigned int nflags = 0;
1725
1726 if (ipv4_is_multicast(p.iph.daddr))
1727 nflags = IFF_BROADCAST;
1728 else if (p.iph.daddr)
1729 nflags = IFF_POINTOPOINT;
1730
1731 if ((dev->flags ^ nflags) &
1732 (IFF_POINTOPOINT | IFF_BROADCAST))
1733 return -EINVAL;
1734 }
1735 853
1736 ipgre_tunnel_unlink(ign, t); 854 ipgre_netlink_parms(data, tb, &p);
1737 t->parms.iph.saddr = p.iph.saddr; 855 return ip_tunnel_changelink(dev, tb, &p);
1738 t->parms.iph.daddr = p.iph.daddr;
1739 t->parms.i_key = p.i_key;
1740 if (dev->type != ARPHRD_ETHER) {
1741 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1742 memcpy(dev->broadcast, &p.iph.daddr, 4);
1743 }
1744 ipgre_tunnel_link(ign, t);
1745 netdev_state_change(dev);
1746 }
1747
1748 t->parms.o_key = p.o_key;
1749 t->parms.iph.ttl = p.iph.ttl;
1750 t->parms.iph.tos = p.iph.tos;
1751 t->parms.iph.frag_off = p.iph.frag_off;
1752
1753 if (t->parms.link != p.link) {
1754 t->parms.link = p.link;
1755 mtu = ipgre_tunnel_bind_dev(dev);
1756 if (!tb[IFLA_MTU])
1757 dev->mtu = mtu;
1758 netdev_state_change(dev);
1759 }
1760
1761 return 0;
1762} 856}
1763 857
1764static size_t ipgre_get_size(const struct net_device *dev) 858static size_t ipgre_get_size(const struct net_device *dev)
@@ -1793,8 +887,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1793 struct ip_tunnel_parm *p = &t->parms; 887 struct ip_tunnel_parm *p = &t->parms;
1794 888
1795 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || 889 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1796 nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || 890 nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
1797 nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || 891 nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
1798 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || 892 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1799 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || 893 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1800 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) || 894 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
@@ -1832,6 +926,7 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1832 .validate = ipgre_tunnel_validate, 926 .validate = ipgre_tunnel_validate,
1833 .newlink = ipgre_newlink, 927 .newlink = ipgre_newlink,
1834 .changelink = ipgre_changelink, 928 .changelink = ipgre_changelink,
929 .dellink = ip_tunnel_dellink,
1835 .get_size = ipgre_get_size, 930 .get_size = ipgre_get_size,
1836 .fill_info = ipgre_fill_info, 931 .fill_info = ipgre_fill_info,
1837}; 932};
@@ -1845,13 +940,28 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1845 .validate = ipgre_tap_validate, 940 .validate = ipgre_tap_validate,
1846 .newlink = ipgre_newlink, 941 .newlink = ipgre_newlink,
1847 .changelink = ipgre_changelink, 942 .changelink = ipgre_changelink,
943 .dellink = ip_tunnel_dellink,
1848 .get_size = ipgre_get_size, 944 .get_size = ipgre_get_size,
1849 .fill_info = ipgre_fill_info, 945 .fill_info = ipgre_fill_info,
1850}; 946};
1851 947
1852/* 948static int __net_init ipgre_tap_init_net(struct net *net)
1853 * And now the modules code and kernel interface. 949{
1854 */ 950 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
951}
952
953static void __net_exit ipgre_tap_exit_net(struct net *net)
954{
955 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
956 ip_tunnel_delete_net(itn);
957}
958
959static struct pernet_operations ipgre_tap_net_ops = {
960 .init = ipgre_tap_init_net,
961 .exit = ipgre_tap_exit_net,
962 .id = &gre_tap_net_id,
963 .size = sizeof(struct ip_tunnel_net),
964};
1855 965
1856static int __init ipgre_init(void) 966static int __init ipgre_init(void)
1857{ 967{
@@ -1863,6 +973,10 @@ static int __init ipgre_init(void)
1863 if (err < 0) 973 if (err < 0)
1864 return err; 974 return err;
1865 975
976 err = register_pernet_device(&ipgre_tap_net_ops);
977 if (err < 0)
978 goto pnet_tap_failed;
979
1866 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); 980 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1867 if (err < 0) { 981 if (err < 0) {
1868 pr_info("%s: can't add protocol\n", __func__); 982 pr_info("%s: can't add protocol\n", __func__);
@@ -1877,16 +991,17 @@ static int __init ipgre_init(void)
1877 if (err < 0) 991 if (err < 0)
1878 goto tap_ops_failed; 992 goto tap_ops_failed;
1879 993
1880out: 994 return 0;
1881 return err;
1882 995
1883tap_ops_failed: 996tap_ops_failed:
1884 rtnl_link_unregister(&ipgre_link_ops); 997 rtnl_link_unregister(&ipgre_link_ops);
1885rtnl_link_failed: 998rtnl_link_failed:
1886 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); 999 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1887add_proto_failed: 1000add_proto_failed:
1001 unregister_pernet_device(&ipgre_tap_net_ops);
1002pnet_tap_failed:
1888 unregister_pernet_device(&ipgre_net_ops); 1003 unregister_pernet_device(&ipgre_net_ops);
1889 goto out; 1004 return err;
1890} 1005}
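
ipgre_init() now has four registrations to unwind, and its error path tears them down in exact reverse order through a goto ladder. The pattern in miniature, with stub functions standing in for register_pernet_device(), gre_add_protocol() and rtnl_link_register():

#include <stdio.h>

/* Each stubbed step returns 0 on success; teardown must run in exact
 * reverse order of the successful setups. */
static int step(const char *what, int fail)
{
	printf("+ %s\n", what);
	return fail ? -1 : 0;
}

static void undo(const char *what)
{
	printf("- %s\n", what);
}

static int module_init_sketch(void)
{
	int err;

	err = step("pernet gre", 0);
	if (err)
		return err;
	err = step("pernet gretap", 0);
	if (err)
		goto out_pernet_gre;
	err = step("protocol", 0);
	if (err)
		goto out_pernet_tap;
	err = step("link ops", 1);	/* simulate a failure here */
	if (err)
		goto out_proto;
	return 0;

out_proto:
	undo("protocol");
out_pernet_tap:
	undo("pernet gretap");
out_pernet_gre:
	undo("pernet gre");
	return err;
}

int main(void)
{
	return module_init_sketch() ? 1 : 0;
}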
1891 1006
1892static void __exit ipgre_fini(void) 1007static void __exit ipgre_fini(void)
@@ -1895,6 +1010,7 @@ static void __exit ipgre_fini(void)
1895 rtnl_link_unregister(&ipgre_link_ops); 1010 rtnl_link_unregister(&ipgre_link_ops);
1896 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) 1011 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1897 pr_info("%s: can't remove protocol\n", __func__); 1012 pr_info("%s: can't remove protocol\n", __func__);
1013 unregister_pernet_device(&ipgre_tap_net_ops);
1898 unregister_pernet_device(&ipgre_net_ops); 1014 unregister_pernet_device(&ipgre_net_ops);
1899} 1015}
1900 1016
@@ -1904,3 +1020,4 @@ MODULE_LICENSE("GPL");
1904MODULE_ALIAS_RTNL_LINK("gre"); 1020MODULE_ALIAS_RTNL_LINK("gre");
1905MODULE_ALIAS_RTNL_LINK("gretap"); 1021MODULE_ALIAS_RTNL_LINK("gretap");
1906MODULE_ALIAS_NETDEV("gre0"); 1022MODULE_ALIAS_NETDEV("gre0");
1023MODULE_ALIAS_NETDEV("gretap0");
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5e12dca7b3dd..147abf5275aa 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -430,8 +430,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
430 to->tc_index = from->tc_index; 430 to->tc_index = from->tc_index;
431#endif 431#endif
432 nf_copy(to, from); 432 nf_copy(to, from);
433#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 433#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
434 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
435 to->nf_trace = from->nf_trace; 434 to->nf_trace = from->nf_trace;
436#endif 435#endif
437#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) 436#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
new file mode 100644
index 000000000000..e4147ec1665a
--- /dev/null
+++ b/net/ipv4/ip_tunnel.c
@@ -0,0 +1,1035 @@
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
43
44#include <net/sock.h>
45#include <net/ip.h>
46#include <net/icmp.h>
47#include <net/protocol.h>
48#include <net/ip_tunnels.h>
49#include <net/arp.h>
50#include <net/checksum.h>
51#include <net/dsfield.h>
52#include <net/inet_ecn.h>
53#include <net/xfrm.h>
54#include <net/net_namespace.h>
55#include <net/netns/generic.h>
56#include <net/rtnetlink.h>
57
58#if IS_ENABLED(CONFIG_IPV6)
59#include <net/ipv6.h>
60#include <net/ip6_fib.h>
61#include <net/ip6_route.h>
62#endif
63
64static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
65 __be32 key, __be32 remote)
66{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
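
ip_tunnel_hash() reduces key XOR remote address to a bucket index with hash_32(). A user-space sketch of that multiplicative hash; the constant is the historical 32-bit golden-ratio prime, and IP_TNL_HASH_BITS is stood in by a local define:

#include <stdint.h>
#include <stdio.h>

#define HASH_BITS 7	/* stand-in for IP_TNL_HASH_BITS */

/* Multiplicative hashing in the style of the kernel's hash_32():
 * multiply by a golden-ratio constant and keep the top bits. */
static uint32_t hash32(uint32_t val, unsigned int bits)
{
	return (val * 0x9e370001u) >> (32 - bits);
}

static uint32_t tunnel_hash(uint32_t key, uint32_t remote)
{
	return hash32(key ^ remote, HASH_BITS);
}

int main(void)
{
	/* key 42 and remote 192.0.2.1 land in one of 128 buckets */
	printf("bucket=%u\n", tunnel_hash(42, 0xc0000201));
	return 0;
}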
70
71/* Often modified stats are per cpu, other are shared (netdev->stats) */
72struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
73 struct rtnl_link_stats64 *tot)
74{
75 int i;
76
77 for_each_possible_cpu(i) {
78 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
79 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
80 unsigned int start;
81
82 do {
83 start = u64_stats_fetch_begin_bh(&tstats->syncp);
84 rx_packets = tstats->rx_packets;
85 tx_packets = tstats->tx_packets;
86 rx_bytes = tstats->rx_bytes;
87 tx_bytes = tstats->tx_bytes;
88 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
89
90 tot->rx_packets += rx_packets;
91 tot->tx_packets += tx_packets;
92 tot->rx_bytes += rx_bytes;
93 tot->tx_bytes += tx_bytes;
94 }
95
96 tot->multicast = dev->stats.multicast;
97
98 tot->rx_crc_errors = dev->stats.rx_crc_errors;
99 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
100 tot->rx_length_errors = dev->stats.rx_length_errors;
101 tot->rx_frame_errors = dev->stats.rx_frame_errors;
102 tot->rx_errors = dev->stats.rx_errors;
103
104 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
105 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
106 tot->tx_dropped = dev->stats.tx_dropped;
107 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
108 tot->tx_errors = dev->stats.tx_errors;
109
110 tot->collisions = dev->stats.collisions;
111
112 return tot;
113}
114EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
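
The per-cpu totals above are read under u64_stats_fetch_begin_bh()/u64_stats_fetch_retry_bh() so that a 64-bit counter is never observed half-updated on 32-bit hosts. A single-threaded sketch of the underlying seqcount idea: the writer bumps the sequence to odd before updating and back to even afterwards, and the reader retries on an odd or changed sequence (a real concurrent version needs stronger memory ordering than shown):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct stats {
	atomic_uint seq;
	uint64_t rx_packets;
	uint64_t rx_bytes;
};

static void writer_update(struct stats *s, uint64_t bytes)
{
	atomic_fetch_add(&s->seq, 1);	/* odd: update in progress */
	s->rx_packets++;
	s->rx_bytes += bytes;
	atomic_fetch_add(&s->seq, 1);	/* even: stable again */
}

static void reader_snapshot(struct stats *s, uint64_t *pkts, uint64_t *bytes)
{
	unsigned int start;

	do {
		start = atomic_load(&s->seq);
		*pkts = s->rx_packets;
		*bytes = s->rx_bytes;
	} while ((start & 1) || start != atomic_load(&s->seq));
}

int main(void)
{
	struct stats s = { 0 };
	uint64_t pkts, bytes;

	writer_update(&s, 1500);
	reader_snapshot(&s, &pkts, &bytes);
	printf("packets=%llu bytes=%llu\n",
	       (unsigned long long)pkts, (unsigned long long)bytes);
	return 0;
}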
115
116static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
117 __be16 flags, __be32 key)
118{
119 if (p->i_flags & TUNNEL_KEY) {
120 if (flags & TUNNEL_KEY)
121 return key == p->i_key;
122 else
123 /* key expected, none present */
124 return false;
125 } else
126 return !(flags & TUNNEL_KEY);
127}
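
ip_tunnel_key_match() is deliberately symmetric: a keyed tunnel accepts only packets carrying the same key, and a keyless tunnel accepts only keyless packets. The rule as a tiny truth-table sketch:

#include <stdbool.h>
#include <stdio.h>

/* Keyed tunnel: packet must carry the same key.
 * Keyless tunnel: packet must carry no key at all. */
static bool key_match(bool tun_keyed, unsigned int tun_key,
		      bool pkt_keyed, unsigned int pkt_key)
{
	if (tun_keyed)
		return pkt_keyed && pkt_key == tun_key;
	return !pkt_keyed;
}

int main(void)
{
	printf("%d %d %d\n",
	       key_match(true, 7, true, 7),	/* 1: keys agree */
	       key_match(true, 7, false, 0),	/* 0: key expected, none present */
	       key_match(false, 0, true, 7));	/* 0: unexpected key */
	return 0;
}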
128
129/* Fallback tunnel: no source, no destination, no key, no options
130
131 Tunnel hash table:
132 We require an exact key match, i.e. if a key is present in the packet
133 it will match only a tunnel with the same key; if it is not present,
134 it will match only a keyless tunnel.
135
136 All keyless packets, if not matched against configured keyless tunnels,
137 will match the fallback tunnel.
138 Given src, dst and key, find the appropriate tunnel for input.
139*/
140struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
141 int link, __be16 flags,
142 __be32 remote, __be32 local,
143 __be32 key)
144{
145 unsigned int hash;
146 struct ip_tunnel *t, *cand = NULL;
147 struct hlist_head *head;
148
149 hash = ip_tunnel_hash(itn, key, remote);
150 head = &itn->tunnels[hash];
151
152 hlist_for_each_entry_rcu(t, head, hash_node) {
153 if (local != t->parms.iph.saddr ||
154 remote != t->parms.iph.daddr ||
155 !(t->dev->flags & IFF_UP))
156 continue;
157
158 if (!ip_tunnel_key_match(&t->parms, flags, key))
159 continue;
160
161 if (t->parms.link == link)
162 return t;
163 else
164 cand = t;
165 }
166
167 hlist_for_each_entry_rcu(t, head, hash_node) {
168 if (remote != t->parms.iph.daddr ||
169 !(t->dev->flags & IFF_UP))
170 continue;
171
172 if (!ip_tunnel_key_match(&t->parms, flags, key))
173 continue;
174
175 if (t->parms.link == link)
176 return t;
177 else if (!cand)
178 cand = t;
179 }
180
181 hash = ip_tunnel_hash(itn, key, 0);
182 head = &itn->tunnels[hash];
183
184 hlist_for_each_entry_rcu(t, head, hash_node) {
185 if ((local != t->parms.iph.saddr &&
186 (local != t->parms.iph.daddr ||
187 !ipv4_is_multicast(local))) ||
188 !(t->dev->flags & IFF_UP))
189 continue;
190
191 if (!ip_tunnel_key_match(&t->parms, flags, key))
192 continue;
193
194 if (t->parms.link == link)
195 return t;
196 else if (!cand)
197 cand = t;
198 }
199
200 if (flags & TUNNEL_NO_KEY)
201 goto skip_key_lookup;
202
203 hlist_for_each_entry_rcu(t, head, hash_node) {
204 if (t->parms.i_key != key ||
205 !(t->dev->flags & IFF_UP))
206 continue;
207
208 if (t->parms.link == link)
209 return t;
210 else if (!cand)
211 cand = t;
212 }
213
214skip_key_lookup:
215 if (cand)
216 return cand;
217
218 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
219 return netdev_priv(itn->fb_tunnel_dev);
220
221
222 return NULL;
223}
224EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
225
226static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
227 struct ip_tunnel_parm *parms)
228{
229 unsigned int h;
230 __be32 remote;
231
232 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
233 remote = parms->iph.daddr;
234 else
235 remote = 0;
236
237 h = ip_tunnel_hash(itn, parms->i_key, remote);
238 return &itn->tunnels[h];
239}
240
241static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
242{
243 struct hlist_head *head = ip_bucket(itn, &t->parms);
244
245 hlist_add_head_rcu(&t->hash_node, head);
246}
247
248static void ip_tunnel_del(struct ip_tunnel *t)
249{
250 hlist_del_init_rcu(&t->hash_node);
251}
252
253static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
254 struct ip_tunnel_parm *parms,
255 int type)
256{
257 __be32 remote = parms->iph.daddr;
258 __be32 local = parms->iph.saddr;
259 __be32 key = parms->i_key;
260 int link = parms->link;
261 struct ip_tunnel *t = NULL;
262 struct hlist_head *head = ip_bucket(itn, parms);
263
264 hlist_for_each_entry_rcu(t, head, hash_node) {
265 if (local == t->parms.iph.saddr &&
266 remote == t->parms.iph.daddr &&
267 key == t->parms.i_key &&
268 link == t->parms.link &&
269 type == t->dev->type)
270 break;
271 }
272 return t;
273}
274
275static struct net_device *__ip_tunnel_create(struct net *net,
276 const struct rtnl_link_ops *ops,
277 struct ip_tunnel_parm *parms)
278{
279 int err;
280 struct ip_tunnel *tunnel;
281 struct net_device *dev;
282 char name[IFNAMSIZ];
283
284 if (parms->name[0])
285 strlcpy(name, parms->name, IFNAMSIZ);
286 else {
287 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
288 err = -E2BIG;
289 goto failed;
290 }
291 strlcpy(name, ops->kind, IFNAMSIZ);
292 strncat(name, "%d", 2);
293 }
294
295 ASSERT_RTNL();
296 dev = alloc_netdev(ops->priv_size, name, ops->setup);
297 if (!dev) {
298 err = -ENOMEM;
299 goto failed;
300 }
301 dev_net_set(dev, net);
302
303 dev->rtnl_link_ops = ops;
304
305 tunnel = netdev_priv(dev);
306 tunnel->parms = *parms;
307
308 err = register_netdevice(dev);
309 if (err)
310 goto failed_free;
311
312 return dev;
313
314failed_free:
315 free_netdev(dev);
316failed:
317 return ERR_PTR(err);
318}
319
320static inline struct rtable *ip_route_output_tunnel(struct net *net,
321 struct flowi4 *fl4,
322 int proto,
323 __be32 daddr, __be32 saddr,
324 __be32 key, __u8 tos, int oif)
325{
326 memset(fl4, 0, sizeof(*fl4));
327 fl4->flowi4_oif = oif;
328 fl4->daddr = daddr;
329 fl4->saddr = saddr;
330 fl4->flowi4_tos = tos;
331 fl4->flowi4_proto = proto;
332 fl4->fl4_gre_key = key;
333 return ip_route_output_key(net, fl4);
334}
335
336static int ip_tunnel_bind_dev(struct net_device *dev)
337{
338 struct net_device *tdev = NULL;
339 struct ip_tunnel *tunnel = netdev_priv(dev);
340 const struct iphdr *iph;
341 int hlen = LL_MAX_HEADER;
342 int mtu = ETH_DATA_LEN;
343 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
344
345 iph = &tunnel->parms.iph;
346
347 /* Guess output device to choose reasonable mtu and needed_headroom */
348 if (iph->daddr) {
349 struct flowi4 fl4;
350 struct rtable *rt;
351
352 rt = ip_route_output_tunnel(dev_net(dev), &fl4,
353 tunnel->parms.iph.protocol,
354 iph->daddr, iph->saddr,
355 tunnel->parms.o_key,
356 RT_TOS(iph->tos),
357 tunnel->parms.link);
358 if (!IS_ERR(rt)) {
359 tdev = rt->dst.dev;
360 ip_rt_put(rt);
361 }
362 if (dev->type != ARPHRD_ETHER)
363 dev->flags |= IFF_POINTOPOINT;
364 }
365
366 if (!tdev && tunnel->parms.link)
367 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
368
369 if (tdev) {
370 hlen = tdev->hard_header_len + tdev->needed_headroom;
371 mtu = tdev->mtu;
372 }
373 dev->iflink = tunnel->parms.link;
374
375 dev->needed_headroom = t_hlen + hlen;
376 mtu -= (dev->hard_header_len + t_hlen);
377
378 if (mtu < 68)
379 mtu = 68;
380
381 return mtu;
382}
383
384static struct ip_tunnel *ip_tunnel_create(struct net *net,
385 struct ip_tunnel_net *itn,
386 struct ip_tunnel_parm *parms)
387{
388 struct ip_tunnel *nt, *fbt;
389 struct net_device *dev;
390
391 BUG_ON(!itn->fb_tunnel_dev);
392 fbt = netdev_priv(itn->fb_tunnel_dev);
393 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
394 if (IS_ERR(dev))
395 return NULL;
396
397 dev->mtu = ip_tunnel_bind_dev(dev);
398
399 nt = netdev_priv(dev);
400 ip_tunnel_add(itn, nt);
401 return nt;
402}
403
404int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
405 const struct tnl_ptk_info *tpi, bool log_ecn_error)
406{
407 struct pcpu_tstats *tstats;
408 const struct iphdr *iph = ip_hdr(skb);
409 int err;
410
411 secpath_reset(skb);
412
413 skb->protocol = tpi->proto;
414
415 skb->mac_header = skb->network_header;
416 __pskb_pull(skb, tunnel->hlen);
417 skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
418#ifdef CONFIG_NET_IPGRE_BROADCAST
419 if (ipv4_is_multicast(iph->daddr)) {
420 /* Looped back packet, drop it! */
421 if (rt_is_output_route(skb_rtable(skb)))
422 goto drop;
423 tunnel->dev->stats.multicast++;
424 skb->pkt_type = PACKET_BROADCAST;
425 }
426#endif
427
428 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
429 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
430 tunnel->dev->stats.rx_crc_errors++;
431 tunnel->dev->stats.rx_errors++;
432 goto drop;
433 }
434
435 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
436 if (!(tpi->flags&TUNNEL_SEQ) ||
437 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
438 tunnel->dev->stats.rx_fifo_errors++;
439 tunnel->dev->stats.rx_errors++;
440 goto drop;
441 }
442 tunnel->i_seqno = ntohl(tpi->seq) + 1;
443 }
444
445 /* Warning: All skb pointers will be invalidated! */
446 if (tunnel->dev->type == ARPHRD_ETHER) {
447 if (!pskb_may_pull(skb, ETH_HLEN)) {
448 tunnel->dev->stats.rx_length_errors++;
449 tunnel->dev->stats.rx_errors++;
450 goto drop;
451 }
452
453 iph = ip_hdr(skb);
454 skb->protocol = eth_type_trans(skb, tunnel->dev);
455 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
456 }
457
458 skb->pkt_type = PACKET_HOST;
459 __skb_tunnel_rx(skb, tunnel->dev);
460
461 skb_reset_network_header(skb);
462 err = IP_ECN_decapsulate(iph, skb);
463 if (unlikely(err)) {
464 if (log_ecn_error)
465 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
466 &iph->saddr, iph->tos);
467 if (err > 1) {
468 ++tunnel->dev->stats.rx_frame_errors;
469 ++tunnel->dev->stats.rx_errors;
470 goto drop;
471 }
472 }
473
474 tstats = this_cpu_ptr(tunnel->dev->tstats);
475 u64_stats_update_begin(&tstats->syncp);
476 tstats->rx_packets++;
477 tstats->rx_bytes += skb->len;
478 u64_stats_update_end(&tstats->syncp);
479
480 gro_cells_receive(&tunnel->gro_cells, skb);
481 return 0;
482
483drop:
484 kfree_skb(skb);
485 return 0;
486}
487EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
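/*
 * Editor annotation: ip_tunnel_rcv() always takes ownership of the skb,
 * either handing it to gro_cells_receive() or freeing it on the drop
 * path, and returns 0 in both cases; callers therefore just return its
 * result and must not touch the skb afterwards.
 */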
488
489void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
490 const struct iphdr *tnl_params)
491{
492 struct ip_tunnel *tunnel = netdev_priv(dev);
493 const struct iphdr *inner_iph;
494 struct iphdr *iph;
495 struct flowi4 fl4;
496 u8 tos, ttl;
497 __be16 df;
498 struct rtable *rt; /* Route to the other host */
499 struct net_device *tdev; /* Device to other host */
500 unsigned int max_headroom; /* The extra header space needed */
501 __be32 dst;
502 int mtu;
503
504 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
505
506 dst = tnl_params->daddr;
507 if (dst == 0) {
508 /* NBMA tunnel */
509
510 if (skb_dst(skb) == NULL) {
511 dev->stats.tx_fifo_errors++;
512 goto tx_error;
513 }
514
515 if (skb->protocol == htons(ETH_P_IP)) {
516 rt = skb_rtable(skb);
517 dst = rt_nexthop(rt, inner_iph->daddr);
518 }
519#if IS_ENABLED(CONFIG_IPV6)
520 else if (skb->protocol == htons(ETH_P_IPV6)) {
521 const struct in6_addr *addr6;
522 struct neighbour *neigh;
523 bool do_tx_error_icmp;
524 int addr_type;
525
526 neigh = dst_neigh_lookup(skb_dst(skb),
527 &ipv6_hdr(skb)->daddr);
528 if (neigh == NULL)
529 goto tx_error;
530
531 addr6 = (const struct in6_addr *)&neigh->primary_key;
532 addr_type = ipv6_addr_type(addr6);
533
534 if (addr_type == IPV6_ADDR_ANY) {
535 addr6 = &ipv6_hdr(skb)->daddr;
536 addr_type = ipv6_addr_type(addr6);
537 }
538
539 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
540 do_tx_error_icmp = true;
541 else {
542 do_tx_error_icmp = false;
543 dst = addr6->s6_addr32[3];
544 }
545 neigh_release(neigh);
546 if (do_tx_error_icmp)
547 goto tx_error_icmp;
548 }
549#endif
550 else
551 goto tx_error;
552 }
553
554 tos = tnl_params->tos;
555 if (tos & 0x1) {
556 tos &= ~0x1;
557 if (skb->protocol == htons(ETH_P_IP))
558 tos = inner_iph->tos;
559 else if (skb->protocol == htons(ETH_P_IPV6))
560 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
561 }
562
563 rt = ip_route_output_tunnel(dev_net(dev), &fl4,
564 tunnel->parms.iph.protocol,
565 dst, tnl_params->saddr,
566 tunnel->parms.o_key,
567 RT_TOS(tos),
568 tunnel->parms.link);
569 if (IS_ERR(rt)) {
570 dev->stats.tx_carrier_errors++;
571 goto tx_error;
572 }
573 tdev = rt->dst.dev;
574
575 if (tdev == dev) {
576 ip_rt_put(rt);
577 dev->stats.collisions++;
578 goto tx_error;
579 }
580
581 df = tnl_params->frag_off;
582
583 if (df)
584 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
585 - sizeof(struct iphdr);
586 else
587 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
588
589 if (skb_dst(skb))
590 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
591
592 if (skb->protocol == htons(ETH_P_IP)) {
593 df |= (inner_iph->frag_off&htons(IP_DF));
594
595 if (!skb_is_gso(skb) &&
596 (inner_iph->frag_off&htons(IP_DF)) &&
597 mtu < ntohs(inner_iph->tot_len)) {
598 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
599 ip_rt_put(rt);
600 goto tx_error;
601 }
602 }
603#if IS_ENABLED(CONFIG_IPV6)
604 else if (skb->protocol == htons(ETH_P_IPV6)) {
605 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
606
607 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
608 mtu >= IPV6_MIN_MTU) {
609 if ((tunnel->parms.iph.daddr &&
610 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
611 rt6->rt6i_dst.plen == 128) {
612 rt6->rt6i_flags |= RTF_MODIFIED;
613 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
614 }
615 }
616
617 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
618 mtu < skb->len) {
619 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
620 ip_rt_put(rt);
621 goto tx_error;
622 }
623 }
624#endif
625
626 if (tunnel->err_count > 0) {
627 if (time_before(jiffies,
628 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
629 tunnel->err_count--;
630
631 dst_link_failure(skb);
632 } else
633 tunnel->err_count = 0;
634 }
635
636 ttl = tnl_params->ttl;
637 if (ttl == 0) {
638 if (skb->protocol == htons(ETH_P_IP))
639 ttl = inner_iph->ttl;
640#if IS_ENABLED(CONFIG_IPV6)
641 else if (skb->protocol == htons(ETH_P_IPV6))
642 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
643#endif
644 else
645 ttl = ip4_dst_hoplimit(&rt->dst);
646 }
647
648 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr)
649 + rt->dst.header_len;
650 if (max_headroom > dev->needed_headroom) {
651 dev->needed_headroom = max_headroom;
652 if (skb_cow_head(skb, dev->needed_headroom)) {
653 dev->stats.tx_dropped++;
654 dev_kfree_skb(skb);
655 return;
656 }
657 }
658
659 skb_dst_drop(skb);
660 skb_dst_set(skb, &rt->dst);
661 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
662
663 /* Push down and install the IP header. */
664 skb_push(skb, sizeof(struct iphdr));
665 skb_reset_network_header(skb);
666
667 iph = ip_hdr(skb);
668 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
669
670 iph->version = 4;
671 iph->ihl = sizeof(struct iphdr) >> 2;
672 iph->frag_off = df;
673 iph->protocol = tnl_params->protocol;
674 iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
675 iph->daddr = fl4.daddr;
676 iph->saddr = fl4.saddr;
677 iph->ttl = ttl;
678 tunnel_ip_select_ident(skb, inner_iph, &rt->dst);
679
680 iptunnel_xmit(skb, dev);
681 return;
682
683#if IS_ENABLED(CONFIG_IPV6)
684tx_error_icmp:
685 dst_link_failure(skb);
686#endif
687tx_error:
688 dev->stats.tx_errors++;
689 dev_kfree_skb(skb);
690}
691EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
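/*
 * Usage sketch (editor annotation): with header construction and PMTU
 * handling centralized above, a driver's ndo_start_xmit shrinks to
 * marking the inner headers and delegating, as the ipip conversion
 * below shows:
 *
 *	if (likely(!skb->encapsulation)) {
 *		skb_reset_inner_headers(skb);
 *		skb->encapsulation = 1;
 *	}
 *	ip_tunnel_xmit(skb, dev, &tunnel->parms.iph);
 *	return NETDEV_TX_OK;
 *
 * Note that ip_tunnel_xmit() consumes the skb on every path, success
 * and error alike, so the caller must not free it again.
 */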
692
693static void ip_tunnel_update(struct ip_tunnel_net *itn,
694 struct ip_tunnel *t,
695 struct net_device *dev,
696 struct ip_tunnel_parm *p,
697 bool set_mtu)
698{
699 ip_tunnel_del(t);
700 t->parms.iph.saddr = p->iph.saddr;
701 t->parms.iph.daddr = p->iph.daddr;
702 t->parms.i_key = p->i_key;
703 t->parms.o_key = p->o_key;
704 if (dev->type != ARPHRD_ETHER) {
705 memcpy(dev->dev_addr, &p->iph.saddr, 4);
706 memcpy(dev->broadcast, &p->iph.daddr, 4);
707 }
708 ip_tunnel_add(itn, t);
709
710 t->parms.iph.ttl = p->iph.ttl;
711 t->parms.iph.tos = p->iph.tos;
712 t->parms.iph.frag_off = p->iph.frag_off;
713
714 if (t->parms.link != p->link) {
715 int mtu;
716
717 t->parms.link = p->link;
718 mtu = ip_tunnel_bind_dev(dev);
719 if (set_mtu)
720 dev->mtu = mtu;
721 }
722 netdev_state_change(dev);
723}
724
725int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
726{
727 int err = 0;
728 struct ip_tunnel *t;
729 struct net *net = dev_net(dev);
730 struct ip_tunnel *tunnel = netdev_priv(dev);
731 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
732
733 BUG_ON(!itn->fb_tunnel_dev);
734 switch (cmd) {
735 case SIOCGETTUNNEL:
736 t = NULL;
737 if (dev == itn->fb_tunnel_dev)
738 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
739 if (t == NULL)
740 t = netdev_priv(dev);
741 memcpy(p, &t->parms, sizeof(*p));
742 break;
743
744 case SIOCADDTUNNEL:
745 case SIOCCHGTUNNEL:
746 err = -EPERM;
747 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
748 goto done;
749 if (p->iph.ttl)
750 p->iph.frag_off |= htons(IP_DF);
751 if (!(p->i_flags&TUNNEL_KEY))
752 p->i_key = 0;
753 if (!(p->o_flags&TUNNEL_KEY))
754 p->o_key = 0;
755
756 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
757
758 if (!t && (cmd == SIOCADDTUNNEL))
759 t = ip_tunnel_create(net, itn, p);
760
761 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
762 if (t != NULL) {
763 if (t->dev != dev) {
764 err = -EEXIST;
765 break;
766 }
767 } else {
768 unsigned int nflags = 0;
769
770 if (ipv4_is_multicast(p->iph.daddr))
771 nflags = IFF_BROADCAST;
772 else if (p->iph.daddr)
773 nflags = IFF_POINTOPOINT;
774
775 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
776 err = -EINVAL;
777 break;
778 }
779
780 t = netdev_priv(dev);
781 }
782 }
783
784 if (t) {
785 err = 0;
786 ip_tunnel_update(itn, t, dev, p, true);
787 } else
788 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
789 break;
790
791 case SIOCDELTUNNEL:
792 err = -EPERM;
793 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
794 goto done;
795
796 if (dev == itn->fb_tunnel_dev) {
797 err = -ENOENT;
798 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
799 if (t == NULL)
800 goto done;
801 err = -EPERM;
802 if (t == netdev_priv(itn->fb_tunnel_dev))
803 goto done;
804 dev = t->dev;
805 }
806 unregister_netdevice(dev);
807 err = 0;
808 break;
809
810 default:
811 err = -EINVAL;
812 }
813
814done:
815 return err;
816}
817EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
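/*
 * Usage sketch (editor annotation): drivers keep only a thin ioctl
 * wrapper that copies the parms from user space, applies any
 * protocol-specific validation, and delegates here -- compare the
 * rewritten ipip_tunnel_ioctl() below:
 *
 *	struct ip_tunnel_parm p;
 *	int err;
 *
 *	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 *		return -EFAULT;
 *	err = ip_tunnel_ioctl(dev, &p, cmd);
 *	if (err)
 *		return err;
 *	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 *		return -EFAULT;
 *	return 0;
 */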
818
819int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
820{
821 struct ip_tunnel *tunnel = netdev_priv(dev);
822 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
823
824 if (new_mtu < 68 ||
825 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
826 return -EINVAL;
827 dev->mtu = new_mtu;
828 return 0;
829}
830EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
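/*
 * Editor annotation: the 0xFFF8 ceiling keeps mtu + hard_header_len +
 * t_hlen within the 16-bit IP total-length field while staying a
 * multiple of 8 for fragmentation; 68 is the historical IPv4 minimum
 * MTU every host must accept.
 */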
831
832static void ip_tunnel_dev_free(struct net_device *dev)
833{
834 struct ip_tunnel *tunnel = netdev_priv(dev);
835
836 gro_cells_destroy(&tunnel->gro_cells);
837 free_percpu(dev->tstats);
838 free_netdev(dev);
839}
840
841void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
842{
843 struct net *net = dev_net(dev);
844 struct ip_tunnel *tunnel = netdev_priv(dev);
845 struct ip_tunnel_net *itn;
846
847 itn = net_generic(net, tunnel->ip_tnl_net_id);
848
849 if (itn->fb_tunnel_dev != dev) {
850 ip_tunnel_del(netdev_priv(dev));
851 unregister_netdevice_queue(dev, head);
852 }
853}
854EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
855
856int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
857 struct rtnl_link_ops *ops, char *devname)
858{
859 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
860 struct ip_tunnel_parm parms;
861
862 itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL);
863 if (!itn->tunnels)
864 return -ENOMEM;
865
866 if (!ops) {
867 itn->fb_tunnel_dev = NULL;
868 return 0;
869 }
870 memset(&parms, 0, sizeof(parms));
871 if (devname)
872 strlcpy(parms.name, devname, IFNAMSIZ);
873
874 rtnl_lock();
875 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
876 rtnl_unlock();
877 if (IS_ERR(itn->fb_tunnel_dev)) {
878 kfree(itn->tunnels);
879 return PTR_ERR(itn->fb_tunnel_dev);
880 }
881
882 return 0;
883}
884EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
885
886static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head)
887{
888 int h;
889
890 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
891 struct ip_tunnel *t;
892 struct hlist_node *n;
893 struct hlist_head *thead = &itn->tunnels[h];
894
895 hlist_for_each_entry_safe(t, n, thead, hash_node)
896 unregister_netdevice_queue(t->dev, head);
897 }
898 if (itn->fb_tunnel_dev)
899 unregister_netdevice_queue(itn->fb_tunnel_dev, head);
900}
901
902void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn)
903{
904 LIST_HEAD(list);
905
906 rtnl_lock();
907 ip_tunnel_destroy(itn, &list);
908 unregister_netdevice_many(&list);
909 rtnl_unlock();
910 kfree(itn->tunnels);
911}
912EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
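/*
 * Usage sketch (editor annotation): ip_tunnel_init_net() and
 * ip_tunnel_delete_net() pair up as a driver's pernet init/exit, with
 * .size = sizeof(struct ip_tunnel_net) in its pernet_operations -- as
 * in the converted ipip driver below:
 *
 *	static int __net_init ipip_init_net(struct net *net)
 *	{
 *		return ip_tunnel_init_net(net, ipip_net_id,
 *					  &ipip_link_ops, "tunl0");
 *	}
 *
 *	static void __net_exit ipip_exit_net(struct net *net)
 *	{
 *		ip_tunnel_delete_net(net_generic(net, ipip_net_id));
 *	}
 */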
913
914int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
915 struct ip_tunnel_parm *p)
916{
917 struct ip_tunnel *nt;
918 struct net *net = dev_net(dev);
919 struct ip_tunnel_net *itn;
920 int mtu;
921 int err;
922
923 nt = netdev_priv(dev);
924 itn = net_generic(net, nt->ip_tnl_net_id);
925
926 if (ip_tunnel_find(itn, p, dev->type))
927 return -EEXIST;
928
929 nt->parms = *p;
930 err = register_netdevice(dev);
931 if (err)
932 goto out;
933
934 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
935 eth_hw_addr_random(dev);
936
937 mtu = ip_tunnel_bind_dev(dev);
938 if (!tb[IFLA_MTU])
939 dev->mtu = mtu;
940
941 ip_tunnel_add(itn, nt);
942
943out:
944 return err;
945}
946EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
947
948int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
949 struct ip_tunnel_parm *p)
950{
951 struct ip_tunnel *t, *nt;
952 struct net *net = dev_net(dev);
953 struct ip_tunnel *tunnel = netdev_priv(dev);
954 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
955
956 if (dev == itn->fb_tunnel_dev)
957 return -EINVAL;
958
959 nt = netdev_priv(dev);
960
961 t = ip_tunnel_find(itn, p, dev->type);
962
963 if (t) {
964 if (t->dev != dev)
965 return -EEXIST;
966 } else {
967 t = nt;
968
969 if (dev->type != ARPHRD_ETHER) {
970 unsigned int nflags = 0;
971
972 if (ipv4_is_multicast(p->iph.daddr))
973 nflags = IFF_BROADCAST;
974 else if (p->iph.daddr)
975 nflags = IFF_POINTOPOINT;
976
977 if ((dev->flags ^ nflags) &
978 (IFF_POINTOPOINT | IFF_BROADCAST))
979 return -EINVAL;
980 }
981 }
982
983 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
984 return 0;
985}
986EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
987
988int ip_tunnel_init(struct net_device *dev)
989{
990 struct ip_tunnel *tunnel = netdev_priv(dev);
991 struct iphdr *iph = &tunnel->parms.iph;
992 int err;
993
994 dev->destructor = ip_tunnel_dev_free;
995 dev->tstats = alloc_percpu(struct pcpu_tstats);
996 if (!dev->tstats)
997 return -ENOMEM;
998
999 err = gro_cells_init(&tunnel->gro_cells, dev);
1000 if (err) {
1001 free_percpu(dev->tstats);
1002 return err;
1003 }
1004
1005 tunnel->dev = dev;
1006 strcpy(tunnel->parms.name, dev->name);
1007 iph->version = 4;
1008 iph->ihl = 5;
1009
1010 return 0;
1011}
1012EXPORT_SYMBOL_GPL(ip_tunnel_init);
1013
1014void ip_tunnel_uninit(struct net_device *dev)
1015{
1016 struct net *net = dev_net(dev);
1017 struct ip_tunnel *tunnel = netdev_priv(dev);
1018 struct ip_tunnel_net *itn;
1019
1020 itn = net_generic(net, tunnel->ip_tnl_net_id);
1021	/* fb_tunnel_dev will be unregistered in the net-exit call. */
1022 if (itn->fb_tunnel_dev != dev)
1023 ip_tunnel_del(netdev_priv(dev));
1024}
1025EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1026
1027/* Do the least required initialization; the rest is done in the ip_tunnel_init call */
1028void ip_tunnel_setup(struct net_device *dev, int net_id)
1029{
1030 struct ip_tunnel *tunnel = netdev_priv(dev);
1031 tunnel->ip_tnl_net_id = net_id;
1032}
1033EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1034
1035MODULE_LICENSE("GPL");
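Taken together, the new common code exports a complete datapath (ip_tunnel_lookup, ip_tunnel_rcv, ip_tunnel_xmit), configuration (ip_tunnel_ioctl, ip_tunnel_newlink/changelink, ip_tunnel_change_mtu), and lifetime handling (ip_tunnel_init/uninit/setup plus the pernet init/delete pair), so an encapsulation driver reduces to protocol glue. A minimal sketch of such a driver, modelled on the ipip conversion in the hunks that follow -- the "my_*" names are illustrative, and ip_tunnel_get_stats64 is the shared stats helper the converted drivers switch to:

	static const struct net_device_ops my_netdev_ops = {
		.ndo_init	 = my_tunnel_init,	/* fill parms, call ip_tunnel_init() */
		.ndo_uninit	 = ip_tunnel_uninit,
		.ndo_start_xmit	 = my_tunnel_xmit,	/* wrap ip_tunnel_xmit() */
		.ndo_do_ioctl	 = my_tunnel_ioctl,	/* wrap ip_tunnel_ioctl() */
		.ndo_change_mtu	 = ip_tunnel_change_mtu,
		.ndo_get_stats64 = ip_tunnel_get_stats64,
	};

	static void my_tunnel_setup(struct net_device *dev)
	{
		dev->netdev_ops = &my_netdev_ops;
		ip_tunnel_setup(dev, my_net_id);	/* my_net_id: the driver's pernet id */
	}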
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index c3a4233c0ac2..9d2bdb2c1d3f 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -38,7 +38,7 @@
38#include <net/sock.h> 38#include <net/sock.h>
39#include <net/ip.h> 39#include <net/ip.h>
40#include <net/icmp.h> 40#include <net/icmp.h>
41#include <net/ipip.h> 41#include <net/ip_tunnels.h>
42#include <net/inet_ecn.h> 42#include <net/inet_ecn.h>
43#include <net/xfrm.h> 43#include <net/xfrm.h>
44#include <net/net_namespace.h> 44#include <net/net_namespace.h>
@@ -82,44 +82,6 @@ static int vti_tunnel_bind_dev(struct net_device *dev);
82} while (0) 82} while (0)
83 83
84 84
85static struct rtnl_link_stats64 *vti_get_stats64(struct net_device *dev,
86 struct rtnl_link_stats64 *tot)
87{
88 int i;
89
90 for_each_possible_cpu(i) {
91 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
92 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
93 unsigned int start;
94
95 do {
96 start = u64_stats_fetch_begin_bh(&tstats->syncp);
97 rx_packets = tstats->rx_packets;
98 tx_packets = tstats->tx_packets;
99 rx_bytes = tstats->rx_bytes;
100 tx_bytes = tstats->tx_bytes;
101 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
102
103 tot->rx_packets += rx_packets;
104 tot->tx_packets += tx_packets;
105 tot->rx_bytes += rx_bytes;
106 tot->tx_bytes += tx_bytes;
107 }
108
109 tot->multicast = dev->stats.multicast;
110 tot->rx_crc_errors = dev->stats.rx_crc_errors;
111 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
112 tot->rx_length_errors = dev->stats.rx_length_errors;
113 tot->rx_errors = dev->stats.rx_errors;
114 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
115 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
116 tot->tx_dropped = dev->stats.tx_dropped;
117 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
118 tot->tx_errors = dev->stats.tx_errors;
119
120 return tot;
121}
122
123static struct ip_tunnel *vti_tunnel_lookup(struct net *net, 85static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
124 __be32 remote, __be32 local) 86 __be32 remote, __be32 local)
125{ 87{
@@ -597,7 +559,7 @@ static const struct net_device_ops vti_netdev_ops = {
597 .ndo_start_xmit = vti_tunnel_xmit, 559 .ndo_start_xmit = vti_tunnel_xmit,
598 .ndo_do_ioctl = vti_tunnel_ioctl, 560 .ndo_do_ioctl = vti_tunnel_ioctl,
599 .ndo_change_mtu = vti_tunnel_change_mtu, 561 .ndo_change_mtu = vti_tunnel_change_mtu,
600 .ndo_get_stats64 = vti_get_stats64, 562 .ndo_get_stats64 = ip_tunnel_get_stats64,
601}; 563};
602 564
603static void vti_dev_free(struct net_device *dev) 565static void vti_dev_free(struct net_device *dev)
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index f01d1b1aff7f..59cb8c769056 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -75,6 +75,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
75 t->props.mode = x->props.mode; 75 t->props.mode = x->props.mode;
76 t->props.saddr.a4 = x->props.saddr.a4; 76 t->props.saddr.a4 = x->props.saddr.a4;
77 t->props.flags = x->props.flags; 77 t->props.flags = x->props.flags;
78 t->props.extra_flags = x->props.extra_flags;
78 memcpy(&t->mark, &x->mark, sizeof(t->mark)); 79 memcpy(&t->mark, &x->mark, sizeof(t->mark));
79 80
80 if (xfrm_init_state(t)) 81 if (xfrm_init_state(t))
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index bf6c5cf31aed..efa1138fa523 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -206,7 +206,7 @@ static int __init ic_open_devs(void)
206 struct ic_device *d, **last; 206 struct ic_device *d, **last;
207 struct net_device *dev; 207 struct net_device *dev;
208 unsigned short oflags; 208 unsigned short oflags;
209 unsigned long start; 209 unsigned long start, next_msg;
210 210
211 last = &ic_first_dev; 211 last = &ic_first_dev;
212 rtnl_lock(); 212 rtnl_lock();
@@ -263,12 +263,23 @@ static int __init ic_open_devs(void)
263 263
264 /* wait for a carrier on at least one device */ 264 /* wait for a carrier on at least one device */
265 start = jiffies; 265 start = jiffies;
266 next_msg = start + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12);
266 while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { 267 while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) {
268 int wait, elapsed;
269
267 for_each_netdev(&init_net, dev) 270 for_each_netdev(&init_net, dev)
268 if (ic_is_init_dev(dev) && netif_carrier_ok(dev)) 271 if (ic_is_init_dev(dev) && netif_carrier_ok(dev))
269 goto have_carrier; 272 goto have_carrier;
270 273
271 msleep(1); 274 msleep(1);
275
276		if (time_before(jiffies, next_msg))
277 continue;
278
279 elapsed = jiffies_to_msecs(jiffies - start);
280 wait = (CONF_CARRIER_TIMEOUT - elapsed + 500)/1000;
281 pr_info("Waiting up to %d more seconds for network.\n", wait);
282 next_msg = jiffies + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12);
272 } 283 }
273have_carrier: 284have_carrier:
274 rtnl_unlock(); 285 rtnl_unlock();
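The new bookkeeping prints a progress message roughly every CONF_CARRIER_TIMEOUT/12 milliseconds while waiting for carrier, and the "+ 500" rounds the remaining time to the nearest second instead of truncating. Assuming the usual 120 s carrier timeout (the constant is defined earlier in ipconfig.c and not shown here), that is one message about every 10 s; after 30 s of waiting, wait = (120000 - 30000 + 500)/1000 = 90, so the log reads "Waiting up to 90 more seconds for network.".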
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 8f024d41eefa..77bfcce64fe5 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -111,227 +111,21 @@
111#include <net/sock.h> 111#include <net/sock.h>
112#include <net/ip.h> 112#include <net/ip.h>
113#include <net/icmp.h> 113#include <net/icmp.h>
114#include <net/ipip.h> 114#include <net/ip_tunnels.h>
115#include <net/inet_ecn.h> 115#include <net/inet_ecn.h>
116#include <net/xfrm.h> 116#include <net/xfrm.h>
117#include <net/net_namespace.h> 117#include <net/net_namespace.h>
118#include <net/netns/generic.h> 118#include <net/netns/generic.h>
119 119
120#define HASH_SIZE 16
121#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
122
123static bool log_ecn_error = true; 120static bool log_ecn_error = true;
124module_param(log_ecn_error, bool, 0644); 121module_param(log_ecn_error, bool, 0644);
125MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 122MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126 123
127static int ipip_net_id __read_mostly; 124static int ipip_net_id __read_mostly;
128struct ipip_net {
129 struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
130 struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
131 struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
132 struct ip_tunnel __rcu *tunnels_wc[1];
133 struct ip_tunnel __rcu **tunnels[4];
134
135 struct net_device *fb_tunnel_dev;
136};
137 125
138static int ipip_tunnel_init(struct net_device *dev); 126static int ipip_tunnel_init(struct net_device *dev);
139static void ipip_tunnel_setup(struct net_device *dev);
140static void ipip_dev_free(struct net_device *dev);
141static struct rtnl_link_ops ipip_link_ops __read_mostly; 127static struct rtnl_link_ops ipip_link_ops __read_mostly;
142 128
143static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
144 struct rtnl_link_stats64 *tot)
145{
146 int i;
147
148 for_each_possible_cpu(i) {
149 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
150 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
151 unsigned int start;
152
153 do {
154 start = u64_stats_fetch_begin_bh(&tstats->syncp);
155 rx_packets = tstats->rx_packets;
156 tx_packets = tstats->tx_packets;
157 rx_bytes = tstats->rx_bytes;
158 tx_bytes = tstats->tx_bytes;
159 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
160
161 tot->rx_packets += rx_packets;
162 tot->tx_packets += tx_packets;
163 tot->rx_bytes += rx_bytes;
164 tot->tx_bytes += tx_bytes;
165 }
166
167 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
168 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
169 tot->tx_dropped = dev->stats.tx_dropped;
170 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
171 tot->tx_errors = dev->stats.tx_errors;
172 tot->collisions = dev->stats.collisions;
173
174 return tot;
175}
176
177static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
178 __be32 remote, __be32 local)
179{
180 unsigned int h0 = HASH(remote);
181 unsigned int h1 = HASH(local);
182 struct ip_tunnel *t;
183 struct ipip_net *ipn = net_generic(net, ipip_net_id);
184
185 for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
186 if (local == t->parms.iph.saddr &&
187 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
188 return t;
189
190 for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
191 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
192 return t;
193
194 for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
195 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
196 return t;
197
198 t = rcu_dereference(ipn->tunnels_wc[0]);
199 if (t && (t->dev->flags&IFF_UP))
200 return t;
201 return NULL;
202}
203
204static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
205 struct ip_tunnel_parm *parms)
206{
207 __be32 remote = parms->iph.daddr;
208 __be32 local = parms->iph.saddr;
209 unsigned int h = 0;
210 int prio = 0;
211
212 if (remote) {
213 prio |= 2;
214 h ^= HASH(remote);
215 }
216 if (local) {
217 prio |= 1;
218 h ^= HASH(local);
219 }
220 return &ipn->tunnels[prio][h];
221}
222
223static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
224 struct ip_tunnel *t)
225{
226 return __ipip_bucket(ipn, &t->parms);
227}
228
229static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
230{
231 struct ip_tunnel __rcu **tp;
232 struct ip_tunnel *iter;
233
234 for (tp = ipip_bucket(ipn, t);
235 (iter = rtnl_dereference(*tp)) != NULL;
236 tp = &iter->next) {
237 if (t == iter) {
238 rcu_assign_pointer(*tp, t->next);
239 break;
240 }
241 }
242}
243
244static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
245{
246 struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
247
248 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
249 rcu_assign_pointer(*tp, t);
250}
251
252static int ipip_tunnel_create(struct net_device *dev)
253{
254 struct ip_tunnel *t = netdev_priv(dev);
255 struct net *net = dev_net(dev);
256 struct ipip_net *ipn = net_generic(net, ipip_net_id);
257 int err;
258
259 err = ipip_tunnel_init(dev);
260 if (err < 0)
261 goto out;
262
263 err = register_netdevice(dev);
264 if (err < 0)
265 goto out;
266
267 strcpy(t->parms.name, dev->name);
268 dev->rtnl_link_ops = &ipip_link_ops;
269
270 dev_hold(dev);
271 ipip_tunnel_link(ipn, t);
272 return 0;
273
274out:
275 return err;
276}
277
278static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
279 struct ip_tunnel_parm *parms, int create)
280{
281 __be32 remote = parms->iph.daddr;
282 __be32 local = parms->iph.saddr;
283 struct ip_tunnel *t, *nt;
284 struct ip_tunnel __rcu **tp;
285 struct net_device *dev;
286 char name[IFNAMSIZ];
287 struct ipip_net *ipn = net_generic(net, ipip_net_id);
288
289 for (tp = __ipip_bucket(ipn, parms);
290 (t = rtnl_dereference(*tp)) != NULL;
291 tp = &t->next) {
292 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
293 return t;
294 }
295 if (!create)
296 return NULL;
297
298 if (parms->name[0])
299 strlcpy(name, parms->name, IFNAMSIZ);
300 else
301 strcpy(name, "tunl%d");
302
303 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
304 if (dev == NULL)
305 return NULL;
306
307 dev_net_set(dev, net);
308
309 nt = netdev_priv(dev);
310 nt->parms = *parms;
311
312 if (ipip_tunnel_create(dev) < 0)
313 goto failed_free;
314
315 return nt;
316
317failed_free:
318 ipip_dev_free(dev);
319 return NULL;
320}
321
322/* called with RTNL */
323static void ipip_tunnel_uninit(struct net_device *dev)
324{
325 struct net *net = dev_net(dev);
326 struct ipip_net *ipn = net_generic(net, ipip_net_id);
327
328 if (dev == ipn->fb_tunnel_dev)
329 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
330 else
331 ipip_tunnel_unlink(ipn, netdev_priv(dev));
332 dev_put(dev);
333}
334
335static int ipip_err(struct sk_buff *skb, u32 info) 129static int ipip_err(struct sk_buff *skb, u32 info)
336{ 130{
337 131
@@ -339,41 +133,17 @@ static int ipip_err(struct sk_buff *skb, u32 info)
339 8 bytes of packet payload. It means, that precise relaying of 133 8 bytes of packet payload. It means, that precise relaying of
340 ICMP in the real Internet is absolutely infeasible. 134 ICMP in the real Internet is absolutely infeasible.
341 */ 135 */
136 struct net *net = dev_net(skb->dev);
137 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
342 const struct iphdr *iph = (const struct iphdr *)skb->data; 138 const struct iphdr *iph = (const struct iphdr *)skb->data;
343 const int type = icmp_hdr(skb)->type;
344 const int code = icmp_hdr(skb)->code;
345 struct ip_tunnel *t; 139 struct ip_tunnel *t;
346 int err; 140 int err;
347 141 const int type = icmp_hdr(skb)->type;
348 switch (type) { 142 const int code = icmp_hdr(skb)->code;
349 default:
350 case ICMP_PARAMETERPROB:
351 return 0;
352
353 case ICMP_DEST_UNREACH:
354 switch (code) {
355 case ICMP_SR_FAILED:
356 case ICMP_PORT_UNREACH:
357 /* Impossible event. */
358 return 0;
359 default:
360 /* All others are translated to HOST_UNREACH.
361 rfc2003 contains "deep thoughts" about NET_UNREACH,
362 I believe they are just ether pollution. --ANK
363 */
364 break;
365 }
366 break;
367 case ICMP_TIME_EXCEEDED:
368 if (code != ICMP_EXC_TTL)
369 return 0;
370 break;
371 case ICMP_REDIRECT:
372 break;
373 }
374 143
375 err = -ENOENT; 144 err = -ENOENT;
376 t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); 145 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
146 iph->daddr, iph->saddr, 0);
377 if (t == NULL) 147 if (t == NULL)
378 goto out; 148 goto out;
379 149
@@ -403,53 +173,29 @@ static int ipip_err(struct sk_buff *skb, u32 info)
403 else 173 else
404 t->err_count = 1; 174 t->err_count = 1;
405 t->err_time = jiffies; 175 t->err_time = jiffies;
406out:
407 176
177out:
408 return err; 178 return err;
409} 179}
410 180
181static const struct tnl_ptk_info tpi = {
182 /* no tunnel info required for ipip. */
183 .proto = htons(ETH_P_IP),
184};
185
411static int ipip_rcv(struct sk_buff *skb) 186static int ipip_rcv(struct sk_buff *skb)
412{ 187{
188 struct net *net = dev_net(skb->dev);
189 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
413 struct ip_tunnel *tunnel; 190 struct ip_tunnel *tunnel;
414 const struct iphdr *iph = ip_hdr(skb); 191 const struct iphdr *iph = ip_hdr(skb);
415 int err;
416
417 tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
418 if (tunnel != NULL) {
419 struct pcpu_tstats *tstats;
420 192
193 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
194 iph->saddr, iph->daddr, 0);
195 if (tunnel) {
421 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 196 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
422 goto drop; 197 goto drop;
423 198 return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
424 secpath_reset(skb);
425
426 skb->mac_header = skb->network_header;
427 skb_reset_network_header(skb);
428 skb->protocol = htons(ETH_P_IP);
429 skb->pkt_type = PACKET_HOST;
430
431 __skb_tunnel_rx(skb, tunnel->dev);
432
433 err = IP_ECN_decapsulate(iph, skb);
434 if (unlikely(err)) {
435 if (log_ecn_error)
436 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
437 &iph->saddr, iph->tos);
438 if (err > 1) {
439 ++tunnel->dev->stats.rx_frame_errors;
440 ++tunnel->dev->stats.rx_errors;
441 goto drop;
442 }
443 }
444
445 tstats = this_cpu_ptr(tunnel->dev->tstats);
446 u64_stats_update_begin(&tstats->syncp);
447 tstats->rx_packets++;
448 tstats->rx_bytes += skb->len;
449 u64_stats_update_end(&tstats->syncp);
450
451 netif_rx(skb);
452 return 0;
453 } 199 }
454 200
455 return -1; 201 return -1;
@@ -463,329 +209,64 @@ drop:
463 * This function assumes it is being called from dev_queue_xmit() 209 * This function assumes it is being called from dev_queue_xmit()
464 * and that skb is filled properly by that function. 210 * and that skb is filled properly by that function.
465 */ 211 */
466
467static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 212static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
468{ 213{
469 struct ip_tunnel *tunnel = netdev_priv(dev); 214 struct ip_tunnel *tunnel = netdev_priv(dev);
470 const struct iphdr *tiph = &tunnel->parms.iph; 215 const struct iphdr *tiph = &tunnel->parms.iph;
471 u8 tos = tunnel->parms.iph.tos;
472 __be16 df = tiph->frag_off;
473 struct rtable *rt; /* Route to the other host */
474 struct net_device *tdev; /* Device to other host */
475 const struct iphdr *old_iph;
476 struct iphdr *iph; /* Our new IP header */
477 unsigned int max_headroom; /* The extra header space needed */
478 __be32 dst = tiph->daddr;
479 struct flowi4 fl4;
480 int mtu;
481
482 if (skb->protocol != htons(ETH_P_IP))
483 goto tx_error;
484 216
485 if (skb->ip_summed == CHECKSUM_PARTIAL && 217 if (unlikely(skb->protocol != htons(ETH_P_IP)))
486 skb_checksum_help(skb))
487 goto tx_error; 218 goto tx_error;
488 219
489 old_iph = ip_hdr(skb); 220 if (likely(!skb->encapsulation)) {
490 221 skb_reset_inner_headers(skb);
491 if (tos & 1) 222 skb->encapsulation = 1;
492 tos = old_iph->tos;
493
494 if (!dst) {
495 /* NBMA tunnel */
496 if ((rt = skb_rtable(skb)) == NULL) {
497 dev->stats.tx_fifo_errors++;
498 goto tx_error;
499 }
500 dst = rt_nexthop(rt, old_iph->daddr);
501 } 223 }
502 224
503 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, 225 ip_tunnel_xmit(skb, dev, tiph);
504 dst, tiph->saddr,
505 0, 0,
506 IPPROTO_IPIP, RT_TOS(tos),
507 tunnel->parms.link);
508 if (IS_ERR(rt)) {
509 dev->stats.tx_carrier_errors++;
510 goto tx_error_icmp;
511 }
512 tdev = rt->dst.dev;
513
514 if (tdev == dev) {
515 ip_rt_put(rt);
516 dev->stats.collisions++;
517 goto tx_error;
518 }
519
520 df |= old_iph->frag_off & htons(IP_DF);
521
522 if (df) {
523 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
524
525 if (mtu < 68) {
526 dev->stats.collisions++;
527 ip_rt_put(rt);
528 goto tx_error;
529 }
530
531 if (skb_dst(skb))
532 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
533
534 if ((old_iph->frag_off & htons(IP_DF)) &&
535 mtu < ntohs(old_iph->tot_len)) {
536 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
537 htonl(mtu));
538 ip_rt_put(rt);
539 goto tx_error;
540 }
541 }
542
543 if (tunnel->err_count > 0) {
544 if (time_before(jiffies,
545 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
546 tunnel->err_count--;
547 dst_link_failure(skb);
548 } else
549 tunnel->err_count = 0;
550 }
551
552 /*
553 * Okay, now see if we can stuff it in the buffer as-is.
554 */
555 max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
556
557 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
558 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
559 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
560 if (!new_skb) {
561 ip_rt_put(rt);
562 dev->stats.tx_dropped++;
563 dev_kfree_skb(skb);
564 return NETDEV_TX_OK;
565 }
566 if (skb->sk)
567 skb_set_owner_w(new_skb, skb->sk);
568 dev_kfree_skb(skb);
569 skb = new_skb;
570 old_iph = ip_hdr(skb);
571 }
572
573 skb->transport_header = skb->network_header;
574 skb_push(skb, sizeof(struct iphdr));
575 skb_reset_network_header(skb);
576 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
577 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
578 IPSKB_REROUTED);
579 skb_dst_drop(skb);
580 skb_dst_set(skb, &rt->dst);
581
582 /*
583 * Push down and install the IPIP header.
584 */
585
586 iph = ip_hdr(skb);
587 iph->version = 4;
588 iph->ihl = sizeof(struct iphdr)>>2;
589 iph->frag_off = df;
590 iph->protocol = IPPROTO_IPIP;
591 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
592 iph->daddr = fl4.daddr;
593 iph->saddr = fl4.saddr;
594
595 if ((iph->ttl = tiph->ttl) == 0)
596 iph->ttl = old_iph->ttl;
597
598 iptunnel_xmit(skb, dev);
599 return NETDEV_TX_OK; 226 return NETDEV_TX_OK;
600 227
601tx_error_icmp:
602 dst_link_failure(skb);
603tx_error: 228tx_error:
604 dev->stats.tx_errors++; 229 dev->stats.tx_errors++;
605 dev_kfree_skb(skb); 230 dev_kfree_skb(skb);
606 return NETDEV_TX_OK; 231 return NETDEV_TX_OK;
607} 232}
608 233
609static void ipip_tunnel_bind_dev(struct net_device *dev)
610{
611 struct net_device *tdev = NULL;
612 struct ip_tunnel *tunnel;
613 const struct iphdr *iph;
614
615 tunnel = netdev_priv(dev);
616 iph = &tunnel->parms.iph;
617
618 if (iph->daddr) {
619 struct rtable *rt;
620 struct flowi4 fl4;
621
622 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
623 iph->daddr, iph->saddr,
624 0, 0,
625 IPPROTO_IPIP,
626 RT_TOS(iph->tos),
627 tunnel->parms.link);
628 if (!IS_ERR(rt)) {
629 tdev = rt->dst.dev;
630 ip_rt_put(rt);
631 }
632 dev->flags |= IFF_POINTOPOINT;
633 }
634
635 if (!tdev && tunnel->parms.link)
636 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
637
638 if (tdev) {
639 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
640 dev->mtu = tdev->mtu - sizeof(struct iphdr);
641 }
642 dev->iflink = tunnel->parms.link;
643}
644
645static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
646{
647 struct net *net = dev_net(t->dev);
648 struct ipip_net *ipn = net_generic(net, ipip_net_id);
649
650 ipip_tunnel_unlink(ipn, t);
651 synchronize_net();
652 t->parms.iph.saddr = p->iph.saddr;
653 t->parms.iph.daddr = p->iph.daddr;
654 memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
655 memcpy(t->dev->broadcast, &p->iph.daddr, 4);
656 ipip_tunnel_link(ipn, t);
657 t->parms.iph.ttl = p->iph.ttl;
658 t->parms.iph.tos = p->iph.tos;
659 t->parms.iph.frag_off = p->iph.frag_off;
660 if (t->parms.link != p->link) {
661 t->parms.link = p->link;
662 ipip_tunnel_bind_dev(t->dev);
663 }
664 netdev_state_change(t->dev);
665}
666
667static int 234static int
668ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 235ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
669{ 236{
670 int err = 0; 237 int err = 0;
671 struct ip_tunnel_parm p; 238 struct ip_tunnel_parm p;
672 struct ip_tunnel *t;
673 struct net *net = dev_net(dev);
674 struct ipip_net *ipn = net_generic(net, ipip_net_id);
675
676 switch (cmd) {
677 case SIOCGETTUNNEL:
678 t = NULL;
679 if (dev == ipn->fb_tunnel_dev) {
680 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
681 err = -EFAULT;
682 break;
683 }
684 t = ipip_tunnel_locate(net, &p, 0);
685 }
686 if (t == NULL)
687 t = netdev_priv(dev);
688 memcpy(&p, &t->parms, sizeof(p));
689 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
690 err = -EFAULT;
691 break;
692
693 case SIOCADDTUNNEL:
694 case SIOCCHGTUNNEL:
695 err = -EPERM;
696 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
697 goto done;
698
699 err = -EFAULT;
700 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
701 goto done;
702
703 err = -EINVAL;
704 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
705 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
706 goto done;
707 if (p.iph.ttl)
708 p.iph.frag_off |= htons(IP_DF);
709
710 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
711
712 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
713 if (t != NULL) {
714 if (t->dev != dev) {
715 err = -EEXIST;
716 break;
717 }
718 } else {
719 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
720 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
721 err = -EINVAL;
722 break;
723 }
724 t = netdev_priv(dev);
725 }
726
727 ipip_tunnel_update(t, &p);
728 }
729
730 if (t) {
731 err = 0;
732 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
733 err = -EFAULT;
734 } else
735 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
736 break;
737
738 case SIOCDELTUNNEL:
739 err = -EPERM;
740 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
741 goto done;
742
743 if (dev == ipn->fb_tunnel_dev) {
744 err = -EFAULT;
745 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
746 goto done;
747 err = -ENOENT;
748 if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
749 goto done;
750 err = -EPERM;
751 if (t->dev == ipn->fb_tunnel_dev)
752 goto done;
753 dev = t->dev;
754 }
755 unregister_netdevice(dev);
756 err = 0;
757 break;
758 239
759 default: 240 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
760 err = -EINVAL; 241 return -EFAULT;
761 }
762
763done:
764 return err;
765}
766 242
767static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) 243 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
768{ 244 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
769 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) 245 return -EINVAL;
246 if (p.i_key || p.o_key || p.i_flags || p.o_flags)
770 return -EINVAL; 247 return -EINVAL;
771 dev->mtu = new_mtu; 248 if (p.iph.ttl)
249 p.iph.frag_off |= htons(IP_DF);
250
251 err = ip_tunnel_ioctl(dev, &p, cmd);
252 if (err)
253 return err;
254
255 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
256 return -EFAULT;
257
772 return 0; 258 return 0;
773} 259}
774 260
775static const struct net_device_ops ipip_netdev_ops = { 261static const struct net_device_ops ipip_netdev_ops = {
776 .ndo_uninit = ipip_tunnel_uninit, 262 .ndo_init = ipip_tunnel_init,
263 .ndo_uninit = ip_tunnel_uninit,
777 .ndo_start_xmit = ipip_tunnel_xmit, 264 .ndo_start_xmit = ipip_tunnel_xmit,
778 .ndo_do_ioctl = ipip_tunnel_ioctl, 265 .ndo_do_ioctl = ipip_tunnel_ioctl,
779 .ndo_change_mtu = ipip_tunnel_change_mtu, 266 .ndo_change_mtu = ip_tunnel_change_mtu,
780 .ndo_get_stats64 = ipip_get_stats64, 267 .ndo_get_stats64 = ip_tunnel_get_stats64,
781}; 268};
782 269
783static void ipip_dev_free(struct net_device *dev)
784{
785 free_percpu(dev->tstats);
786 free_netdev(dev);
787}
788
789#define IPIP_FEATURES (NETIF_F_SG | \ 270#define IPIP_FEATURES (NETIF_F_SG | \
790 NETIF_F_FRAGLIST | \ 271 NETIF_F_FRAGLIST | \
791 NETIF_F_HIGHDMA | \ 272 NETIF_F_HIGHDMA | \
@@ -794,11 +275,8 @@ static void ipip_dev_free(struct net_device *dev)
794static void ipip_tunnel_setup(struct net_device *dev) 275static void ipip_tunnel_setup(struct net_device *dev)
795{ 276{
796 dev->netdev_ops = &ipip_netdev_ops; 277 dev->netdev_ops = &ipip_netdev_ops;
797 dev->destructor = ipip_dev_free;
798 278
799 dev->type = ARPHRD_TUNNEL; 279 dev->type = ARPHRD_TUNNEL;
800 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
801 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
802 dev->flags = IFF_NOARP; 280 dev->flags = IFF_NOARP;
803 dev->iflink = 0; 281 dev->iflink = 0;
804 dev->addr_len = 4; 282 dev->addr_len = 4;
@@ -808,46 +286,19 @@ static void ipip_tunnel_setup(struct net_device *dev)
808 286
809 dev->features |= IPIP_FEATURES; 287 dev->features |= IPIP_FEATURES;
810 dev->hw_features |= IPIP_FEATURES; 288 dev->hw_features |= IPIP_FEATURES;
289 ip_tunnel_setup(dev, ipip_net_id);
811} 290}
812 291
813static int ipip_tunnel_init(struct net_device *dev) 292static int ipip_tunnel_init(struct net_device *dev)
814{ 293{
815 struct ip_tunnel *tunnel = netdev_priv(dev); 294 struct ip_tunnel *tunnel = netdev_priv(dev);
816 295
817 tunnel->dev = dev;
818
819 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 296 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
820 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 297 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
821 298
822 ipip_tunnel_bind_dev(dev); 299 tunnel->hlen = 0;
823 300 tunnel->parms.iph.protocol = IPPROTO_IPIP;
824 dev->tstats = alloc_percpu(struct pcpu_tstats); 301 return ip_tunnel_init(dev);
825 if (!dev->tstats)
826 return -ENOMEM;
827
828 return 0;
829}
830
831static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
832{
833 struct ip_tunnel *tunnel = netdev_priv(dev);
834 struct iphdr *iph = &tunnel->parms.iph;
835 struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
836
837 tunnel->dev = dev;
838 strcpy(tunnel->parms.name, dev->name);
839
840 iph->version = 4;
841 iph->protocol = IPPROTO_IPIP;
842 iph->ihl = 5;
843
844 dev->tstats = alloc_percpu(struct pcpu_tstats);
845 if (!dev->tstats)
846 return -ENOMEM;
847
848 dev_hold(dev);
849 rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
850 return 0;
851} 302}
852 303
853static void ipip_netlink_parms(struct nlattr *data[], 304static void ipip_netlink_parms(struct nlattr *data[],
@@ -887,28 +338,16 @@ static void ipip_netlink_parms(struct nlattr *data[],
887static int ipip_newlink(struct net *src_net, struct net_device *dev, 338static int ipip_newlink(struct net *src_net, struct net_device *dev,
888 struct nlattr *tb[], struct nlattr *data[]) 339 struct nlattr *tb[], struct nlattr *data[])
889{ 340{
890 struct net *net = dev_net(dev); 341 struct ip_tunnel_parm p;
891 struct ip_tunnel *nt;
892
893 nt = netdev_priv(dev);
894 ipip_netlink_parms(data, &nt->parms);
895
896 if (ipip_tunnel_locate(net, &nt->parms, 0))
897 return -EEXIST;
898 342
899 return ipip_tunnel_create(dev); 343 ipip_netlink_parms(data, &p);
344 return ip_tunnel_newlink(dev, tb, &p);
900} 345}
901 346
902static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], 347static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
903 struct nlattr *data[]) 348 struct nlattr *data[])
904{ 349{
905 struct ip_tunnel *t;
906 struct ip_tunnel_parm p; 350 struct ip_tunnel_parm p;
907 struct net *net = dev_net(dev);
908 struct ipip_net *ipn = net_generic(net, ipip_net_id);
909
910 if (dev == ipn->fb_tunnel_dev)
911 return -EINVAL;
912 351
913 ipip_netlink_parms(data, &p); 352 ipip_netlink_parms(data, &p);
914 353
@@ -916,16 +355,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
916 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) 355 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
917 return -EINVAL; 356 return -EINVAL;
918 357
919 t = ipip_tunnel_locate(net, &p, 0); 358 return ip_tunnel_changelink(dev, tb, &p);
920
921 if (t) {
922 if (t->dev != dev)
923 return -EEXIST;
924 } else
925 t = netdev_priv(dev);
926
927 ipip_tunnel_update(t, &p);
928 return 0;
929} 359}
930 360
931static size_t ipip_get_size(const struct net_device *dev) 361static size_t ipip_get_size(const struct net_device *dev)
@@ -982,6 +412,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = {
982 .setup = ipip_tunnel_setup, 412 .setup = ipip_tunnel_setup,
983 .newlink = ipip_newlink, 413 .newlink = ipip_newlink,
984 .changelink = ipip_changelink, 414 .changelink = ipip_changelink,
415 .dellink = ip_tunnel_dellink,
985 .get_size = ipip_get_size, 416 .get_size = ipip_get_size,
986 .fill_info = ipip_fill_info, 417 .fill_info = ipip_fill_info,
987}; 418};
@@ -992,90 +423,29 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
992 .priority = 1, 423 .priority = 1,
993}; 424};
994 425
995static const char banner[] __initconst =
996 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
997
998static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
999{
1000 int prio;
1001
1002 for (prio = 1; prio < 4; prio++) {
1003 int h;
1004 for (h = 0; h < HASH_SIZE; h++) {
1005 struct ip_tunnel *t;
1006
1007 t = rtnl_dereference(ipn->tunnels[prio][h]);
1008 while (t != NULL) {
1009 unregister_netdevice_queue(t->dev, head);
1010 t = rtnl_dereference(t->next);
1011 }
1012 }
1013 }
1014}
1015
1016static int __net_init ipip_init_net(struct net *net) 426static int __net_init ipip_init_net(struct net *net)
1017{ 427{
1018 struct ipip_net *ipn = net_generic(net, ipip_net_id); 428 return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
1019 struct ip_tunnel *t;
1020 int err;
1021
1022 ipn->tunnels[0] = ipn->tunnels_wc;
1023 ipn->tunnels[1] = ipn->tunnels_l;
1024 ipn->tunnels[2] = ipn->tunnels_r;
1025 ipn->tunnels[3] = ipn->tunnels_r_l;
1026
1027 ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
1028 "tunl0",
1029 ipip_tunnel_setup);
1030 if (!ipn->fb_tunnel_dev) {
1031 err = -ENOMEM;
1032 goto err_alloc_dev;
1033 }
1034 dev_net_set(ipn->fb_tunnel_dev, net);
1035
1036 err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
1037 if (err)
1038 goto err_reg_dev;
1039
1040 if ((err = register_netdev(ipn->fb_tunnel_dev)))
1041 goto err_reg_dev;
1042
1043 t = netdev_priv(ipn->fb_tunnel_dev);
1044
1045 strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
1046 return 0;
1047
1048err_reg_dev:
1049 ipip_dev_free(ipn->fb_tunnel_dev);
1050err_alloc_dev:
1051 /* nothing */
1052 return err;
1053} 429}
1054 430
1055static void __net_exit ipip_exit_net(struct net *net) 431static void __net_exit ipip_exit_net(struct net *net)
1056{ 432{
1057 struct ipip_net *ipn = net_generic(net, ipip_net_id); 433 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
1058 LIST_HEAD(list); 434 ip_tunnel_delete_net(itn);
1059
1060 rtnl_lock();
1061 ipip_destroy_tunnels(ipn, &list);
1062 unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
1063 unregister_netdevice_many(&list);
1064 rtnl_unlock();
1065} 435}
1066 436
1067static struct pernet_operations ipip_net_ops = { 437static struct pernet_operations ipip_net_ops = {
1068 .init = ipip_init_net, 438 .init = ipip_init_net,
1069 .exit = ipip_exit_net, 439 .exit = ipip_exit_net,
1070 .id = &ipip_net_id, 440 .id = &ipip_net_id,
1071 .size = sizeof(struct ipip_net), 441 .size = sizeof(struct ip_tunnel_net),
1072}; 442};
1073 443
1074static int __init ipip_init(void) 444static int __init ipip_init(void)
1075{ 445{
1076 int err; 446 int err;
1077 447
1078 printk(banner); 448 pr_info("ipip: IPv4 over IPv4 tunneling driver\n");
1079 449
1080 err = register_pernet_device(&ipip_net_ops); 450 err = register_pernet_device(&ipip_net_ops);
1081 if (err < 0) 451 if (err < 0)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5f95b3aa579e..9d9610ae7855 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -61,7 +61,7 @@
61#include <linux/netfilter_ipv4.h> 61#include <linux/netfilter_ipv4.h>
62#include <linux/compat.h> 62#include <linux/compat.h>
63#include <linux/export.h> 63#include <linux/export.h>
64#include <net/ipip.h> 64#include <net/ip_tunnels.h>
65#include <net/checksum.h> 65#include <net/checksum.h>
66#include <net/netlink.h> 66#include <net/netlink.h>
67#include <net/fib_rules.h> 67#include <net/fib_rules.h>
@@ -626,9 +626,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
626 if (ip_hdr(skb)->version == 0) { 626 if (ip_hdr(skb)->version == 0) {
627 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 627 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
628 nlh->nlmsg_type = NLMSG_ERROR; 628 nlh->nlmsg_type = NLMSG_ERROR;
629 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 629 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
630 skb_trim(skb, nlh->nlmsg_len); 630 skb_trim(skb, nlh->nlmsg_len);
631 e = NLMSG_DATA(nlh); 631 e = nlmsg_data(nlh);
632 e->error = -ETIMEDOUT; 632 e->error = -ETIMEDOUT;
633 memset(&e->msg, 0, sizeof(e->msg)); 633 memset(&e->msg, 0, sizeof(e->msg));
634 634
@@ -910,14 +910,14 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
910 if (ip_hdr(skb)->version == 0) { 910 if (ip_hdr(skb)->version == 0) {
911 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 911 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
912 912
913 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { 913 if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
914 nlh->nlmsg_len = skb_tail_pointer(skb) - 914 nlh->nlmsg_len = skb_tail_pointer(skb) -
915 (u8 *)nlh; 915 (u8 *)nlh;
916 } else { 916 } else {
917 nlh->nlmsg_type = NLMSG_ERROR; 917 nlh->nlmsg_type = NLMSG_ERROR;
918 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 918 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
919 skb_trim(skb, nlh->nlmsg_len); 919 skb_trim(skb, nlh->nlmsg_len);
920 e = NLMSG_DATA(nlh); 920 e = nlmsg_data(nlh);
921 e->error = -EMSGSIZE; 921 e->error = -EMSGSIZE;
922 memset(&e->msg, 0, sizeof(e->msg)); 922 memset(&e->msg, 0, sizeof(e->msg));
923 } 923 }
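Both ipmr.c hunks swap the legacy NLMSG_DATA()/NLMSG_LENGTH() macros for the modern nlmsg_data()/nlmsg_msg_size() helpers. The two are equivalent on the wire: nlmsg_msg_size(len) computes NLMSG_HDRLEN + len just as NLMSG_LENGTH(len) did, and nlmsg_data(nlh) points just past the aligned header; only the accessor style changes.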
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 4c0cf63dd92e..c3e0adea9c27 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -1,4 +1,9 @@
1/* IPv4 specific functions of netfilter core */ 1/*
2 * IPv4 specific functions of netfilter core
3 *
4 * Rusty Russell (C) 2000 -- This code is GPL.
5 * Patrick McHardy (C) 2006-2012
6 */
2#include <linux/kernel.h> 7#include <linux/kernel.h>
3#include <linux/netfilter.h> 8#include <linux/netfilter.h>
4#include <linux/netfilter_ipv4.h> 9#include <linux/netfilter_ipv4.h>
@@ -40,14 +45,14 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
40 fl4.flowi4_flags = flags; 45 fl4.flowi4_flags = flags;
41 rt = ip_route_output_key(net, &fl4); 46 rt = ip_route_output_key(net, &fl4);
42 if (IS_ERR(rt)) 47 if (IS_ERR(rt))
43 return -1; 48 return PTR_ERR(rt);
44 49
45 /* Drop old route. */ 50 /* Drop old route. */
46 skb_dst_drop(skb); 51 skb_dst_drop(skb);
47 skb_dst_set(skb, &rt->dst); 52 skb_dst_set(skb, &rt->dst);
48 53
49 if (skb_dst(skb)->error) 54 if (skb_dst(skb)->error)
50 return -1; 55 return skb_dst(skb)->error;
51 56
52#ifdef CONFIG_XFRM 57#ifdef CONFIG_XFRM
53 if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 58 if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
@@ -56,7 +61,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
56 skb_dst_set(skb, NULL); 61 skb_dst_set(skb, NULL);
57 dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); 62 dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0);
58 if (IS_ERR(dst)) 63 if (IS_ERR(dst))
59	return -1;	64	return PTR_ERR(dst);
60 skb_dst_set(skb, dst); 65 skb_dst_set(skb, dst);
61 } 66 }
62#endif 67#endif
@@ -66,7 +71,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
66 if (skb_headroom(skb) < hh_len && 71 if (skb_headroom(skb) < hh_len &&
67 pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), 72 pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
68 0, GFP_ATOMIC)) 73 0, GFP_ATOMIC))
69 return -1; 74 return -ENOMEM;
70 75
71 return 0; 76 return 0;
72} 77}
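The ip_route_me_harder() hunks replace the bare "return -1" (which callers could only treat as an opaque failure) with real error codes: PTR_ERR() of the failed route or xfrm lookup, the dst's own ->error, and -ENOMEM for the headroom expansion. A hypothetical caller can then propagate the precise errno; a minimal sketch, with RTN_UNSPEC as the addr_type assumed for illustration:

	err = ip_route_me_harder(skb, RTN_UNSPEC);
	if (err < 0)
		return err;	/* e.g. -ENOMEM or the routing error */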
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 0d755c50994b..e7916c193932 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -71,7 +71,7 @@ config IP_NF_MATCH_ECN
71 71
72config IP_NF_MATCH_RPFILTER 72config IP_NF_MATCH_RPFILTER
73 tristate '"rpfilter" reverse path filter match support' 73 tristate '"rpfilter" reverse path filter match support'
74 depends on NETFILTER_ADVANCED 74 depends on NETFILTER_ADVANCED && (IP_NF_MANGLE || IP_NF_RAW)
75 ---help--- 75 ---help---
76 This option allows you to match packets whose replies would 76 This option allows you to match packets whose replies would
77 go out via the interface the packet came in. 77 go out via the interface the packet came in.
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 7dc6a9743592..85a4f21aac1a 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -6,6 +6,7 @@
6 * Some ARP specific bits are: 6 * Some ARP specific bits are:
7 * 7 *
8 * Copyright (C) 2002 David S. Miller (davem@redhat.com) 8 * Copyright (C) 2002 David S. Miller (davem@redhat.com)
9 * Copyright (C) 2006-2009 Patrick McHardy <kaber@trash.net>
9 * 10 *
10 */ 11 */
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 79ca5e70d497..eadab1ed6500 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -48,9 +48,7 @@ static int __net_init arptable_filter_net_init(struct net *net)
48 net->ipv4.arptable_filter = 48 net->ipv4.arptable_filter =
49 arpt_register_table(net, &packet_filter, repl); 49 arpt_register_table(net, &packet_filter, repl);
50 kfree(repl); 50 kfree(repl);
51 if (IS_ERR(net->ipv4.arptable_filter)) 51 return PTR_RET(net->ipv4.arptable_filter);
52 return PTR_ERR(net->ipv4.arptable_filter);
53 return 0;
54} 52}
55 53
56static void __net_exit arptable_filter_net_exit(struct net *net) 54static void __net_exit arptable_filter_net_exit(struct net *net)
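PTR_RET() from <linux/err.h> collapses the IS_ERR()/PTR_ERR()/return 0 triple removed above into a single expression. Its semantics, approximately:

	/* Roughly what PTR_RET() expands to in <linux/err.h>
	 * (later kernels rename it PTR_ERR_OR_ZERO()):
	 */
	static inline int ptr_ret_sketch(const void *ptr)
	{
		if (IS_ERR(ptr))
			return PTR_ERR(ptr);	/* errno encoded in the pointer */
		return 0;			/* valid pointer: success */
	}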
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 3efcf87400c3..d23118d95ff9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -3,6 +3,7 @@
3 * 3 *
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling 4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> 5 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6 * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net>
6 * 7 *
7 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
@@ -182,8 +183,7 @@ ipt_get_target_c(const struct ipt_entry *e)
182 return ipt_get_target((struct ipt_entry *)e); 183 return ipt_get_target((struct ipt_entry *)e);
183} 184}
184 185
185#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 186#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
186 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
187static const char *const hooknames[] = { 187static const char *const hooknames[] = {
188 [NF_INET_PRE_ROUTING] = "PREROUTING", 188 [NF_INET_PRE_ROUTING] = "PREROUTING",
189 [NF_INET_LOCAL_IN] = "INPUT", 189 [NF_INET_LOCAL_IN] = "INPUT",
@@ -259,6 +259,7 @@ static void trace_packet(const struct sk_buff *skb,
259 const char *hookname, *chainname, *comment; 259 const char *hookname, *chainname, *comment;
260 const struct ipt_entry *iter; 260 const struct ipt_entry *iter;
261 unsigned int rulenum = 0; 261 unsigned int rulenum = 0;
262 struct net *net = dev_net(in ? in : out);
262 263
263 table_base = private->entries[smp_processor_id()]; 264 table_base = private->entries[smp_processor_id()];
264 root = get_entry(table_base, private->hook_entry[hook]); 265 root = get_entry(table_base, private->hook_entry[hook]);
@@ -271,7 +272,7 @@ static void trace_packet(const struct sk_buff *skb,
271 &chainname, &comment, &rulenum) != 0) 272 &chainname, &comment, &rulenum) != 0)
272 break; 273 break;
273 274
274 nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, 275 nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo,
275 "TRACE: %s:%s:%s:%u ", 276 "TRACE: %s:%s:%s:%u ",
276 tablename, chainname, comment, rulenum); 277 tablename, chainname, comment, rulenum);
277} 278}
@@ -361,8 +362,7 @@ ipt_do_table(struct sk_buff *skb,
361 t = ipt_get_target(e); 362 t = ipt_get_target(e);
362 IP_NF_ASSERT(t->u.kernel.target); 363 IP_NF_ASSERT(t->u.kernel.target);
363 364
364#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 365#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
365 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
366 /* The packet is traced: log it */ 366 /* The packet is traced: log it */
367 if (unlikely(skb->nf_trace)) 367 if (unlikely(skb->nf_trace))
368 trace_packet(skb, hook, in, out, 368 trace_packet(skb, hook, in, out,
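IS_ENABLED(CONFIG_FOO) from <linux/kconfig.h> is true when the option is built in or modular, which is exactly what the replaced two-macro test spelled out by hand:

	/* The old guard:
	 *
	 *   #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
	 *       defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	 *
	 * is equivalent to the new one, since IS_ENABLED(CONFIG_FOO)
	 * covers both CONFIG_FOO=y and CONFIG_FOO=m:
	 */
	#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
	/* trace-only code */
	#endif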
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 7d168dcbd135..f8a222cb6448 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -4,6 +4,7 @@
4 * (C) 2000-2004 by Harald Welte <laforge@netfilter.org> 4 * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
5 * (C) 1999-2001 Paul `Rusty' Russell 5 * (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
7 * (C) 2005-2007 Patrick McHardy <kaber@trash.net>
7 * 8 *
8 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as 10 * it under the terms of the GNU General Public License version 2 as
@@ -37,7 +38,7 @@
37#include <linux/skbuff.h> 38#include <linux/skbuff.h>
38#include <linux/kernel.h> 39#include <linux/kernel.h>
39#include <linux/timer.h> 40#include <linux/timer.h>
40#include <linux/netlink.h> 41#include <net/netlink.h>
41#include <linux/netdevice.h> 42#include <linux/netdevice.h>
42#include <linux/mm.h> 43#include <linux/mm.h>
43#include <linux/moduleparam.h> 44#include <linux/moduleparam.h>
@@ -45,6 +46,7 @@
45#include <linux/netfilter/x_tables.h> 46#include <linux/netfilter/x_tables.h>
46#include <linux/netfilter_ipv4/ipt_ULOG.h> 47#include <linux/netfilter_ipv4/ipt_ULOG.h>
47#include <net/netfilter/nf_log.h> 48#include <net/netfilter/nf_log.h>
49#include <net/netns/generic.h>
48#include <net/sock.h> 50#include <net/sock.h>
49#include <linux/bitops.h> 51#include <linux/bitops.h>
50#include <asm/unaligned.h> 52#include <asm/unaligned.h>
@@ -78,15 +80,23 @@ typedef struct {
78 struct timer_list timer; /* the timer function */ 80 struct timer_list timer; /* the timer function */
79} ulog_buff_t; 81} ulog_buff_t;
80 82
81static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */ 83static int ulog_net_id __read_mostly;
84struct ulog_net {
85 unsigned int nlgroup[ULOG_MAXNLGROUPS];
86 ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS];
87 struct sock *nflognl;
88 spinlock_t lock;
89};
82 90
83static struct sock *nflognl; /* our socket */ 91static struct ulog_net *ulog_pernet(struct net *net)
84static DEFINE_SPINLOCK(ulog_lock); /* spinlock */ 92{
93 return net_generic(net, ulog_net_id);
94}
85 95
86/* send one ulog_buff_t to userspace */ 96/* send one ulog_buff_t to userspace */
87static void ulog_send(unsigned int nlgroupnum) 97static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum)
88{ 98{
89 ulog_buff_t *ub = &ulog_buffers[nlgroupnum]; 99 ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum];
90 100
91 pr_debug("ulog_send: timer is deleting\n"); 101 pr_debug("ulog_send: timer is deleting\n");
92 del_timer(&ub->timer); 102 del_timer(&ub->timer);
@@ -103,7 +113,8 @@ static void ulog_send(unsigned int nlgroupnum)
103 NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; 113 NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
104 pr_debug("throwing %d packets to netlink group %u\n", 114 pr_debug("throwing %d packets to netlink group %u\n",
105 ub->qlen, nlgroupnum + 1); 115 ub->qlen, nlgroupnum + 1);
106 netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC); 116 netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1,
117 GFP_ATOMIC);
107 118
108 ub->qlen = 0; 119 ub->qlen = 0;
109 ub->skb = NULL; 120 ub->skb = NULL;
@@ -114,13 +125,16 @@ static void ulog_send(unsigned int nlgroupnum)
114/* timer function to flush queue in flushtimeout time */ 125/* timer function to flush queue in flushtimeout time */
115static void ulog_timer(unsigned long data) 126static void ulog_timer(unsigned long data)
116{ 127{
128 struct ulog_net *ulog = container_of((void *)data,
129 struct ulog_net,
130 nlgroup[*(unsigned int *)data]);
117 pr_debug("timer function called, calling ulog_send\n"); 131 pr_debug("timer function called, calling ulog_send\n");
118 132
119 /* lock to protect against somebody modifying our structure 133 /* lock to protect against somebody modifying our structure
120 * from ipt_ulog_target at the same time */ 134 * from ipt_ulog_target at the same time */
121 spin_lock_bh(&ulog_lock); 135 spin_lock_bh(&ulog->lock);
122 ulog_send(data); 136 ulog_send(ulog, data);
123 spin_unlock_bh(&ulog_lock); 137 spin_unlock_bh(&ulog->lock);
124} 138}
125 139
126static struct sk_buff *ulog_alloc_skb(unsigned int size) 140static struct sk_buff *ulog_alloc_skb(unsigned int size)
@@ -160,6 +174,8 @@ static void ipt_ulog_packet(unsigned int hooknum,
160 size_t size, copy_len; 174 size_t size, copy_len;
161 struct nlmsghdr *nlh; 175 struct nlmsghdr *nlh;
162 struct timeval tv; 176 struct timeval tv;
177 struct net *net = dev_net(in ? in : out);
178 struct ulog_net *ulog = ulog_pernet(net);
163 179
164 /* ffs == find first bit set, necessary because userspace 180 /* ffs == find first bit set, necessary because userspace
165 * is already shifting groupnumber, but we need unshifted. 181 * is already shifting groupnumber, but we need unshifted.
@@ -172,11 +188,11 @@ static void ipt_ulog_packet(unsigned int hooknum,
172 else 188 else
173 copy_len = loginfo->copy_range; 189 copy_len = loginfo->copy_range;
174 190
175 size = NLMSG_SPACE(sizeof(*pm) + copy_len); 191 size = nlmsg_total_size(sizeof(*pm) + copy_len);
176 192
177 ub = &ulog_buffers[groupnum]; 193 ub = &ulog->ulog_buffers[groupnum];
178 194
179 spin_lock_bh(&ulog_lock); 195 spin_lock_bh(&ulog->lock);
180 196
181 if (!ub->skb) { 197 if (!ub->skb) {
182 if (!(ub->skb = ulog_alloc_skb(size))) 198 if (!(ub->skb = ulog_alloc_skb(size)))
@@ -186,7 +202,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
186 /* either the queue len is too high or we don't have 202 /* either the queue len is too high or we don't have
187 * enough room in nlskb left. send it to userspace. */ 203 * enough room in nlskb left. send it to userspace. */
188 204
189 ulog_send(groupnum); 205 ulog_send(ulog, groupnum);
190 206
191 if (!(ub->skb = ulog_alloc_skb(size))) 207 if (!(ub->skb = ulog_alloc_skb(size)))
192 goto alloc_failure; 208 goto alloc_failure;
@@ -260,16 +276,16 @@ static void ipt_ulog_packet(unsigned int hooknum,
260 if (ub->qlen >= loginfo->qthreshold) { 276 if (ub->qlen >= loginfo->qthreshold) {
261 if (loginfo->qthreshold > 1) 277 if (loginfo->qthreshold > 1)
262 nlh->nlmsg_type = NLMSG_DONE; 278 nlh->nlmsg_type = NLMSG_DONE;
263 ulog_send(groupnum); 279 ulog_send(ulog, groupnum);
264 } 280 }
265out_unlock: 281out_unlock:
266 spin_unlock_bh(&ulog_lock); 282 spin_unlock_bh(&ulog->lock);
267 283
268 return; 284 return;
269 285
270alloc_failure: 286alloc_failure:
271 pr_debug("Error building netlink message\n"); 287 pr_debug("Error building netlink message\n");
272 spin_unlock_bh(&ulog_lock); 288 spin_unlock_bh(&ulog->lock);
273} 289}
274 290
275static unsigned int 291static unsigned int
@@ -376,54 +392,43 @@ static struct nf_logger ipt_ulog_logger __read_mostly = {
376 .me = THIS_MODULE, 392 .me = THIS_MODULE,
377}; 393};
378 394
379static int __init ulog_tg_init(void) 395static int __net_init ulog_tg_net_init(struct net *net)
380{ 396{
381 int ret, i; 397 int i;
398 struct ulog_net *ulog = ulog_pernet(net);
382 struct netlink_kernel_cfg cfg = { 399 struct netlink_kernel_cfg cfg = {
383 .groups = ULOG_MAXNLGROUPS, 400 .groups = ULOG_MAXNLGROUPS,
384 }; 401 };
385 402
386 pr_debug("init module\n"); 403 spin_lock_init(&ulog->lock);
387
388 if (nlbufsiz > 128*1024) {
389 pr_warning("Netlink buffer has to be <= 128kB\n");
390 return -EINVAL;
391 }
392
393 /* initialize ulog_buffers */ 404 /* initialize ulog_buffers */
394 for (i = 0; i < ULOG_MAXNLGROUPS; i++) 405 for (i = 0; i < ULOG_MAXNLGROUPS; i++)
395 setup_timer(&ulog_buffers[i].timer, ulog_timer, i); 406 setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer, i);
396 407
397 nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); 408 ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
398 if (!nflognl) 409 if (!ulog->nflognl)
399 return -ENOMEM; 410 return -ENOMEM;
400 411
401 ret = xt_register_target(&ulog_tg_reg);
402 if (ret < 0) {
403 netlink_kernel_release(nflognl);
404 return ret;
405 }
406 if (nflog) 412 if (nflog)
407 nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); 413 nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger);
408 414
409 return 0; 415 return 0;
410} 416}
411 417
412static void __exit ulog_tg_exit(void) 418static void __net_exit ulog_tg_net_exit(struct net *net)
413{ 419{
414 ulog_buff_t *ub; 420 ulog_buff_t *ub;
415 int i; 421 int i;
416 422 struct ulog_net *ulog = ulog_pernet(net);
417 pr_debug("cleanup_module\n");
418 423
419 if (nflog) 424 if (nflog)
420 nf_log_unregister(&ipt_ulog_logger); 425 nf_log_unset(net, &ipt_ulog_logger);
421 xt_unregister_target(&ulog_tg_reg); 426
422 netlink_kernel_release(nflognl); 427 netlink_kernel_release(ulog->nflognl);
423 428
424 /* remove pending timers and free allocated skb's */ 429 /* remove pending timers and free allocated skb's */
425 for (i = 0; i < ULOG_MAXNLGROUPS; i++) { 430 for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
426 ub = &ulog_buffers[i]; 431 ub = &ulog->ulog_buffers[i];
427 pr_debug("timer is deleting\n"); 432 pr_debug("timer is deleting\n");
428 del_timer(&ub->timer); 433 del_timer(&ub->timer);
429 434
@@ -434,5 +439,50 @@ static void __exit ulog_tg_exit(void)
434 } 439 }
435} 440}
436 441
442static struct pernet_operations ulog_tg_net_ops = {
443 .init = ulog_tg_net_init,
444 .exit = ulog_tg_net_exit,
445 .id = &ulog_net_id,
446 .size = sizeof(struct ulog_net),
447};
448
449static int __init ulog_tg_init(void)
450{
451 int ret;
452 pr_debug("init module\n");
453
454 if (nlbufsiz > 128*1024) {
455 pr_warn("Netlink buffer has to be <= 128kB\n");
456 return -EINVAL;
457 }
458
459 ret = register_pernet_subsys(&ulog_tg_net_ops);
460 if (ret)
461 goto out_pernet;
462
463 ret = xt_register_target(&ulog_tg_reg);
464 if (ret < 0)
465 goto out_target;
466
467 if (nflog)
468 nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
469
470 return 0;
471
472out_target:
473 unregister_pernet_subsys(&ulog_tg_net_ops);
474out_pernet:
475 return ret;
476}
477
478static void __exit ulog_tg_exit(void)
479{
480 pr_debug("cleanup_module\n");
481 if (nflog)
482 nf_log_unregister(&ipt_ulog_logger);
483 xt_unregister_target(&ulog_tg_reg);
484 unregister_pernet_subsys(&ulog_tg_net_ops);
485}
486
437module_init(ulog_tg_init); 487module_init(ulog_tg_init);
438module_exit(ulog_tg_exit); 488module_exit(ulog_tg_exit);
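The ULOG rewrite is the standard net-namespace conversion: global state moves into a per-net structure reachable through net_generic(), per-namespace setup happens in the pernet .init hook, and module-wide setup (target and logger registration) stays in module_init(). The pattern, condensed under illustrative names:

	#include <net/net_namespace.h>
	#include <net/netns/generic.h>

	static int foo_net_id __read_mostly;

	struct foo_net {		/* one instance per struct net */
		spinlock_t lock;
	};

	static int __net_init foo_net_init(struct net *net)
	{
		struct foo_net *fn = net_generic(net, foo_net_id);

		spin_lock_init(&fn->lock);	/* per-namespace state only */
		return 0;
	}

	static struct pernet_operations foo_net_ops = {
		.init = foo_net_init,
		.id   = &foo_net_id,		/* core fills in the id ... */
		.size = sizeof(struct foo_net),	/* ... and allocates this per net */
	};

	/* module_init() then calls register_pernet_subsys(&foo_net_ops)
	 * before any global registration, mirroring ulog_tg_init() above.
	 */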
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 85d88f206447..cba5658ec82c 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -44,6 +44,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
44 u_int8_t tos; 44 u_int8_t tos;
45 __be32 saddr, daddr; 45 __be32 saddr, daddr;
46 u_int32_t mark; 46 u_int32_t mark;
47 int err;
47 48
48 /* root is playing with raw sockets. */ 49 /* root is playing with raw sockets. */
49 if (skb->len < sizeof(struct iphdr) || 50 if (skb->len < sizeof(struct iphdr) ||
@@ -66,9 +67,11 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
66 if (iph->saddr != saddr || 67 if (iph->saddr != saddr ||
67 iph->daddr != daddr || 68 iph->daddr != daddr ||
68 skb->mark != mark || 69 skb->mark != mark ||
69 iph->tos != tos) 70 iph->tos != tos) {
70 if (ip_route_me_harder(skb, RTN_UNSPEC)) 71 err = ip_route_me_harder(skb, RTN_UNSPEC);
71 ret = NF_DROP; 72 if (err < 0)
73 ret = NF_DROP_ERR(err);
74 }
72 } 75 }
73 76
74 return ret; 77 return ret;
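NF_DROP_ERR() packs a negative errno into the upper bits of the NF_DROP verdict so the netfilter core can report the real cause for locally generated packets rather than a blanket -EPERM. Per <linux/netfilter.h> of this era, roughly:

	/* #define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP)
	 *
	 * e.g. err = -EHOSTUNREACH:
	 *   NF_DROP_ERR(err) == (EHOSTUNREACH << 16) | NF_DROP
	 *
	 * The core recovers it with NF_DROP_GETERR(verdict),
	 * i.e. -(verdict >> 16), falling back to -EPERM when zero.
	 */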
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index eeaff7e4acb5..6383273d54e1 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -176,6 +176,7 @@ nf_nat_ipv4_out(unsigned int hooknum,
176#ifdef CONFIG_XFRM 176#ifdef CONFIG_XFRM
177 const struct nf_conn *ct; 177 const struct nf_conn *ct;
178 enum ip_conntrack_info ctinfo; 178 enum ip_conntrack_info ctinfo;
179 int err;
179#endif 180#endif
180 unsigned int ret; 181 unsigned int ret;
181 182
@@ -195,9 +196,11 @@ nf_nat_ipv4_out(unsigned int hooknum,
195 ct->tuplehash[!dir].tuple.dst.u3.ip) || 196 ct->tuplehash[!dir].tuple.dst.u3.ip) ||
196 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && 197 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
197 ct->tuplehash[dir].tuple.src.u.all != 198 ct->tuplehash[dir].tuple.src.u.all !=
198 ct->tuplehash[!dir].tuple.dst.u.all)) 199 ct->tuplehash[!dir].tuple.dst.u.all)) {
199 if (nf_xfrm_me_harder(skb, AF_INET) < 0) 200 err = nf_xfrm_me_harder(skb, AF_INET);
200 ret = NF_DROP; 201 if (err < 0)
202 ret = NF_DROP_ERR(err);
203 }
201 } 204 }
202#endif 205#endif
203 return ret; 206 return ret;
@@ -213,6 +216,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum,
213 const struct nf_conn *ct; 216 const struct nf_conn *ct;
214 enum ip_conntrack_info ctinfo; 217 enum ip_conntrack_info ctinfo;
215 unsigned int ret; 218 unsigned int ret;
219 int err;
216 220
217 /* root is playing with raw sockets. */ 221 /* root is playing with raw sockets. */
218 if (skb->len < sizeof(struct iphdr) || 222 if (skb->len < sizeof(struct iphdr) ||
@@ -226,16 +230,19 @@ nf_nat_ipv4_local_fn(unsigned int hooknum,
226 230
227 if (ct->tuplehash[dir].tuple.dst.u3.ip != 231 if (ct->tuplehash[dir].tuple.dst.u3.ip !=
228 ct->tuplehash[!dir].tuple.src.u3.ip) { 232 ct->tuplehash[!dir].tuple.src.u3.ip) {
229 if (ip_route_me_harder(skb, RTN_UNSPEC)) 233 err = ip_route_me_harder(skb, RTN_UNSPEC);
230 ret = NF_DROP; 234 if (err < 0)
235 ret = NF_DROP_ERR(err);
231 } 236 }
232#ifdef CONFIG_XFRM 237#ifdef CONFIG_XFRM
233 else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 238 else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
234 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && 239 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
235 ct->tuplehash[dir].tuple.dst.u.all != 240 ct->tuplehash[dir].tuple.dst.u.all !=
236 ct->tuplehash[!dir].tuple.src.u.all) 241 ct->tuplehash[!dir].tuple.src.u.all) {
237 if (nf_xfrm_me_harder(skb, AF_INET) < 0) 242 err = nf_xfrm_me_harder(skb, AF_INET);
238 ret = NF_DROP; 243 if (err < 0)
244 ret = NF_DROP_ERR(err);
245 }
239#endif 246#endif
240 } 247 }
241 return ret; 248 return ret;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2820aa18b542..567d84168bd2 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -1,6 +1,7 @@
1 1
2/* (C) 1999-2001 Paul `Rusty' Russell 2/* (C) 1999-2001 Paul `Rusty' Russell
3 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 3 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
4 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
4 * 5 *
5 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index f2ca12794081..4c48e434bb1f 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -2,6 +2,7 @@
2 * 2 *
3 * (C) 1999-2001 Paul `Rusty' Russell 3 * (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5 * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 5241d997ab75..a338dad41b7d 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -1,5 +1,6 @@
1/* (C) 1999-2001 Paul `Rusty' Russell 1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
@@ -187,8 +188,8 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
187 icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); 188 icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
188 if (icmph == NULL) { 189 if (icmph == NULL) {
189 if (LOG_INVALID(net, IPPROTO_ICMP)) 190 if (LOG_INVALID(net, IPPROTO_ICMP))
190 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, 191 nf_log_packet(net, PF_INET, 0, skb, NULL, NULL,
191 "nf_ct_icmp: short packet "); 192 NULL, "nf_ct_icmp: short packet ");
192 return -NF_ACCEPT; 193 return -NF_ACCEPT;
193 } 194 }
194 195
@@ -196,7 +197,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
196 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 197 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
197 nf_ip_checksum(skb, hooknum, dataoff, 0)) { 198 nf_ip_checksum(skb, hooknum, dataoff, 0)) {
198 if (LOG_INVALID(net, IPPROTO_ICMP)) 199 if (LOG_INVALID(net, IPPROTO_ICMP))
199 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, 200 nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
200 "nf_ct_icmp: bad HW ICMP checksum "); 201 "nf_ct_icmp: bad HW ICMP checksum ");
201 return -NF_ACCEPT; 202 return -NF_ACCEPT;
202 } 203 }
@@ -209,7 +210,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
209 */ 210 */
210 if (icmph->type > NR_ICMP_TYPES) { 211 if (icmph->type > NR_ICMP_TYPES) {
211 if (LOG_INVALID(net, IPPROTO_ICMP)) 212 if (LOG_INVALID(net, IPPROTO_ICMP))
212 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, 213 nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
213 "nf_ct_icmp: invalid ICMP type "); 214 "nf_ct_icmp: invalid ICMP type ");
214 return -NF_ACCEPT; 215 return -NF_ACCEPT;
215 } 216 }
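nf_log_packet() gains a struct net * first argument so the message is routed to the namespace's logger (configured via nf_log_set(), as in the ULOG hunk above) instead of a single global one. Hooks that only receive net_devices derive the namespace first, as trace_packet() now does; a sketch:

	struct net *net = dev_net(in ? in : out);	/* in/out: hook devices */

	if (LOG_INVALID(net, IPPROTO_ICMP))
		nf_log_packet(net, PF_INET, hooknum, skb, in, out, NULL,
			      "nf_ct_icmp: example ");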
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 9c3db10b22d3..9eea059dd621 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -2,6 +2,7 @@
2 * H.323 extension for NAT alteration. 2 * H.323 extension for NAT alteration.
3 * 3 *
4 * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> 4 * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
5 * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net>
5 * 6 *
6 * This source code is licensed under General Public License version 2. 7 * This source code is licensed under General Public License version 2.
7 * 8 *
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index a06d7d74817d..657d2307f031 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -13,6 +13,8 @@
13 * 13 *
14 * Development of this code funded by Astaro AG (http://www.astaro.com/) 14 * Development of this code funded by Astaro AG (http://www.astaro.com/)
15 * 15 *
16 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
17 *
16 * TODO: - NAT to a unique tuple, not to TCP source port 18 * TODO: - NAT to a unique tuple, not to TCP source port
17 * (needs netfilter tuple reservation) 19 * (needs netfilter tuple reservation)
18 */ 20 */
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index ea44f02563b5..690d890111bb 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -21,6 +21,8 @@
21 * 21 *
22 * Development of this code funded by Astaro AG (http://www.astaro.com/) 22 * Development of this code funded by Astaro AG (http://www.astaro.com/)
23 * 23 *
24 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
25 *
24 */ 26 */
25 27
26#include <linux/module.h> 28#include <linux/module.h>
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index bac712293fd6..5f011cc89cd9 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -38,6 +38,8 @@
38 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 38 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
39 * 39 *
40 * Author: James Morris <jmorris@intercode.com.au> 40 * Author: James Morris <jmorris@intercode.com.au>
41 *
42 * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
41 */ 43 */
42#include <linux/module.h> 44#include <linux/module.h>
43#include <linux/moduleparam.h> 45#include <linux/moduleparam.h>
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 2e91006d6076..7d93d62cd5fd 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -514,9 +514,8 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
514 ipc.opt = NULL; 514 ipc.opt = NULL;
515 ipc.oif = sk->sk_bound_dev_if; 515 ipc.oif = sk->sk_bound_dev_if;
516 ipc.tx_flags = 0; 516 ipc.tx_flags = 0;
517 err = sock_tx_timestamp(sk, &ipc.tx_flags); 517
518 if (err) 518 sock_tx_timestamp(sk, &ipc.tx_flags);
519 return err;
520 519
521 if (msg->msg_controllen) { 520 if (msg->msg_controllen) {
522 err = ip_cmsg_send(sock_net(sk), msg, &ipc); 521 err = ip_cmsg_send(sock_net(sk), msg, &ipc);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 32030a24e776..6da51d55d03a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -224,6 +224,8 @@ static const struct snmp_mib snmp4_net_list[] = {
224 SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS), 224 SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS),
225 SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS), 225 SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS),
226 SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS), 226 SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS),
227 SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES),
228 SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY),
227 SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL), 229 SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL),
228 SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL), 230 SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL),
229 SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED), 231 SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED),
@@ -267,6 +269,7 @@ static const struct snmp_mib snmp4_net_list[] = {
267 SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL), 269 SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL),
268 SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), 270 SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
269 SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), 271 SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD),
272 SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES),
270 SNMP_MIB_SENTINEL 273 SNMP_MIB_SENTINEL
271}; 274};
272 275
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6e2851464f8f..550781a17b34 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2311,7 +2311,7 @@ nla_put_failure:
2311 return -EMSGSIZE; 2311 return -EMSGSIZE;
2312} 2312}
2313 2313
2314static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) 2314static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2315{ 2315{
2316 struct net *net = sock_net(in_skb->sk); 2316 struct net *net = sock_net(in_skb->sk);
2317 struct rtmsg *rtm; 2317 struct rtmsg *rtm;
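This is part of a tree-wide change: rtnetlink doit handlers lose the unused void *arg parameter, so the callback type becomes, roughly:

	/* New rtnetlink handler shape per this series (the example
	 * function is illustrative, not from the patch):
	 */
	typedef int (*rtnl_doit_func)(struct sk_buff *skb, struct nlmsghdr *nlh);

	static int example_doit(struct sk_buff *in_skb, struct nlmsghdr *nlh)
	{
		struct net *net = sock_net(in_skb->sk);	/* requester's netns */

		/* parse attributes from nlh, act, send a reply ... */
		return 0;
	}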
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 397e0f69435f..b05c96e7af8b 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -267,7 +267,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
267 struct ip_options *opt) 267 struct ip_options *opt)
268{ 268{
269 struct tcp_options_received tcp_opt; 269 struct tcp_options_received tcp_opt;
270 const u8 *hash_location;
271 struct inet_request_sock *ireq; 270 struct inet_request_sock *ireq;
272 struct tcp_request_sock *treq; 271 struct tcp_request_sock *treq;
273 struct tcp_sock *tp = tcp_sk(sk); 272 struct tcp_sock *tp = tcp_sk(sk);
@@ -294,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
294 293
295 /* check for timestamp cookie support */ 294 /* check for timestamp cookie support */
296 memset(&tcp_opt, 0, sizeof(tcp_opt)); 295 memset(&tcp_opt, 0, sizeof(tcp_opt));
297 tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); 296 tcp_parse_options(skb, &tcp_opt, 0, NULL);
298 297
299 if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) 298 if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
300 goto out; 299 goto out;
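With TCP cookie transactions removed, tcp_parse_options() drops the hash_location out-parameter; what remains is the parsed-options struct, an established-connection flag, and an optional Fast Open cookie slot. The call shape, per this series:

	struct tcp_options_received tcp_opt;

	memset(&tcp_opt, 0, sizeof(tcp_opt));
	/* (skb, parsed options, estab = 0, no Fast Open cookie wanted) */
	tcp_parse_options(skb, &tcp_opt, 0, NULL);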
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 960fd29d9b8e..fa2f63fc453b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -28,7 +28,7 @@
28 28
29static int zero; 29static int zero;
30static int one = 1; 30static int one = 1;
31static int two = 2; 31static int four = 4;
32static int tcp_retr1_max = 255; 32static int tcp_retr1_max = 255;
33static int ip_local_port_range_min[] = { 1, 1 }; 33static int ip_local_port_range_min[] = { 1, 1 };
34static int ip_local_port_range_max[] = { 65535, 65535 }; 34static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -592,13 +592,6 @@ static struct ctl_table ipv4_table[] = {
592 .proc_handler = proc_dointvec 592 .proc_handler = proc_dointvec
593 }, 593 },
594 { 594 {
595 .procname = "tcp_frto_response",
596 .data = &sysctl_tcp_frto_response,
597 .maxlen = sizeof(int),
598 .mode = 0644,
599 .proc_handler = proc_dointvec
600 },
601 {
602 .procname = "tcp_low_latency", 595 .procname = "tcp_low_latency",
603 .data = &sysctl_tcp_low_latency, 596 .data = &sysctl_tcp_low_latency,
604 .maxlen = sizeof(int), 597 .maxlen = sizeof(int),
@@ -733,13 +726,6 @@ static struct ctl_table ipv4_table[] = {
733 .proc_handler = proc_dointvec, 726 .proc_handler = proc_dointvec,
734 }, 727 },
735 { 728 {
736 .procname = "tcp_cookie_size",
737 .data = &sysctl_tcp_cookie_size,
738 .maxlen = sizeof(int),
739 .mode = 0644,
740 .proc_handler = proc_dointvec
741 },
742 {
743 .procname = "tcp_thin_linear_timeouts", 729 .procname = "tcp_thin_linear_timeouts",
744 .data = &sysctl_tcp_thin_linear_timeouts, 730 .data = &sysctl_tcp_thin_linear_timeouts,
745 .maxlen = sizeof(int), 731 .maxlen = sizeof(int),
@@ -760,7 +746,7 @@ static struct ctl_table ipv4_table[] = {
760 .mode = 0644, 746 .mode = 0644,
761 .proc_handler = proc_dointvec_minmax, 747 .proc_handler = proc_dointvec_minmax,
762 .extra1 = &zero, 748 .extra1 = &zero,
763 .extra2 = &two, 749 .extra2 = &four,
764 }, 750 },
765 { 751 {
766 .procname = "udp_mem", 752 .procname = "udp_mem",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e22020790709..dcb116dde216 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -409,15 +409,6 @@ void tcp_init_sock(struct sock *sk)
409 409
410 icsk->icsk_sync_mss = tcp_sync_mss; 410 icsk->icsk_sync_mss = tcp_sync_mss;
411 411
412 /* TCP Cookie Transactions */
413 if (sysctl_tcp_cookie_size > 0) {
414 /* Default, cookies without s_data_payload. */
415 tp->cookie_values =
416 kzalloc(sizeof(*tp->cookie_values),
417 sk->sk_allocation);
418 if (tp->cookie_values != NULL)
419 kref_init(&tp->cookie_values->kref);
420 }
421 /* Presumed zeroed, in order of appearance: 412 /* Presumed zeroed, in order of appearance:
422 * cookie_in_always, cookie_out_never, 413 * cookie_in_always, cookie_out_never,
423 * s_data_constant, s_data_in, s_data_out 414 * s_data_constant, s_data_in, s_data_out
@@ -2397,92 +2388,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2397 release_sock(sk); 2388 release_sock(sk);
2398 return err; 2389 return err;
2399 } 2390 }
2400 case TCP_COOKIE_TRANSACTIONS: {
2401 struct tcp_cookie_transactions ctd;
2402 struct tcp_cookie_values *cvp = NULL;
2403
2404 if (sizeof(ctd) > optlen)
2405 return -EINVAL;
2406 if (copy_from_user(&ctd, optval, sizeof(ctd)))
2407 return -EFAULT;
2408
2409 if (ctd.tcpct_used > sizeof(ctd.tcpct_value) ||
2410 ctd.tcpct_s_data_desired > TCP_MSS_DESIRED)
2411 return -EINVAL;
2412
2413 if (ctd.tcpct_cookie_desired == 0) {
2414 /* default to global value */
2415 } else if ((0x1 & ctd.tcpct_cookie_desired) ||
2416 ctd.tcpct_cookie_desired > TCP_COOKIE_MAX ||
2417 ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) {
2418 return -EINVAL;
2419 }
2420
2421 if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) {
2422 /* Supercedes all other values */
2423 lock_sock(sk);
2424 if (tp->cookie_values != NULL) {
2425 kref_put(&tp->cookie_values->kref,
2426 tcp_cookie_values_release);
2427 tp->cookie_values = NULL;
2428 }
2429 tp->rx_opt.cookie_in_always = 0; /* false */
2430 tp->rx_opt.cookie_out_never = 1; /* true */
2431 release_sock(sk);
2432 return err;
2433 }
2434
2435 /* Allocate ancillary memory before locking.
2436 */
2437 if (ctd.tcpct_used > 0 ||
2438 (tp->cookie_values == NULL &&
2439 (sysctl_tcp_cookie_size > 0 ||
2440 ctd.tcpct_cookie_desired > 0 ||
2441 ctd.tcpct_s_data_desired > 0))) {
2442 cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used,
2443 GFP_KERNEL);
2444 if (cvp == NULL)
2445 return -ENOMEM;
2446
2447 kref_init(&cvp->kref);
2448 }
2449 lock_sock(sk);
2450 tp->rx_opt.cookie_in_always =
2451 (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags);
2452 tp->rx_opt.cookie_out_never = 0; /* false */
2453
2454 if (tp->cookie_values != NULL) {
2455 if (cvp != NULL) {
2456 /* Changed values are recorded by a changed
2457 * pointer, ensuring the cookie will differ,
2458 * without separately hashing each value later.
2459 */
2460 kref_put(&tp->cookie_values->kref,
2461 tcp_cookie_values_release);
2462 } else {
2463 cvp = tp->cookie_values;
2464 }
2465 }
2466
2467 if (cvp != NULL) {
2468 cvp->cookie_desired = ctd.tcpct_cookie_desired;
2469
2470 if (ctd.tcpct_used > 0) {
2471 memcpy(cvp->s_data_payload, ctd.tcpct_value,
2472 ctd.tcpct_used);
2473 cvp->s_data_desired = ctd.tcpct_used;
2474 cvp->s_data_constant = 1; /* true */
2475 } else {
2476 /* No constant payload data. */
2477 cvp->s_data_desired = ctd.tcpct_s_data_desired;
2478 cvp->s_data_constant = 0; /* false */
2479 }
2480
2481 tp->cookie_values = cvp;
2482 }
2483 release_sock(sk);
2484 return err;
2485 }
2486 default: 2391 default:
2487 /* fallthru */ 2392 /* fallthru */
2488 break; 2393 break;
@@ -2902,41 +2807,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2902 return -EFAULT; 2807 return -EFAULT;
2903 return 0; 2808 return 0;
2904 2809
2905 case TCP_COOKIE_TRANSACTIONS: {
2906 struct tcp_cookie_transactions ctd;
2907 struct tcp_cookie_values *cvp = tp->cookie_values;
2908
2909 if (get_user(len, optlen))
2910 return -EFAULT;
2911 if (len < sizeof(ctd))
2912 return -EINVAL;
2913
2914 memset(&ctd, 0, sizeof(ctd));
2915 ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ?
2916 TCP_COOKIE_IN_ALWAYS : 0)
2917 | (tp->rx_opt.cookie_out_never ?
2918 TCP_COOKIE_OUT_NEVER : 0);
2919
2920 if (cvp != NULL) {
2921 ctd.tcpct_flags |= (cvp->s_data_in ?
2922 TCP_S_DATA_IN : 0)
2923 | (cvp->s_data_out ?
2924 TCP_S_DATA_OUT : 0);
2925
2926 ctd.tcpct_cookie_desired = cvp->cookie_desired;
2927 ctd.tcpct_s_data_desired = cvp->s_data_desired;
2928
2929 memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0],
2930 cvp->cookie_pair_size);
2931 ctd.tcpct_used = cvp->cookie_pair_size;
2932 }
2933
2934 if (put_user(sizeof(ctd), optlen))
2935 return -EFAULT;
2936 if (copy_to_user(optval, &ctd, sizeof(ctd)))
2937 return -EFAULT;
2938 return 0;
2939 }
2940 case TCP_THIN_LINEAR_TIMEOUTS: 2810 case TCP_THIN_LINEAR_TIMEOUTS:
2941 val = tp->thin_lto; 2811 val = tp->thin_lto;
2942 break; 2812 break;
@@ -3015,6 +2885,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
3015 __be32 delta; 2885 __be32 delta;
3016 unsigned int oldlen; 2886 unsigned int oldlen;
3017 unsigned int mss; 2887 unsigned int mss;
2888 struct sk_buff *gso_skb = skb;
2889 __sum16 newcheck;
3018 2890
3019 if (!pskb_may_pull(skb, sizeof(*th))) 2891 if (!pskb_may_pull(skb, sizeof(*th)))
3020 goto out; 2892 goto out;
@@ -3044,6 +2916,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
3044 SKB_GSO_TCP_ECN | 2916 SKB_GSO_TCP_ECN |
3045 SKB_GSO_TCPV6 | 2917 SKB_GSO_TCPV6 |
3046 SKB_GSO_GRE | 2918 SKB_GSO_GRE |
2919 SKB_GSO_UDP_TUNNEL |
3047 0) || 2920 0) ||
3048 !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) 2921 !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
3049 goto out; 2922 goto out;
@@ -3064,11 +2937,13 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
3064 th = tcp_hdr(skb); 2937 th = tcp_hdr(skb);
3065 seq = ntohl(th->seq); 2938 seq = ntohl(th->seq);
3066 2939
2940 newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
2941 (__force u32)delta));
2942
3067 do { 2943 do {
3068 th->fin = th->psh = 0; 2944 th->fin = th->psh = 0;
2945 th->check = newcheck;
3069 2946
3070 th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
3071 (__force u32)delta));
3072 if (skb->ip_summed != CHECKSUM_PARTIAL) 2947 if (skb->ip_summed != CHECKSUM_PARTIAL)
3073 th->check = 2948 th->check =
3074 csum_fold(csum_partial(skb_transport_header(skb), 2949 csum_fold(csum_partial(skb_transport_header(skb),
@@ -3082,6 +2957,17 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
3082 th->cwr = 0; 2957 th->cwr = 0;
3083 } while (skb->next); 2958 } while (skb->next);
3084 2959
2960 /* Following permits TCP Small Queues to work well with GSO :
2961 * The callback to TCP stack will be called at the time last frag
2962 * is freed at TX completion, and not right now when gso_skb
2963 * is freed by GSO engine
2964 */
2965 if (gso_skb->destructor == tcp_wfree) {
2966 swap(gso_skb->sk, skb->sk);
2967 swap(gso_skb->destructor, skb->destructor);
2968 swap(gso_skb->truesize, skb->truesize);
2969 }
2970
3085 delta = htonl(oldlen + (skb->tail - skb->transport_header) + 2971 delta = htonl(oldlen + (skb->tail - skb->transport_header) +
3086 skb->data_len); 2972 skb->data_len);
3087 th->check = ~csum_fold((__force __wsum)((__force u32)th->check + 2973 th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
@@ -3408,134 +3294,6 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
3408 3294
3409#endif 3295#endif
3410 3296
3411/* Each Responder maintains up to two secret values concurrently for
3412 * efficient secret rollover. Each secret value has 4 states:
3413 *
3414 * Generating. (tcp_secret_generating != tcp_secret_primary)
3415 * Generates new Responder-Cookies, but not yet used for primary
3416 * verification. This is a short-term state, typically lasting only
3417 * one round trip time (RTT).
3418 *
3419 * Primary. (tcp_secret_generating == tcp_secret_primary)
3420 * Used both for generation and primary verification.
3421 *
3422 * Retiring. (tcp_secret_retiring != tcp_secret_secondary)
3423 * Used for verification, until the first failure that can be
3424 * verified by the newer Generating secret. At that time, this
3425 * cookie's state is changed to Secondary, and the Generating
3426 * cookie's state is changed to Primary. This is a short-term state,
3427 * typically lasting only one round trip time (RTT).
3428 *
3429 * Secondary. (tcp_secret_retiring == tcp_secret_secondary)
3430 * Used for secondary verification, after primary verification
3431 * failures. This state lasts no more than twice the Maximum Segment
3432 * Lifetime (2MSL). Then, the secret is discarded.
3433 */
3434struct tcp_cookie_secret {
3435 /* The secret is divided into two parts. The digest part is the
3436 * equivalent of previously hashing a secret and saving the state,
3437 * and serves as an initialization vector (IV). The message part
3438 * serves as the trailing secret.
3439 */
3440 u32 secrets[COOKIE_WORKSPACE_WORDS];
3441 unsigned long expires;
3442};
3443
3444#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL)
3445#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2)
3446#define TCP_SECRET_LIFE (HZ * 600)
3447
3448static struct tcp_cookie_secret tcp_secret_one;
3449static struct tcp_cookie_secret tcp_secret_two;
3450
3451/* Essentially a circular list, without dynamic allocation. */
3452static struct tcp_cookie_secret *tcp_secret_generating;
3453static struct tcp_cookie_secret *tcp_secret_primary;
3454static struct tcp_cookie_secret *tcp_secret_retiring;
3455static struct tcp_cookie_secret *tcp_secret_secondary;
3456
3457static DEFINE_SPINLOCK(tcp_secret_locker);
3458
3459/* Select a pseudo-random word in the cookie workspace.
3460 */
3461static inline u32 tcp_cookie_work(const u32 *ws, const int n)
3462{
3463 return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])];
3464}
3465
3466/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed.
3467 * Called in softirq context.
3468 * Returns: 0 for success.
3469 */
3470int tcp_cookie_generator(u32 *bakery)
3471{
3472 unsigned long jiffy = jiffies;
3473
3474 if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
3475 spin_lock_bh(&tcp_secret_locker);
3476 if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
3477 /* refreshed by another */
3478 memcpy(bakery,
3479 &tcp_secret_generating->secrets[0],
3480 COOKIE_WORKSPACE_WORDS);
3481 } else {
3482 /* still needs refreshing */
3483 get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS);
3484
3485 /* The first time, paranoia assumes that the
3486 * randomization function isn't as strong. But,
3487 * this secret initialization is delayed until
3488 * the last possible moment (packet arrival).
3489 * Although that time is observable, it is
3490 * unpredictably variable. Mash in the most
3491 * volatile clock bits available, and expire the
3492 * secret extra quickly.
3493 */
3494 if (unlikely(tcp_secret_primary->expires ==
3495 tcp_secret_secondary->expires)) {
3496 struct timespec tv;
3497
3498 getnstimeofday(&tv);
3499 bakery[COOKIE_DIGEST_WORDS+0] ^=
3500 (u32)tv.tv_nsec;
3501
3502 tcp_secret_secondary->expires = jiffy
3503 + TCP_SECRET_1MSL
3504 + (0x0f & tcp_cookie_work(bakery, 0));
3505 } else {
3506 tcp_secret_secondary->expires = jiffy
3507 + TCP_SECRET_LIFE
3508 + (0xff & tcp_cookie_work(bakery, 1));
3509 tcp_secret_primary->expires = jiffy
3510 + TCP_SECRET_2MSL
3511 + (0x1f & tcp_cookie_work(bakery, 2));
3512 }
3513 memcpy(&tcp_secret_secondary->secrets[0],
3514 bakery, COOKIE_WORKSPACE_WORDS);
3515
3516 rcu_assign_pointer(tcp_secret_generating,
3517 tcp_secret_secondary);
3518 rcu_assign_pointer(tcp_secret_retiring,
3519 tcp_secret_primary);
3520 /*
3521 * Neither call_rcu() nor synchronize_rcu() needed.
3522 * Retiring data is not freed. It is replaced after
3523 * further (locked) pointer updates, and a quiet time
3524 * (minimum 1MSL, maximum LIFE - 2MSL).
3525 */
3526 }
3527 spin_unlock_bh(&tcp_secret_locker);
3528 } else {
3529 rcu_read_lock_bh();
3530 memcpy(bakery,
3531 &rcu_dereference(tcp_secret_generating)->secrets[0],
3532 COOKIE_WORKSPACE_WORDS);
3533 rcu_read_unlock_bh();
3534 }
3535 return 0;
3536}
3537EXPORT_SYMBOL(tcp_cookie_generator);
3538
3539void tcp_done(struct sock *sk) 3297void tcp_done(struct sock *sk)
3540{ 3298{
3541 struct request_sock *req = tcp_sk(sk)->fastopen_rsk; 3299 struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
@@ -3590,7 +3348,6 @@ void __init tcp_init(void)
3590 unsigned long limit; 3348 unsigned long limit;
3591 int max_rshare, max_wshare, cnt; 3349 int max_rshare, max_wshare, cnt;
3592 unsigned int i; 3350 unsigned int i;
3593 unsigned long jiffy = jiffies;
3594 3351
3595 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); 3352 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
3596 3353
@@ -3666,13 +3423,5 @@ void __init tcp_init(void)
3666 3423
3667 tcp_register_congestion_control(&tcp_reno); 3424 tcp_register_congestion_control(&tcp_reno);
3668 3425
3669 memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
3670 memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets));
3671 tcp_secret_one.expires = jiffy; /* past due */
3672 tcp_secret_two.expires = jiffy; /* past due */
3673 tcp_secret_generating = &tcp_secret_one;
3674 tcp_secret_primary = &tcp_secret_one;
3675 tcp_secret_retiring = &tcp_secret_two;
3676 tcp_secret_secondary = &tcp_secret_two;
3677 tcp_tasklet_init(); 3426 tcp_tasklet_init();
3678} 3427}
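Two things happen in the tcp_tso_segment() hunk above. First, the checksum fixup is loop-invariant: every segment starts from a copy of the original header, so adding the same length delta and refolding yields the same value, computed once as newcheck before the loop. Second, the tcp_wfree ownership swap keeps TCP Small Queues accounting correct by making the last segment, not the immediately freed gso_skb, carry the socket reference and destructor. A sketch of the checksum adjustment in isolation (helper name is illustrative):

	/* One's-complement incremental update: fold the 32-bit sum of
	 * the old checksum and the length delta, then invert, in the
	 * spirit of RFC 1624.
	 */
	static __sum16 check_adjust(__sum16 check, __be32 delta)
	{
		return ~csum_fold((__force __wsum)((__force u32)check +
						   (__force u32)delta));
	}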
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 13b9c08fc158..aafd052865ba 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -93,12 +93,11 @@ int sysctl_tcp_stdurg __read_mostly;
93int sysctl_tcp_rfc1337 __read_mostly; 93int sysctl_tcp_rfc1337 __read_mostly;
94int sysctl_tcp_max_orphans __read_mostly = NR_FILE; 94int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
95int sysctl_tcp_frto __read_mostly = 2; 95int sysctl_tcp_frto __read_mostly = 2;
96int sysctl_tcp_frto_response __read_mostly;
97 96
98int sysctl_tcp_thin_dupack __read_mostly; 97int sysctl_tcp_thin_dupack __read_mostly;
99 98
100int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; 99int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
101int sysctl_tcp_early_retrans __read_mostly = 2; 100int sysctl_tcp_early_retrans __read_mostly = 3;
102 101
103#define FLAG_DATA 0x01 /* Incoming frame contained data. */ 102#define FLAG_DATA 0x01 /* Incoming frame contained data. */
104#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ 103#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -108,10 +107,9 @@ int sysctl_tcp_early_retrans __read_mostly = 2;
108#define FLAG_DATA_SACKED 0x20 /* New SACK. */ 107#define FLAG_DATA_SACKED 0x20 /* New SACK. */
109#define FLAG_ECE 0x40 /* ECE in this ACK */ 108#define FLAG_ECE 0x40 /* ECE in this ACK */
110#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ 109#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
111#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ 110#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
112#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ 111#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
113#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ 112#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
114#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */
115#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ 113#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
116#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ 114#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
117 115
@@ -119,7 +117,6 @@ int sysctl_tcp_early_retrans __read_mostly = 2;
119#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) 117#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
120#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) 118#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
121#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) 119#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
122#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)
123 120
124#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) 121#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
125#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) 122#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
@@ -1160,10 +1157,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
1160 tcp_highest_sack_seq(tp))) 1157 tcp_highest_sack_seq(tp)))
1161 state->reord = min(fack_count, 1158 state->reord = min(fack_count,
1162 state->reord); 1159 state->reord);
1163 1160 if (!after(end_seq, tp->high_seq))
1164 /* SACK enhanced F-RTO (RFC4138; Appendix B) */ 1161 state->flag |= FLAG_ORIG_SACK_ACKED;
1165 if (!after(end_seq, tp->frto_highmark))
1166 state->flag |= FLAG_ONLY_ORIG_SACKED;
1167 } 1162 }
1168 1163
1169 if (sacked & TCPCB_LOST) { 1164 if (sacked & TCPCB_LOST) {
@@ -1556,7 +1551,6 @@ static int
1556tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, 1551tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1557 u32 prior_snd_una) 1552 u32 prior_snd_una)
1558{ 1553{
1559 const struct inet_connection_sock *icsk = inet_csk(sk);
1560 struct tcp_sock *tp = tcp_sk(sk); 1554 struct tcp_sock *tp = tcp_sk(sk);
1561 const unsigned char *ptr = (skb_transport_header(ack_skb) + 1555 const unsigned char *ptr = (skb_transport_header(ack_skb) +
1562 TCP_SKB_CB(ack_skb)->sacked); 1556 TCP_SKB_CB(ack_skb)->sacked);
@@ -1729,12 +1723,6 @@ walk:
1729 start_seq, end_seq, dup_sack); 1723 start_seq, end_seq, dup_sack);
1730 1724
1731advance_sp: 1725advance_sp:
1732 /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
1733 * due to in-order walk
1734 */
1735 if (after(end_seq, tp->frto_highmark))
1736 state.flag &= ~FLAG_ONLY_ORIG_SACKED;
1737
1738 i++; 1726 i++;
1739 } 1727 }
1740 1728
@@ -1751,8 +1739,7 @@ advance_sp:
1751 tcp_verify_left_out(tp); 1739 tcp_verify_left_out(tp);
1752 1740
1753 if ((state.reord < tp->fackets_out) && 1741 if ((state.reord < tp->fackets_out) &&
1754 ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) && 1742 ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
1755 (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
1756 tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); 1743 tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
1757 1744
1758out: 1745out:
@@ -1826,197 +1813,6 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
1826 tp->sacked_out = 0; 1813 tp->sacked_out = 0;
1827} 1814}
1828 1815
1829static int tcp_is_sackfrto(const struct tcp_sock *tp)
1830{
1831 return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
1832}
1833
1834/* F-RTO can only be used if TCP has never retransmitted anything other than
1835 * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
1836 */
1837bool tcp_use_frto(struct sock *sk)
1838{
1839 const struct tcp_sock *tp = tcp_sk(sk);
1840 const struct inet_connection_sock *icsk = inet_csk(sk);
1841 struct sk_buff *skb;
1842
1843 if (!sysctl_tcp_frto)
1844 return false;
1845
1846 /* MTU probe and F-RTO won't really play nicely along currently */
1847 if (icsk->icsk_mtup.probe_size)
1848 return false;
1849
1850 if (tcp_is_sackfrto(tp))
1851 return true;
1852
1853 /* Avoid expensive walking of rexmit queue if possible */
1854 if (tp->retrans_out > 1)
1855 return false;
1856
1857 skb = tcp_write_queue_head(sk);
1858 if (tcp_skb_is_last(sk, skb))
1859 return true;
1860 skb = tcp_write_queue_next(sk, skb); /* Skips head */
1861 tcp_for_write_queue_from(skb, sk) {
1862 if (skb == tcp_send_head(sk))
1863 break;
1864 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
1865 return false;
1866 /* Short-circuit when first non-SACKed skb has been checked */
1867 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
1868 break;
1869 }
1870 return true;
1871}
1872
1873/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
1874 * recovery a bit and use heuristics in tcp_process_frto() to detect if
1875 * the RTO was spurious. Only clear SACKED_RETRANS of the head here to
1876 * keep retrans_out counting accurate (with SACK F-RTO, other than head
1877 * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS
1878 * bits are handled if the Loss state is really to be entered (in
1879 * tcp_enter_frto_loss).
1880 *
1881 * Do like tcp_enter_loss() would; when RTO expires the second time it
1882 * does:
1883 * "Reduce ssthresh if it has not yet been made inside this window."
1884 */
1885void tcp_enter_frto(struct sock *sk)
1886{
1887 const struct inet_connection_sock *icsk = inet_csk(sk);
1888 struct tcp_sock *tp = tcp_sk(sk);
1889 struct sk_buff *skb;
1890
1891 if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
1892 tp->snd_una == tp->high_seq ||
1893 ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
1894 !icsk->icsk_retransmits)) {
1895 tp->prior_ssthresh = tcp_current_ssthresh(sk);
1896 /* Our state is too optimistic in ssthresh() call because cwnd
1897 * is not reduced until tcp_enter_frto_loss() when previous F-RTO
1898 * recovery has not yet completed. Pattern would be this: RTO,
1899 * Cumulative ACK, RTO (2xRTO for the same segment does not end
1900 * up here twice).
1901 * RFC4138 should be more specific on what to do, even though
1902 * RTO is quite unlikely to occur after the first Cumulative ACK
1903 * due to back-off and complexity of triggering events ...
1904 */
1905 if (tp->frto_counter) {
1906 u32 stored_cwnd;
1907 stored_cwnd = tp->snd_cwnd;
1908 tp->snd_cwnd = 2;
1909 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1910 tp->snd_cwnd = stored_cwnd;
1911 } else {
1912 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1913 }
1914 /* ... in theory, cong.control module could do "any tricks" in
1915 * ssthresh(), which means that ca_state, lost bits and lost_out
1916 * counter would have to be faked before the call occurs. We
1917 * consider that too expensive, unlikely and hacky, so modules
1918 * using these in ssthresh() must deal these incompatibility
1919 * issues if they receives CA_EVENT_FRTO and frto_counter != 0
1920 */
1921 tcp_ca_event(sk, CA_EVENT_FRTO);
1922 }
1923
1924 tp->undo_marker = tp->snd_una;
1925 tp->undo_retrans = 0;
1926
1927 skb = tcp_write_queue_head(sk);
1928 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
1929 tp->undo_marker = 0;
1930 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
1931 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1932 tp->retrans_out -= tcp_skb_pcount(skb);
1933 }
1934 tcp_verify_left_out(tp);
1935
1936 /* Too bad if TCP was application limited */
1937 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
1938
1939 /* Earlier loss recovery underway (see RFC4138; Appendix B).
1940 * The last condition is necessary at least in tp->frto_counter case.
1941 */
1942 if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
1943 ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
1944 after(tp->high_seq, tp->snd_una)) {
1945 tp->frto_highmark = tp->high_seq;
1946 } else {
1947 tp->frto_highmark = tp->snd_nxt;
1948 }
1949 tcp_set_ca_state(sk, TCP_CA_Disorder);
1950 tp->high_seq = tp->snd_nxt;
1951 tp->frto_counter = 1;
1952}
1953
1954/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
1955 * which indicates that we should follow the traditional RTO recovery,
1956 * i.e. mark everything lost and do go-back-N retransmission.
1957 */
1958static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
1959{
1960 struct tcp_sock *tp = tcp_sk(sk);
1961 struct sk_buff *skb;
1962
1963 tp->lost_out = 0;
1964 tp->retrans_out = 0;
1965 if (tcp_is_reno(tp))
1966 tcp_reset_reno_sack(tp);
1967
1968 tcp_for_write_queue(skb, sk) {
1969 if (skb == tcp_send_head(sk))
1970 break;
1971
1972 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1973 /*
1974 * Count the retransmission made on RTO correctly (only when
1975 * waiting for the first ACK and did not get it)...
1976 */
1977 if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
1978 /* For some reason this R-bit might get cleared? */
1979 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1980 tp->retrans_out += tcp_skb_pcount(skb);
1981 /* ...enter this if branch just for the first segment */
1982 flag |= FLAG_DATA_ACKED;
1983 } else {
1984 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
1985 tp->undo_marker = 0;
1986 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1987 }
1988
1989 /* Marking forward transmissions that were made after RTO lost
1990 * can cause unnecessary retransmissions in some scenarios,
1991 * SACK blocks will mitigate that in some but not in all cases.
1992 * We used to not mark them but it was causing break-ups with
1993 * receivers that do only in-order receival.
1994 *
1995 * TODO: we could detect presence of such receiver and select
1996 * different behavior per flow.
1997 */
1998 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
1999 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
2000 tp->lost_out += tcp_skb_pcount(skb);
2001 tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
2002 }
2003 }
2004 tcp_verify_left_out(tp);
2005
2006 tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
2007 tp->snd_cwnd_cnt = 0;
2008 tp->snd_cwnd_stamp = tcp_time_stamp;
2009 tp->frto_counter = 0;
2010
2011 tp->reordering = min_t(unsigned int, tp->reordering,
2012 sysctl_tcp_reordering);
2013 tcp_set_ca_state(sk, TCP_CA_Loss);
2014 tp->high_seq = tp->snd_nxt;
2015 TCP_ECN_queue_cwr(tp);
2016
2017 tcp_clear_all_retrans_hints(tp);
2018}
2019
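The cwnd reset at the end of tcp_enter_frto_loss() amounts to: whatever is still in flight plus the small allowance (2 or 3 segments) passed by the caller. A self-contained restatement of that calculation, with the in-flight formula mirroring tcp_packets_in_flight() (fields are modeled, not the kernel structs):

#include <stdio.h>

struct model_tp {
	unsigned int packets_out, sacked_out, lost_out, retrans_out;
};

/* sent - (sacked + lost) + retransmitted, as in tcp_packets_in_flight() */
static unsigned int model_packets_in_flight(const struct model_tp *tp)
{
	return tp->packets_out - (tp->sacked_out + tp->lost_out) +
	       tp->retrans_out;
}

int main(void)
{
	struct model_tp tp = {
		.packets_out = 10, .sacked_out = 3,
		.lost_out = 4, .retrans_out = 1,
	};
	int allowed_segments = 2;	/* as passed for frto_counter == 1 */
	unsigned int cwnd = model_packets_in_flight(&tp) + allowed_segments;

	printf("in_flight=%u new cwnd=%u\n",
	       model_packets_in_flight(&tp), cwnd);	/* in_flight=4 cwnd=6 */
	return 0;
}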
2020static void tcp_clear_retrans_partial(struct tcp_sock *tp) 1816static void tcp_clear_retrans_partial(struct tcp_sock *tp)
2021{ 1817{
2022 tp->retrans_out = 0; 1818 tp->retrans_out = 0;
@@ -2043,10 +1839,13 @@ void tcp_enter_loss(struct sock *sk, int how)
2043 const struct inet_connection_sock *icsk = inet_csk(sk); 1839 const struct inet_connection_sock *icsk = inet_csk(sk);
2044 struct tcp_sock *tp = tcp_sk(sk); 1840 struct tcp_sock *tp = tcp_sk(sk);
2045 struct sk_buff *skb; 1841 struct sk_buff *skb;
1842 bool new_recovery = false;
2046 1843
2047 /* Reduce ssthresh if it has not yet been made inside this window. */ 1844 /* Reduce ssthresh if it has not yet been made inside this window. */
2048 if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || 1845 if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
1846 !after(tp->high_seq, tp->snd_una) ||
2049 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { 1847 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
1848 new_recovery = true;
2050 tp->prior_ssthresh = tcp_current_ssthresh(sk); 1849 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2051 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); 1850 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2052 tcp_ca_event(sk, CA_EVENT_LOSS); 1851 tcp_ca_event(sk, CA_EVENT_LOSS);
@@ -2088,8 +1887,14 @@ void tcp_enter_loss(struct sock *sk, int how)
2088 tcp_set_ca_state(sk, TCP_CA_Loss); 1887 tcp_set_ca_state(sk, TCP_CA_Loss);
2089 tp->high_seq = tp->snd_nxt; 1888 tp->high_seq = tp->snd_nxt;
2090 TCP_ECN_queue_cwr(tp); 1889 TCP_ECN_queue_cwr(tp);
2091 /* Abort F-RTO algorithm if one is in progress */ 1890
2092 tp->frto_counter = 0; 1891 /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
1892 * loss recovery is underway except recurring timeout(s) on
1893 * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
1894 */
1895 tp->frto = sysctl_tcp_frto &&
1896 (new_recovery || icsk->icsk_retransmits) &&
1897 !inet_csk(sk)->icsk_mtup.probe_size;
2093} 1898}
2094 1899
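The replacement predicate above is compact enough to restate on its own: F-RTO is armed only when this timeout starts a fresh recovery episode or recurs on the same SND.UNA, and never while an MTU probe is outstanding. A sketch in plain C (struct and helper invented for illustration; only the boolean logic follows the diff):

#include <stdbool.h>
#include <stdio.h>

struct model_state {
	bool sysctl_tcp_frto;	/* administrative switch */
	bool new_recovery;	/* ssthresh was just reduced for this loss */
	int  icsk_retransmits;	/* recurring timeouts on the same SND.UNA */
	int  mtup_probe_size;	/* nonzero while an MTU probe is in flight */
};

static bool frto_armed(const struct model_state *s)
{
	return s->sysctl_tcp_frto &&
	       (s->new_recovery || s->icsk_retransmits) &&
	       !s->mtup_probe_size;
}

int main(void)
{
	struct model_state s = { true, true, 0, 0 };

	printf("fresh recovery: %d\n", frto_armed(&s));	/* 1 */
	s.new_recovery = false;
	s.icsk_retransmits = 2;
	printf("recurring RTO:  %d\n", frto_armed(&s));	/* 1 */
	s.mtup_probe_size = 1400;
	printf("MTU probing:    %d\n", frto_armed(&s));	/* 0 */
	return 0;
}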
2095/* If ACK arrived pointing to a remembered SACK, it means that our 1900/* If ACK arrived pointing to a remembered SACK, it means that our
@@ -2148,15 +1953,16 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
2148 * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples 1953 * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples
2149 * available, or RTO is scheduled to fire first. 1954 * available, or RTO is scheduled to fire first.
2150 */ 1955 */
2151 if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt) 1956 if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
1957 (flag & FLAG_ECE) || !tp->srtt)
2152 return false; 1958 return false;
2153 1959
2154 delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); 1960 delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
2155 if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) 1961 if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
2156 return false; 1962 return false;
2157 1963
2158 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX); 1964 inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
2159 tp->early_retrans_delayed = 1; 1965 TCP_RTO_MAX);
2160 return true; 1966 return true;
2161} 1967}
2162 1968
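The delay computed above is max(RTT/4, 2 ms); since tp->srtt stores 8x the smoothed RTT in jiffies, (srtt >> 5) is RTT/4. A quick standalone check of that arithmetic (HZ=1000 assumed so one jiffy is one millisecond; this is a model, not kernel code):

#include <stdio.h>

static unsigned long early_retrans_delay(unsigned long srtt_x8,
					 unsigned long two_ms)
{
	unsigned long quarter_rtt = srtt_x8 >> 5;	/* (8*RTT)/32 == RTT/4 */

	return quarter_rtt > two_ms ? quarter_rtt : two_ms;
}

int main(void)
{
	/* with HZ=1000, an srtt of 40 ms is stored as 320 */
	printf("RTT 40ms -> delay %lums\n", early_retrans_delay(320, 2)); /* 10 */
	printf("RTT 4ms  -> delay %lums\n", early_retrans_delay(32, 2));  /* 2  */
	return 0;
}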
@@ -2272,10 +2078,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
2272 struct tcp_sock *tp = tcp_sk(sk); 2078 struct tcp_sock *tp = tcp_sk(sk);
2273 __u32 packets_out; 2079 __u32 packets_out;
2274 2080
2275 /* Do not perform any recovery during F-RTO algorithm */
2276 if (tp->frto_counter)
2277 return false;
2278
2279 /* Trick#1: The loss is proven. */ 2081 /* Trick#1: The loss is proven. */
2280 if (tp->lost_out) 2082 if (tp->lost_out)
2281 return true; 2083 return true;
@@ -2319,7 +2121,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
2319 * interval if appropriate. 2121 * interval if appropriate.
2320 */ 2122 */
2321 if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && 2123 if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
2322 (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && 2124 (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
2323 !tcp_may_send_now(sk)) 2125 !tcp_may_send_now(sk))
2324 return !tcp_pause_early_retransmit(sk, flag); 2126 return !tcp_pause_early_retransmit(sk, flag);
2325 2127
@@ -2636,12 +2438,12 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
2636 return failed; 2438 return failed;
2637} 2439}
2638 2440
2639/* Undo during loss recovery after partial ACK. */ 2441/* Undo during loss recovery after partial ACK or using F-RTO. */
2640static bool tcp_try_undo_loss(struct sock *sk) 2442static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
2641{ 2443{
2642 struct tcp_sock *tp = tcp_sk(sk); 2444 struct tcp_sock *tp = tcp_sk(sk);
2643 2445
2644 if (tcp_may_undo(tp)) { 2446 if (frto_undo || tcp_may_undo(tp)) {
2645 struct sk_buff *skb; 2447 struct sk_buff *skb;
2646 tcp_for_write_queue(skb, sk) { 2448 tcp_for_write_queue(skb, sk) {
2647 if (skb == tcp_send_head(sk)) 2449 if (skb == tcp_send_head(sk))
@@ -2655,9 +2457,12 @@ static bool tcp_try_undo_loss(struct sock *sk)
2655 tp->lost_out = 0; 2457 tp->lost_out = 0;
2656 tcp_undo_cwr(sk, true); 2458 tcp_undo_cwr(sk, true);
2657 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); 2459 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2460 if (frto_undo)
2461 NET_INC_STATS_BH(sock_net(sk),
2462 LINUX_MIB_TCPSPURIOUSRTOS);
2658 inet_csk(sk)->icsk_retransmits = 0; 2463 inet_csk(sk)->icsk_retransmits = 0;
2659 tp->undo_marker = 0; 2464 tp->undo_marker = 0;
2660 if (tcp_is_sack(tp)) 2465 if (frto_undo || tcp_is_sack(tp))
2661 tcp_set_ca_state(sk, TCP_CA_Open); 2466 tcp_set_ca_state(sk, TCP_CA_Open);
2662 return true; 2467 return true;
2663 } 2468 }
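Condensing the two call sites of the reworked tcp_try_undo_loss(): an undo happens when the usual undo conditions hold or when F-RTO proved the RTO spurious, and only the latter forces CA_Open even on non-SACK (Reno) flows. A toy decision function capturing that (names invented for the sketch):

#include <stdbool.h>
#include <stdio.h>

static bool loss_undo(bool frto_undo, bool tcp_may_undo,
		      bool is_sack, bool *goes_open)
{
	if (!(frto_undo || tcp_may_undo))
		return false;
	*goes_open = frto_undo || is_sack;	/* Reno stays put otherwise */
	return true;
}

int main(void)
{
	bool open = false;
	bool undo = loss_undo(true, false, false, &open);

	printf("frto, reno:  undo=%d open=%d\n", undo, open);	/* 1 1 */
	printf("plain, reno: undo=%d\n",
	       loss_undo(false, false, false, &open));		/* 0 */
	return 0;
}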
@@ -2679,6 +2484,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
2679 struct tcp_sock *tp = tcp_sk(sk); 2484 struct tcp_sock *tp = tcp_sk(sk);
2680 2485
2681 tp->high_seq = tp->snd_nxt; 2486 tp->high_seq = tp->snd_nxt;
2487 tp->tlp_high_seq = 0;
2682 tp->snd_cwnd_cnt = 0; 2488 tp->snd_cwnd_cnt = 0;
2683 tp->prior_cwnd = tp->snd_cwnd; 2489 tp->prior_cwnd = tp->snd_cwnd;
2684 tp->prr_delivered = 0; 2490 tp->prr_delivered = 0;
@@ -2756,7 +2562,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
2756 2562
2757 tcp_verify_left_out(tp); 2563 tcp_verify_left_out(tp);
2758 2564
2759 if (!tp->frto_counter && !tcp_any_retrans_done(sk)) 2565 if (!tcp_any_retrans_done(sk))
2760 tp->retrans_stamp = 0; 2566 tp->retrans_stamp = 0;
2761 2567
2762 if (flag & FLAG_ECE) 2568 if (flag & FLAG_ECE)
@@ -2873,6 +2679,58 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2873 tcp_set_ca_state(sk, TCP_CA_Recovery); 2679 tcp_set_ca_state(sk, TCP_CA_Recovery);
2874} 2680}
2875 2681
2682/* Process an ACK in CA_Loss state. Move to CA_Open if the lost data are
2683 * recovered or the RTO proves spurious. Otherwise retransmit more on partial ACKs.
2684 */
2685static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
2686{
2687 struct inet_connection_sock *icsk = inet_csk(sk);
2688 struct tcp_sock *tp = tcp_sk(sk);
2689 bool recovered = !before(tp->snd_una, tp->high_seq);
2690
2691 if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
2692 if (flag & FLAG_ORIG_SACK_ACKED) {
2693 /* Step 3.b. A timeout is spurious if not all data are
2694 * lost, i.e., never-retransmitted data are (s)acked.
2695 */
2696 tcp_try_undo_loss(sk, true);
2697 return;
2698 }
2699 if (after(tp->snd_nxt, tp->high_seq) &&
2700 (flag & FLAG_DATA_SACKED || is_dupack)) {
2701 tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
2702 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
2703 tp->high_seq = tp->snd_nxt;
2704 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
2705 TCP_NAGLE_OFF);
2706 if (after(tp->snd_nxt, tp->high_seq))
2707 return; /* Step 2.b */
2708 tp->frto = 0;
2709 }
2710 }
2711
2712 if (recovered) {
2713 /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
2714 icsk->icsk_retransmits = 0;
2715 tcp_try_undo_recovery(sk);
2716 return;
2717 }
2718 if (flag & FLAG_DATA_ACKED)
2719 icsk->icsk_retransmits = 0;
2720 if (tcp_is_reno(tp)) {
2721 /* A Reno DUPACK means new data in F-RTO step 2.b above are
2722 * delivered. Lower inflight to clock out (re)transmissions.
2723 */
2724 if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
2725 tcp_add_reno_sack(sk);
2726 else if (flag & FLAG_SND_UNA_ADVANCED)
2727 tcp_reset_reno_sack(tp);
2728 }
2729 if (tcp_try_undo_loss(sk, false))
2730 return;
2731 tcp_xmit_retransmit_queue(sk);
2732}
2733
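The new tcp_process_loss() packs RFC 5682 sec 3.1 into three per-ACK outcomes while tp->frto is set. The userspace restatement below walks the same branches in order (flag names mirror the diff but the types are modeled, and the step 2.b transmit side effect is reduced to a verdict):

#include <stdio.h>

enum frto_verdict { FRTO_UNDO, FRTO_REAL_LOSS, FRTO_SEND_NEW, FRTO_KEEP };

struct ack_info {
	int orig_sack_acked;	/* FLAG_ORIG_SACK_ACKED */
	int beyond_high_seq;	/* snd_nxt advanced past high_seq */
	int data_sacked_or_dup;	/* FLAG_DATA_SACKED || is_dupack */
	int snd_una_advanced;	/* FLAG_SND_UNA_ADVANCED */
	int recovered;		/* !before(snd_una, high_seq) */
};

static enum frto_verdict frto_step(const struct ack_info *a)
{
	if (a->orig_sack_acked)
		return FRTO_UNDO;			/* step 3.b: spurious */
	if (a->beyond_high_seq && a->data_sacked_or_dup)
		return FRTO_REAL_LOSS;			/* step 3.a, 2nd part */
	if (a->snd_una_advanced && !a->recovered)
		return FRTO_SEND_NEW;			/* step 2.b: new data */
	return FRTO_KEEP;				/* wait for more ACKs */
}

int main(void)
{
	struct ack_info spurious = { .orig_sack_acked = 1 };
	struct ack_info partial  = { .snd_una_advanced = 1 };

	printf("%d %d\n", frto_step(&spurious), frto_step(&partial)); /* 0 2 */
	return 0;
}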
2876/* Process an event, which can update packets-in-flight not trivially. 2734/* Process an event, which can update packets-in-flight not trivially.
2877 * Main goal of this function is to calculate new estimate for left_out, 2735 * Main goal of this function is to calculate new estimate for left_out,
2878 * taking into account both packets sitting in receiver's buffer and 2736 * taking into account both packets sitting in receiver's buffer and
@@ -2919,12 +2777,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
2919 tp->retrans_stamp = 0; 2777 tp->retrans_stamp = 0;
2920 } else if (!before(tp->snd_una, tp->high_seq)) { 2778 } else if (!before(tp->snd_una, tp->high_seq)) {
2921 switch (icsk->icsk_ca_state) { 2779 switch (icsk->icsk_ca_state) {
2922 case TCP_CA_Loss:
2923 icsk->icsk_retransmits = 0;
2924 if (tcp_try_undo_recovery(sk))
2925 return;
2926 break;
2927
2928 case TCP_CA_CWR: 2780 case TCP_CA_CWR:
2929 /* CWR is to be held something *above* high_seq 2781 /* CWR is to be held something *above* high_seq
2930 * is ACKed for CWR bit to reach receiver. */ 2782 * is ACKed for CWR bit to reach receiver. */
@@ -2955,18 +2807,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
2955 newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; 2807 newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
2956 break; 2808 break;
2957 case TCP_CA_Loss: 2809 case TCP_CA_Loss:
2958 if (flag & FLAG_DATA_ACKED) 2810 tcp_process_loss(sk, flag, is_dupack);
2959 icsk->icsk_retransmits = 0;
2960 if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
2961 tcp_reset_reno_sack(tp);
2962 if (!tcp_try_undo_loss(sk)) {
2963 tcp_moderate_cwnd(tp);
2964 tcp_xmit_retransmit_queue(sk);
2965 return;
2966 }
2967 if (icsk->icsk_ca_state != TCP_CA_Open) 2811 if (icsk->icsk_ca_state != TCP_CA_Open)
2968 return; 2812 return;
2969 /* Loss is undone; fall through to processing in Open state. */ 2813 /* Fall through to processing in Open state. */
2970 default: 2814 default:
2971 if (tcp_is_reno(tp)) { 2815 if (tcp_is_reno(tp)) {
2972 if (flag & FLAG_SND_UNA_ADVANCED) 2816 if (flag & FLAG_SND_UNA_ADVANCED)
@@ -3079,6 +2923,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
3079 */ 2923 */
3080void tcp_rearm_rto(struct sock *sk) 2924void tcp_rearm_rto(struct sock *sk)
3081{ 2925{
2926 const struct inet_connection_sock *icsk = inet_csk(sk);
3082 struct tcp_sock *tp = tcp_sk(sk); 2927 struct tcp_sock *tp = tcp_sk(sk);
3083 2928
3084 /* If the retrans timer is currently being used by Fast Open 2929 /* If the retrans timer is currently being used by Fast Open
@@ -3092,12 +2937,13 @@ void tcp_rearm_rto(struct sock *sk)
3092 } else { 2937 } else {
3093 u32 rto = inet_csk(sk)->icsk_rto; 2938 u32 rto = inet_csk(sk)->icsk_rto;
3094 /* Offset the time elapsed after installing regular RTO */ 2939 /* Offset the time elapsed after installing regular RTO */
3095 if (tp->early_retrans_delayed) { 2940 if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2941 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
3096 struct sk_buff *skb = tcp_write_queue_head(sk); 2942 struct sk_buff *skb = tcp_write_queue_head(sk);
3097 const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; 2943 const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto;
3098 s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); 2944 s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
3099 /* delta may not be positive if the socket is locked 2945 /* delta may not be positive if the socket is locked
3100 * when the delayed ER timer fires and is rescheduled. 2946 * when the retrans timer fires and is rescheduled.
3101 */ 2947 */
3102 if (delta > 0) 2948 if (delta > 0)
3103 rto = delta; 2949 rto = delta;
@@ -3105,7 +2951,6 @@ void tcp_rearm_rto(struct sock *sk)
3105 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, 2951 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
3106 TCP_RTO_MAX); 2952 TCP_RTO_MAX);
3107 } 2953 }
3108 tp->early_retrans_delayed = 0;
3109} 2954}
3110 2955
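When an early-retransmit or loss-probe timer was pending, the RTO installed above is offset by how long the head skb has already waited: the timer should fire at head->when + rto, not now + rto. The arithmetic, modeled with plain jiffies counters (the (s32)-style cast guards against a stale timestamp, as the comment in the diff notes):

#include <stdio.h>

static unsigned int effective_rto(unsigned int head_sent_when,
				  unsigned int now, unsigned int rto)
{
	int delta = (int)(head_sent_when + rto - now);

	/* delta can be <= 0 if the timer fired while the socket was
	 * locked and is being rescheduled; keep the full rto then */
	return delta > 0 ? (unsigned int)delta : rto;
}

int main(void)
{
	printf("%u\n", effective_rto(1000, 1100, 300));	/* 200 jiffies left */
	printf("%u\n", effective_rto(1000, 1400, 300));	/* stale: full 300 */
	return 0;
}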
3111/* This function is called when the delayed ER timer fires. TCP enters 2956/* This function is called when the delayed ER timer fires. TCP enters
@@ -3193,8 +3038,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3193 flag |= FLAG_RETRANS_DATA_ACKED; 3038 flag |= FLAG_RETRANS_DATA_ACKED;
3194 ca_seq_rtt = -1; 3039 ca_seq_rtt = -1;
3195 seq_rtt = -1; 3040 seq_rtt = -1;
3196 if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
3197 flag |= FLAG_NONHEAD_RETRANS_ACKED;
3198 } else { 3041 } else {
3199 ca_seq_rtt = now - scb->when; 3042 ca_seq_rtt = now - scb->when;
3200 last_ackt = skb->tstamp; 3043 last_ackt = skb->tstamp;
@@ -3203,6 +3046,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3203 } 3046 }
3204 if (!(sacked & TCPCB_SACKED_ACKED)) 3047 if (!(sacked & TCPCB_SACKED_ACKED))
3205 reord = min(pkts_acked, reord); 3048 reord = min(pkts_acked, reord);
3049 if (!after(scb->end_seq, tp->high_seq))
3050 flag |= FLAG_ORIG_SACK_ACKED;
3206 } 3051 }
3207 3052
3208 if (sacked & TCPCB_SACKED_ACKED) 3053 if (sacked & TCPCB_SACKED_ACKED)
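The newly set FLAG_ORIG_SACK_ACKED is the evidence tcp_process_loss() later keys on: an acked segment that was never retransmitted and sits at or below high_seq proves the original transmission arrived, so the RTO was spurious. A modeled version of the check (plain integers instead of the kernel's wrap-safe after()):

#include <stdio.h>

static int orig_sack_acked(int was_retransmitted,
			   unsigned int end_seq, unsigned int high_seq)
{
	return !was_retransmitted && !(end_seq > high_seq);
}

int main(void)
{
	printf("%d\n", orig_sack_acked(0, 1000, 5000)); /* 1: spurious RTO hint */
	printf("%d\n", orig_sack_acked(1, 1000, 5000)); /* 0: rtx proves nothing */
	return 0;
}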
@@ -3403,150 +3248,6 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
3403 return flag; 3248 return flag;
3404} 3249}
3405 3250
3406/* A very conservative spurious RTO response algorithm: reduce cwnd and
3407 * continue in congestion avoidance.
3408 */
3409static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
3410{
3411 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
3412 tp->snd_cwnd_cnt = 0;
3413 TCP_ECN_queue_cwr(tp);
3414 tcp_moderate_cwnd(tp);
3415}
3416
3417/* A conservative spurious RTO response algorithm: reduce cwnd using
3418 * PRR and continue in congestion avoidance.
3419 */
3420static void tcp_cwr_spur_to_response(struct sock *sk)
3421{
3422 tcp_enter_cwr(sk, 0);
3423}
3424
3425static void tcp_undo_spur_to_response(struct sock *sk, int flag)
3426{
3427 if (flag & FLAG_ECE)
3428 tcp_cwr_spur_to_response(sk);
3429 else
3430 tcp_undo_cwr(sk, true);
3431}
3432
3433/* F-RTO spurious RTO detection algorithm (RFC4138)
3434 *
3435 * F-RTO takes effect on the two new ACKs that follow an RTO (well, almost; see
3436 * inline comments). State (the ACK number) is kept in frto_counter. When an ACK
3437 * advances the window (but not to or beyond the highest sequence sent before RTO):
3438 * On First ACK, send two new segments out.
3439 * On Second ACK, RTO was likely spurious. Do spurious response (response
3440 * algorithm is not part of the F-RTO detection algorithm
3441 * given in RFC4138 but can be selected separately).
3442 * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss
3443 * and TCP falls back to conventional RTO recovery. F-RTO allows overriding
3444 * of Nagle; this is done using frto_counter states 2 and 3: when a new data
3445 * segment of any size is sent during F-RTO, state 2 is upgraded to 3.
3446 *
3447 * Rationale: if the RTO was spurious, new ACKs should arrive from the
3448 * original window even after we transmit two new data segments.
3449 *
3450 * SACK version:
3451 * in the first step, wait until the first cumulative ACK arrives, then move
3452 * to the second step, where the next ACK decides.
3453 *
3454 * F-RTO is implemented (mainly) in four functions:
3455 * - tcp_use_frto() is used to determine if TCP can use F-RTO
3456 * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is
3457 * called when tcp_use_frto() showed green light
3458 * - tcp_process_frto() handles incoming ACKs during F-RTO algorithm
3459 * - tcp_enter_frto_loss() is called if there is not enough evidence
3460 * to prove that the RTO is indeed spurious. It transfers the control
3461 * from F-RTO to the conventional RTO recovery
3462 */
3463static bool tcp_process_frto(struct sock *sk, int flag)
3464{
3465 struct tcp_sock *tp = tcp_sk(sk);
3466
3467 tcp_verify_left_out(tp);
3468
3469 /* Duplicate the behavior from Loss state (fastretrans_alert) */
3470 if (flag & FLAG_DATA_ACKED)
3471 inet_csk(sk)->icsk_retransmits = 0;
3472
3473 if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
3474 ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
3475 tp->undo_marker = 0;
3476
3477 if (!before(tp->snd_una, tp->frto_highmark)) {
3478 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
3479 return true;
3480 }
3481
3482 if (!tcp_is_sackfrto(tp)) {
3483 /* RFC4138 shortcoming in step 2; should also have case c):
3484 * the ACK is neither a duplicate nor advances the window, e.g., opposite dir
3485 * data, winupdate
3486 */
3487 if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
3488 return true;
3489
3490 if (!(flag & FLAG_DATA_ACKED)) {
3491 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
3492 flag);
3493 return true;
3494 }
3495 } else {
3496 if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
3497 if (!tcp_packets_in_flight(tp)) {
3498 tcp_enter_frto_loss(sk, 2, flag);
3499 return true;
3500 }
3501
3502 /* Prevent sending of new data. */
3503 tp->snd_cwnd = min(tp->snd_cwnd,
3504 tcp_packets_in_flight(tp));
3505 return true;
3506 }
3507
3508 if ((tp->frto_counter >= 2) &&
3509 (!(flag & FLAG_FORWARD_PROGRESS) ||
3510 ((flag & FLAG_DATA_SACKED) &&
3511 !(flag & FLAG_ONLY_ORIG_SACKED)))) {
3512 /* RFC4138 shortcoming (see comment above) */
3513 if (!(flag & FLAG_FORWARD_PROGRESS) &&
3514 (flag & FLAG_NOT_DUP))
3515 return true;
3516
3517 tcp_enter_frto_loss(sk, 3, flag);
3518 return true;
3519 }
3520 }
3521
3522 if (tp->frto_counter == 1) {
3523 /* tcp_may_send_now needs to see updated state */
3524 tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
3525 tp->frto_counter = 2;
3526
3527 if (!tcp_may_send_now(sk))
3528 tcp_enter_frto_loss(sk, 2, flag);
3529
3530 return true;
3531 } else {
3532 switch (sysctl_tcp_frto_response) {
3533 case 2:
3534 tcp_undo_spur_to_response(sk, flag);
3535 break;
3536 case 1:
3537 tcp_conservative_spur_to_response(tp);
3538 break;
3539 default:
3540 tcp_cwr_spur_to_response(sk);
3541 break;
3542 }
3543 tp->frto_counter = 0;
3544 tp->undo_marker = 0;
3545 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
3546 }
3547 return false;
3548}
3549
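For contrast with the new tp->frto flag, the block deleted above drove a small per-ACK counter: 1 after the RTO, 2 once the first window-advancing ACK allowed two new segments out, and a final verdict on the second ACK. A toy trace of that old state machine (heavily simplified; the response-policy switch and the SACK variant are omitted):

#include <stdio.h>

static int old_frto_step(int counter, int ack_advances_window)
{
	if (!ack_advances_window)
		return -1;	/* dupack: fall back to RTO recovery */
	if (counter == 1)
		return 2;	/* first new ACK: send 2 new segments */
	return 0;		/* second new ACK: RTO was spurious */
}

int main(void)
{
	int c = 1;		/* set by tcp_enter_frto() */

	c = old_frto_step(c, 1);
	printf("after 1st ACK: %d\n", c);			/* 2 */
	printf("after 2nd ACK: %d\n", old_frto_step(c, 1));	/* 0: spurious */
	return 0;
}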
3550/* RFC 5961 7 [ACK Throttling] */ 3251/* RFC 5961 7 [ACK Throttling] */
3551static void tcp_send_challenge_ack(struct sock *sk) 3252static void tcp_send_challenge_ack(struct sock *sk)
3552{ 3253{
@@ -3586,6 +3287,38 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3586 } 3287 }
3587} 3288}
3588 3289
3290/* This routine deals with acks during a TLP episode.
3291 * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
3292 */
3293static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3294{
3295 struct tcp_sock *tp = tcp_sk(sk);
3296 bool is_tlp_dupack = (ack == tp->tlp_high_seq) &&
3297 !(flag & (FLAG_SND_UNA_ADVANCED |
3298 FLAG_NOT_DUP | FLAG_DATA_SACKED));
3299
3300 /* Mark the end of the TLP episode on receiving a TLP dupack or
3301 * when the ack is after tlp_high_seq.
3302 */
3303 if (is_tlp_dupack) {
3304 tp->tlp_high_seq = 0;
3305 return;
3306 }
3307
3308 if (after(ack, tp->tlp_high_seq)) {
3309 tp->tlp_high_seq = 0;
3310 /* Don't reduce cwnd if DSACK arrives for TLP retrans. */
3311 if (!(flag & FLAG_DSACKING_ACK)) {
3312 tcp_init_cwnd_reduction(sk, true);
3313 tcp_set_ca_state(sk, TCP_CA_CWR);
3314 tcp_end_cwnd_reduction(sk);
3315 tcp_set_ca_state(sk, TCP_CA_Open);
3316 NET_INC_STATS_BH(sock_net(sk),
3317 LINUX_MIB_TCPLOSSPROBERECOVERY);
3318 }
3319 }
3320}
3321
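The new tcp_process_tlp_ack() distinguishes two endings for a TLP episode: a pure dupack for exactly tlp_high_seq (the probe itself was the duplicate; nothing was lost) versus an ACK beyond it (the probe repaired a real loss, so cwnd is reduced once unless a DSACK exonerates it). A modeled classifier with flag names mirroring the diff:

#include <stdio.h>

enum tlp_outcome { TLP_NONE, TLP_DUPACK_END, TLP_RECOVERY_END };

static enum tlp_outcome classify_tlp_ack(unsigned int ack,
					 unsigned int tlp_high_seq,
					 int snd_una_advanced,
					 int not_dup, int data_sacked)
{
	if (!tlp_high_seq)
		return TLP_NONE;		/* no episode in progress */
	if (ack == tlp_high_seq &&
	    !snd_una_advanced && !not_dup && !data_sacked)
		return TLP_DUPACK_END;		/* probe was spurious */
	if (ack > tlp_high_seq)			/* kernel uses after() */
		return TLP_RECOVERY_END;	/* reduce cwnd unless DSACK */
	return TLP_NONE;
}

int main(void)
{
	printf("%d\n", classify_tlp_ack(5000, 5000, 0, 0, 0));	/* 1 */
	printf("%d\n", classify_tlp_ack(6000, 5000, 1, 1, 0));	/* 2 */
	return 0;
}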
3589/* This routine deals with incoming acks, but not outgoing ones. */ 3322/* This routine deals with incoming acks, but not outgoing ones. */
3590static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) 3323static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3591{ 3324{
@@ -3600,7 +3333,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3600 int prior_packets; 3333 int prior_packets;
3601 int prior_sacked = tp->sacked_out; 3334 int prior_sacked = tp->sacked_out;
3602 int pkts_acked = 0; 3335 int pkts_acked = 0;
3603 bool frto_cwnd = false;
3604 3336
3605 /* If the ack is older than previous acks 3337 /* If the ack is older than previous acks
3606 * then we can probably ignore it. 3338 * then we can probably ignore it.
@@ -3620,7 +3352,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3620 if (after(ack, tp->snd_nxt)) 3352 if (after(ack, tp->snd_nxt))
3621 goto invalid_ack; 3353 goto invalid_ack;
3622 3354
3623 if (tp->early_retrans_delayed) 3355 if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
3356 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
3624 tcp_rearm_rto(sk); 3357 tcp_rearm_rto(sk);
3625 3358
3626 if (after(ack, prior_snd_una)) 3359 if (after(ack, prior_snd_una))
@@ -3679,30 +3412,29 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3679 3412
3680 pkts_acked = prior_packets - tp->packets_out; 3413 pkts_acked = prior_packets - tp->packets_out;
3681 3414
3682 if (tp->frto_counter)
3683 frto_cwnd = tcp_process_frto(sk, flag);
3684 /* Guarantee sacktag reordering detection against wrap-arounds */
3685 if (before(tp->frto_highmark, tp->snd_una))
3686 tp->frto_highmark = 0;
3687
3688 if (tcp_ack_is_dubious(sk, flag)) { 3415 if (tcp_ack_is_dubious(sk, flag)) {
3689 /* Advance CWND, if state allows this. */ 3416 /* Advance CWND, if state allows this. */
3690 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && 3417 if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
3691 tcp_may_raise_cwnd(sk, flag))
3692 tcp_cong_avoid(sk, ack, prior_in_flight); 3418 tcp_cong_avoid(sk, ack, prior_in_flight);
3693 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); 3419 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3694 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, 3420 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3695 is_dupack, flag); 3421 is_dupack, flag);
3696 } else { 3422 } else {
3697 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) 3423 if (flag & FLAG_DATA_ACKED)
3698 tcp_cong_avoid(sk, ack, prior_in_flight); 3424 tcp_cong_avoid(sk, ack, prior_in_flight);
3699 } 3425 }
3700 3426
3427 if (tp->tlp_high_seq)
3428 tcp_process_tlp_ack(sk, ack, flag);
3429
3701 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { 3430 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
3702 struct dst_entry *dst = __sk_dst_get(sk); 3431 struct dst_entry *dst = __sk_dst_get(sk);
3703 if (dst) 3432 if (dst)
3704 dst_confirm(dst); 3433 dst_confirm(dst);
3705 } 3434 }
3435
3436 if (icsk->icsk_pending == ICSK_TIME_RETRANS)
3437 tcp_schedule_loss_probe(sk);
3706 return 1; 3438 return 1;
3707 3439
3708no_queue: 3440no_queue:
@@ -3716,6 +3448,9 @@ no_queue:
3716 */ 3448 */
3717 if (tcp_send_head(sk)) 3449 if (tcp_send_head(sk))
3718 tcp_ack_probe(sk); 3450 tcp_ack_probe(sk);
3451
3452 if (tp->tlp_high_seq)
3453 tcp_process_tlp_ack(sk, ack, flag);
3719 return 1; 3454 return 1;
3720 3455
3721invalid_ack: 3456invalid_ack:
@@ -3740,8 +3475,8 @@ old_ack:
3740 * But, this can also be called on packets in the established flow when 3475 * But, this can also be called on packets in the established flow when
3741 * the fast version below fails. 3476 * the fast version below fails.
3742 */ 3477 */
3743void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, 3478void tcp_parse_options(const struct sk_buff *skb,
3744 const u8 **hvpp, int estab, 3479 struct tcp_options_received *opt_rx, int estab,
3745 struct tcp_fastopen_cookie *foc) 3480 struct tcp_fastopen_cookie *foc)
3746{ 3481{
3747 const unsigned char *ptr; 3482 const unsigned char *ptr;
@@ -3825,31 +3560,6 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
3825 */ 3560 */
3826 break; 3561 break;
3827#endif 3562#endif
3828 case TCPOPT_COOKIE:
3829 /* This option is variable length.
3830 */
3831 switch (opsize) {
3832 case TCPOLEN_COOKIE_BASE:
3833 /* not yet implemented */
3834 break;
3835 case TCPOLEN_COOKIE_PAIR:
3836 /* not yet implemented */
3837 break;
3838 case TCPOLEN_COOKIE_MIN+0:
3839 case TCPOLEN_COOKIE_MIN+2:
3840 case TCPOLEN_COOKIE_MIN+4:
3841 case TCPOLEN_COOKIE_MIN+6:
3842 case TCPOLEN_COOKIE_MAX:
3843 /* 16-bit multiple */
3844 opt_rx->cookie_plus = opsize;
3845 *hvpp = ptr;
3846 break;
3847 default:
3848 /* ignore option */
3849 break;
3850 }
3851 break;
3852
3853 case TCPOPT_EXP: 3563 case TCPOPT_EXP:
3854 /* Fast Open option shares code 254 using a 3564 /* Fast Open option shares code 254 using a
3855 * 16 bits magic number. It's valid only in 3565 * 16 bits magic number. It's valid only in
@@ -3895,8 +3605,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr
3895 * If it is wrong it falls back on tcp_parse_options(). 3605 * If it is wrong it falls back on tcp_parse_options().
3896 */ 3606 */
3897static bool tcp_fast_parse_options(const struct sk_buff *skb, 3607static bool tcp_fast_parse_options(const struct sk_buff *skb,
3898 const struct tcphdr *th, 3608 const struct tcphdr *th, struct tcp_sock *tp)
3899 struct tcp_sock *tp, const u8 **hvpp)
3900{ 3609{
3901 /* In the spirit of fast parsing, compare doff directly to constant 3610 /* In the spirit of fast parsing, compare doff directly to constant
3902 * values. Because equality is used, short doff can be ignored here. 3611 * values. Because equality is used, short doff can be ignored here.
@@ -3910,7 +3619,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb,
3910 return true; 3619 return true;
3911 } 3620 }
3912 3621
3913 tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); 3622 tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
3914 if (tp->rx_opt.saw_tstamp) 3623 if (tp->rx_opt.saw_tstamp)
3915 tp->rx_opt.rcv_tsecr -= tp->tsoffset; 3624 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
3916 3625
@@ -5270,12 +4979,10 @@ out:
5270static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, 4979static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5271 const struct tcphdr *th, int syn_inerr) 4980 const struct tcphdr *th, int syn_inerr)
5272{ 4981{
5273 const u8 *hash_location;
5274 struct tcp_sock *tp = tcp_sk(sk); 4982 struct tcp_sock *tp = tcp_sk(sk);
5275 4983
5276 /* RFC1323: H1. Apply PAWS check first. */ 4984 /* RFC1323: H1. Apply PAWS check first. */
5277 if (tcp_fast_parse_options(skb, th, tp, &hash_location) && 4985 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
5278 tp->rx_opt.saw_tstamp &&
5279 tcp_paws_discard(sk, skb)) { 4986 tcp_paws_discard(sk, skb)) {
5280 if (!th->rst) { 4987 if (!th->rst) {
5281 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); 4988 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
@@ -5624,12 +5331,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5624 5331
5625 if (mss == tp->rx_opt.user_mss) { 5332 if (mss == tp->rx_opt.user_mss) {
5626 struct tcp_options_received opt; 5333 struct tcp_options_received opt;
5627 const u8 *hash_location;
5628 5334
5629 /* Get original SYNACK MSS value if user MSS sets mss_clamp */ 5335 /* Get original SYNACK MSS value if user MSS sets mss_clamp */
5630 tcp_clear_options(&opt); 5336 tcp_clear_options(&opt);
5631 opt.user_mss = opt.mss_clamp = 0; 5337 opt.user_mss = opt.mss_clamp = 0;
5632 tcp_parse_options(synack, &opt, &hash_location, 0, NULL); 5338 tcp_parse_options(synack, &opt, 0, NULL);
5633 mss = opt.mss_clamp; 5339 mss = opt.mss_clamp;
5634 } 5340 }
5635 5341
@@ -5660,14 +5366,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5660static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, 5366static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5661 const struct tcphdr *th, unsigned int len) 5367 const struct tcphdr *th, unsigned int len)
5662{ 5368{
5663 const u8 *hash_location;
5664 struct inet_connection_sock *icsk = inet_csk(sk); 5369 struct inet_connection_sock *icsk = inet_csk(sk);
5665 struct tcp_sock *tp = tcp_sk(sk); 5370 struct tcp_sock *tp = tcp_sk(sk);
5666 struct tcp_cookie_values *cvp = tp->cookie_values;
5667 struct tcp_fastopen_cookie foc = { .len = -1 }; 5371 struct tcp_fastopen_cookie foc = { .len = -1 };
5668 int saved_clamp = tp->rx_opt.mss_clamp; 5372 int saved_clamp = tp->rx_opt.mss_clamp;
5669 5373
5670 tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); 5374 tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
5671 if (tp->rx_opt.saw_tstamp) 5375 if (tp->rx_opt.saw_tstamp)
5672 tp->rx_opt.rcv_tsecr -= tp->tsoffset; 5376 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
5673 5377
@@ -5764,30 +5468,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5764 * is initialized. */ 5468 * is initialized. */
5765 tp->copied_seq = tp->rcv_nxt; 5469 tp->copied_seq = tp->rcv_nxt;
5766 5470
5767 if (cvp != NULL &&
5768 cvp->cookie_pair_size > 0 &&
5769 tp->rx_opt.cookie_plus > 0) {
5770 int cookie_size = tp->rx_opt.cookie_plus
5771 - TCPOLEN_COOKIE_BASE;
5772 int cookie_pair_size = cookie_size
5773 + cvp->cookie_desired;
5774
5775 /* A cookie extension option was sent and returned.
5776 * Note that each incoming SYNACK replaces the
5777 * Responder cookie. The initial exchange is most
5778 * fragile, as protection against spoofing relies
5779 * entirely upon the sequence and timestamp (above).
5780 * This replacement strategy allows the correct pair to
5781 * pass through, while any others will be filtered via
5782 * Responder verification later.
5783 */
5784 if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
5785 memcpy(&cvp->cookie_pair[cvp->cookie_desired],
5786 hash_location, cookie_size);
5787 cvp->cookie_pair_size = cookie_pair_size;
5788 }
5789 }
5790
5791 smp_mb(); 5471 smp_mb();
5792 5472
5793 tcp_finish_connect(sk, skb); 5473 tcp_finish_connect(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d09203c63264..2278669b1d85 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -838,7 +838,6 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
838 */ 838 */
839static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, 839static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
840 struct request_sock *req, 840 struct request_sock *req,
841 struct request_values *rvp,
842 u16 queue_mapping, 841 u16 queue_mapping,
843 bool nocache) 842 bool nocache)
844{ 843{
@@ -851,7 +850,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
851 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) 850 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
852 return -1; 851 return -1;
853 852
854 skb = tcp_make_synack(sk, dst, req, rvp, NULL); 853 skb = tcp_make_synack(sk, dst, req, NULL);
855 854
856 if (skb) { 855 if (skb) {
857 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); 856 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
@@ -868,10 +867,9 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
868 return err; 867 return err;
869} 868}
870 869
871static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, 870static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
872 struct request_values *rvp)
873{ 871{
874 int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); 872 int res = tcp_v4_send_synack(sk, NULL, req, 0, false);
875 873
876 if (!res) 874 if (!res)
877 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 875 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
@@ -1371,8 +1369,7 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
1371static int tcp_v4_conn_req_fastopen(struct sock *sk, 1369static int tcp_v4_conn_req_fastopen(struct sock *sk,
1372 struct sk_buff *skb, 1370 struct sk_buff *skb,
1373 struct sk_buff *skb_synack, 1371 struct sk_buff *skb_synack,
1374 struct request_sock *req, 1372 struct request_sock *req)
1375 struct request_values *rvp)
1376{ 1373{
1377 struct tcp_sock *tp = tcp_sk(sk); 1374 struct tcp_sock *tp = tcp_sk(sk);
1378 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 1375 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
@@ -1467,9 +1464,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
1467 1464
1468int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 1465int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1469{ 1466{
1470 struct tcp_extend_values tmp_ext;
1471 struct tcp_options_received tmp_opt; 1467 struct tcp_options_received tmp_opt;
1472 const u8 *hash_location;
1473 struct request_sock *req; 1468 struct request_sock *req;
1474 struct inet_request_sock *ireq; 1469 struct inet_request_sock *ireq;
1475 struct tcp_sock *tp = tcp_sk(sk); 1470 struct tcp_sock *tp = tcp_sk(sk);
@@ -1519,42 +1514,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1519 tcp_clear_options(&tmp_opt); 1514 tcp_clear_options(&tmp_opt);
1520 tmp_opt.mss_clamp = TCP_MSS_DEFAULT; 1515 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1521 tmp_opt.user_mss = tp->rx_opt.user_mss; 1516 tmp_opt.user_mss = tp->rx_opt.user_mss;
1522 tcp_parse_options(skb, &tmp_opt, &hash_location, 0, 1517 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
1523 want_cookie ? NULL : &foc);
1524
1525 if (tmp_opt.cookie_plus > 0 &&
1526 tmp_opt.saw_tstamp &&
1527 !tp->rx_opt.cookie_out_never &&
1528 (sysctl_tcp_cookie_size > 0 ||
1529 (tp->cookie_values != NULL &&
1530 tp->cookie_values->cookie_desired > 0))) {
1531 u8 *c;
1532 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1533 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1534
1535 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1536 goto drop_and_release;
1537
1538 /* Secret recipe starts with IP addresses */
1539 *mess++ ^= (__force u32)daddr;
1540 *mess++ ^= (__force u32)saddr;
1541
1542 /* plus variable length Initiator Cookie */
1543 c = (u8 *)mess;
1544 while (l-- > 0)
1545 *c++ ^= *hash_location++;
1546
1547 want_cookie = false; /* not our kind of cookie */
1548 tmp_ext.cookie_out_never = 0; /* false */
1549 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1550 } else if (!tp->rx_opt.cookie_in_always) {
1551 /* redundant indications, but ensure initialization. */
1552 tmp_ext.cookie_out_never = 1; /* true */
1553 tmp_ext.cookie_plus = 0;
1554 } else {
1555 goto drop_and_release;
1556 }
1557 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1558 1518
1559 if (want_cookie && !tmp_opt.saw_tstamp) 1519 if (want_cookie && !tmp_opt.saw_tstamp)
1560 tcp_clear_options(&tmp_opt); 1520 tcp_clear_options(&tmp_opt);
@@ -1636,7 +1596,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1636 * of tcp_v4_send_synack()->tcp_select_initial_window(). 1596 * of tcp_v4_send_synack()->tcp_select_initial_window().
1637 */ 1597 */
1638 skb_synack = tcp_make_synack(sk, dst, req, 1598 skb_synack = tcp_make_synack(sk, dst, req,
1639 (struct request_values *)&tmp_ext,
1640 fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); 1599 fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
1641 1600
1642 if (skb_synack) { 1601 if (skb_synack) {
@@ -1660,8 +1619,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1660 if (fastopen_cookie_present(&foc) && foc.len != 0) 1619 if (fastopen_cookie_present(&foc) && foc.len != 0)
1661 NET_INC_STATS_BH(sock_net(sk), 1620 NET_INC_STATS_BH(sock_net(sk),
1662 LINUX_MIB_TCPFASTOPENPASSIVEFAIL); 1621 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
1663 } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req, 1622 } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
1664 (struct request_values *)&tmp_ext))
1665 goto drop_and_free; 1623 goto drop_and_free;
1666 1624
1667 return 0; 1625 return 0;
@@ -1950,6 +1908,50 @@ void tcp_v4_early_demux(struct sk_buff *skb)
1950 } 1908 }
1951} 1909}
1952 1910
1911/* Packet is added to VJ-style prequeue for processing in process
1912 * context, if a reader task is waiting. Apparently, this exciting
1913 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1914 * failed somewhere. Latency? Burstiness? Well, at least now we will
1915 * see why it failed. 8)8) --ANK
1916 *
1917 */
1918bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1919{
1920 struct tcp_sock *tp = tcp_sk(sk);
1921
1922 if (sysctl_tcp_low_latency || !tp->ucopy.task)
1923 return false;
1924
1925 if (skb->len <= tcp_hdrlen(skb) &&
1926 skb_queue_len(&tp->ucopy.prequeue) == 0)
1927 return false;
1928
1929 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1930 tp->ucopy.memory += skb->truesize;
1931 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1932 struct sk_buff *skb1;
1933
1934 BUG_ON(sock_owned_by_user(sk));
1935
1936 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1937 sk_backlog_rcv(sk, skb1);
1938 NET_INC_STATS_BH(sock_net(sk),
1939 LINUX_MIB_TCPPREQUEUEDROPPED);
1940 }
1941
1942 tp->ucopy.memory = 0;
1943 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1944 wake_up_interruptible_sync_poll(sk_sleep(sk),
1945 POLLIN | POLLRDNORM | POLLRDBAND);
1946 if (!inet_csk_ack_scheduled(sk))
1947 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1948 (3 * tcp_rto_min(sk)) / 4,
1949 TCP_RTO_MAX);
1950 }
1951 return true;
1952}
1953EXPORT_SYMBOL(tcp_prequeue);
1954
1953/* 1955/*
1954 * From tcp_input.c 1956 * From tcp_input.c
1955 */ 1957 */
@@ -2197,12 +2199,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
2197 if (inet_csk(sk)->icsk_bind_hash) 2199 if (inet_csk(sk)->icsk_bind_hash)
2198 inet_put_port(sk); 2200 inet_put_port(sk);
2199 2201
2200 /* TCP Cookie Transactions */
2201 if (tp->cookie_values != NULL) {
2202 kref_put(&tp->cookie_values->kref,
2203 tcp_cookie_values_release);
2204 tp->cookie_values = NULL;
2205 }
2206 BUG_ON(tp->fastopen_rsk != NULL); 2202 BUG_ON(tp->fastopen_rsk != NULL);
2207 2203
2208 /* If socket is aborted during connect operation */ 2204 /* If socket is aborted during connect operation */
@@ -2659,7 +2655,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2659 __u16 srcp = ntohs(inet->inet_sport); 2655 __u16 srcp = ntohs(inet->inet_sport);
2660 int rx_queue; 2656 int rx_queue;
2661 2657
2662 if (icsk->icsk_pending == ICSK_TIME_RETRANS) { 2658 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2659 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2660 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2663 timer_active = 1; 2661 timer_active = 1;
2664 timer_expires = icsk->icsk_timeout; 2662 timer_expires = icsk->icsk_timeout;
2665 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 2663 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index b6f3583ddfe8..da14436c1735 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -64,7 +64,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
64{ 64{
65 struct cg_proto *cg_proto; 65 struct cg_proto *cg_proto;
66 struct tcp_memcontrol *tcp; 66 struct tcp_memcontrol *tcp;
67 u64 val;
68 67
69 cg_proto = tcp_prot.proto_cgroup(memcg); 68 cg_proto = tcp_prot.proto_cgroup(memcg);
70 if (!cg_proto) 69 if (!cg_proto)
@@ -72,8 +71,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
72 71
73 tcp = tcp_from_cgproto(cg_proto); 72 tcp = tcp_from_cgproto(cg_proto);
74 percpu_counter_destroy(&tcp->tcp_sockets_allocated); 73 percpu_counter_destroy(&tcp->tcp_sockets_allocated);
75
76 val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
77} 74}
78EXPORT_SYMBOL(tcp_destroy_cgroup); 75EXPORT_SYMBOL(tcp_destroy_cgroup);
79 76
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b83a49cc3816..05eaf8904613 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -93,13 +93,12 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
93 const struct tcphdr *th) 93 const struct tcphdr *th)
94{ 94{
95 struct tcp_options_received tmp_opt; 95 struct tcp_options_received tmp_opt;
96 const u8 *hash_location;
97 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); 96 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
98 bool paws_reject = false; 97 bool paws_reject = false;
99 98
100 tmp_opt.saw_tstamp = 0; 99 tmp_opt.saw_tstamp = 0;
101 if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { 100 if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
102 tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); 101 tcp_parse_options(skb, &tmp_opt, 0, NULL);
103 102
104 if (tmp_opt.saw_tstamp) { 103 if (tmp_opt.saw_tstamp) {
105 tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; 104 tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset;
@@ -388,32 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
388 struct tcp_request_sock *treq = tcp_rsk(req); 387 struct tcp_request_sock *treq = tcp_rsk(req);
389 struct inet_connection_sock *newicsk = inet_csk(newsk); 388 struct inet_connection_sock *newicsk = inet_csk(newsk);
390 struct tcp_sock *newtp = tcp_sk(newsk); 389 struct tcp_sock *newtp = tcp_sk(newsk);
391 struct tcp_sock *oldtp = tcp_sk(sk);
392 struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
393
394 /* TCP Cookie Transactions require space for the cookie pair,
395 * as it differs for each connection. There is no need to
396 * copy any s_data_payload stored at the original socket.
397 * Failure will prevent resuming the connection.
398 *
399 * Presumed copied, in order of appearance:
400 * cookie_in_always, cookie_out_never
401 */
402 if (oldcvp != NULL) {
403 struct tcp_cookie_values *newcvp =
404 kzalloc(sizeof(*newtp->cookie_values),
405 GFP_ATOMIC);
406
407 if (newcvp != NULL) {
408 kref_init(&newcvp->kref);
409 newcvp->cookie_desired =
410 oldcvp->cookie_desired;
411 newtp->cookie_values = newcvp;
412 } else {
413 /* Not Yet Implemented */
414 newtp->cookie_values = NULL;
415 }
416 }
417 390
418 /* Now setup tcp_sock */ 391 /* Now setup tcp_sock */
419 newtp->pred_flags = 0; 392 newtp->pred_flags = 0;
@@ -422,8 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
422 newtp->rcv_nxt = treq->rcv_isn + 1; 395 newtp->rcv_nxt = treq->rcv_isn + 1;
423 396
424 newtp->snd_sml = newtp->snd_una = 397 newtp->snd_sml = newtp->snd_una =
425 newtp->snd_nxt = newtp->snd_up = 398 newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
426 treq->snt_isn + 1 + tcp_s_data_size(oldtp);
427 399
428 tcp_prequeue_init(newtp); 400 tcp_prequeue_init(newtp);
429 INIT_LIST_HEAD(&newtp->tsq_node); 401 INIT_LIST_HEAD(&newtp->tsq_node);
@@ -440,6 +412,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
440 newtp->fackets_out = 0; 412 newtp->fackets_out = 0;
441 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 413 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
442 tcp_enable_early_retrans(newtp); 414 tcp_enable_early_retrans(newtp);
415 newtp->tlp_high_seq = 0;
443 416
444 /* So many TCP implementations out there (incorrectly) count the 417 /* So many TCP implementations out there (incorrectly) count the
445 * initial SYN frame in their delayed-ACK and congestion control 418 * initial SYN frame in their delayed-ACK and congestion control
@@ -449,9 +422,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
449 newtp->snd_cwnd = TCP_INIT_CWND; 422 newtp->snd_cwnd = TCP_INIT_CWND;
450 newtp->snd_cwnd_cnt = 0; 423 newtp->snd_cwnd_cnt = 0;
451 424
452 newtp->frto_counter = 0;
453 newtp->frto_highmark = 0;
454
455 if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops && 425 if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops &&
456 !try_module_get(newicsk->icsk_ca_ops->owner)) 426 !try_module_get(newicsk->icsk_ca_ops->owner))
457 newicsk->icsk_ca_ops = &tcp_init_congestion_ops; 427 newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
@@ -459,8 +429,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
459 tcp_set_ca_state(newsk, TCP_CA_Open); 429 tcp_set_ca_state(newsk, TCP_CA_Open);
460 tcp_init_xmit_timers(newsk); 430 tcp_init_xmit_timers(newsk);
461 skb_queue_head_init(&newtp->out_of_order_queue); 431 skb_queue_head_init(&newtp->out_of_order_queue);
462 newtp->write_seq = newtp->pushed_seq = 432 newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
463 treq->snt_isn + 1 + tcp_s_data_size(oldtp);
464 433
465 newtp->rx_opt.saw_tstamp = 0; 434 newtp->rx_opt.saw_tstamp = 0;
466 435
@@ -537,7 +506,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
537 bool fastopen) 506 bool fastopen)
538{ 507{
539 struct tcp_options_received tmp_opt; 508 struct tcp_options_received tmp_opt;
540 const u8 *hash_location;
541 struct sock *child; 509 struct sock *child;
542 const struct tcphdr *th = tcp_hdr(skb); 510 const struct tcphdr *th = tcp_hdr(skb);
543 __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); 511 __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
@@ -547,7 +515,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
547 515
548 tmp_opt.saw_tstamp = 0; 516 tmp_opt.saw_tstamp = 0;
549 if (th->doff > (sizeof(struct tcphdr)>>2)) { 517 if (th->doff > (sizeof(struct tcphdr)>>2)) {
550 tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); 518 tcp_parse_options(skb, &tmp_opt, 0, NULL);
551 519
552 if (tmp_opt.saw_tstamp) { 520 if (tmp_opt.saw_tstamp) {
553 tmp_opt.ts_recent = req->ts_recent; 521 tmp_opt.ts_recent = req->ts_recent;
@@ -647,7 +615,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
647 */ 615 */
648 if ((flg & TCP_FLAG_ACK) && !fastopen && 616 if ((flg & TCP_FLAG_ACK) && !fastopen &&
649 (TCP_SKB_CB(skb)->ack_seq != 617 (TCP_SKB_CB(skb)->ack_seq !=
650 tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) 618 tcp_rsk(req)->snt_isn + 1))
651 return sk; 619 return sk;
652 620
653 /* Also, it would not be a bad idea to check rcv_tsecr, which 621
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 509912a5ff98..b735c23a961d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,27 +65,22 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
65/* By default, RFC2861 behavior. */ 65/* By default, RFC2861 behavior. */
66int sysctl_tcp_slow_start_after_idle __read_mostly = 1; 66int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
67 67
68int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */
69EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
70
71static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, 68static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
72 int push_one, gfp_t gfp); 69 int push_one, gfp_t gfp);
73 70
74/* Account for new data that has been sent to the network. */ 71/* Account for new data that has been sent to the network. */
75static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) 72static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
76{ 73{
74 struct inet_connection_sock *icsk = inet_csk(sk);
77 struct tcp_sock *tp = tcp_sk(sk); 75 struct tcp_sock *tp = tcp_sk(sk);
78 unsigned int prior_packets = tp->packets_out; 76 unsigned int prior_packets = tp->packets_out;
79 77
80 tcp_advance_send_head(sk, skb); 78 tcp_advance_send_head(sk, skb);
81 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; 79 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
82 80
83 /* Don't override Nagle indefinitely with F-RTO */
84 if (tp->frto_counter == 2)
85 tp->frto_counter = 3;
86
87 tp->packets_out += tcp_skb_pcount(skb); 81 tp->packets_out += tcp_skb_pcount(skb);
88 if (!prior_packets || tp->early_retrans_delayed) 82 if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
83 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
89 tcp_rearm_rto(sk); 84 tcp_rearm_rto(sk);
90} 85}
91 86
@@ -384,7 +379,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
384#define OPTION_TS (1 << 1) 379#define OPTION_TS (1 << 1)
385#define OPTION_MD5 (1 << 2) 380#define OPTION_MD5 (1 << 2)
386#define OPTION_WSCALE (1 << 3) 381#define OPTION_WSCALE (1 << 3)
387#define OPTION_COOKIE_EXTENSION (1 << 4)
388#define OPTION_FAST_OPEN_COOKIE (1 << 8) 382#define OPTION_FAST_OPEN_COOKIE (1 << 8)
389 383
390struct tcp_out_options { 384struct tcp_out_options {
@@ -398,36 +392,6 @@ struct tcp_out_options {
398 struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ 392 struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
399}; 393};
400 394
401/* The sysctl int routines are generic, so check consistency here.
402 */
403static u8 tcp_cookie_size_check(u8 desired)
404{
405 int cookie_size;
406
407 if (desired > 0)
408 /* previously specified */
409 return desired;
410
411 cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
412 if (cookie_size <= 0)
413 /* no default specified */
414 return 0;
415
416 if (cookie_size <= TCP_COOKIE_MIN)
417 /* value too small, specify minimum */
418 return TCP_COOKIE_MIN;
419
420 if (cookie_size >= TCP_COOKIE_MAX)
421 /* value too large, specify maximum */
422 return TCP_COOKIE_MAX;
423
424 if (cookie_size & 1)
425 /* 8-bit multiple, illegal, fix it */
426 cookie_size++;
427
428 return (u8)cookie_size;
429}
430
431/* Write previously computed TCP options to the packet. 395/* Write previously computed TCP options to the packet.
432 * 396 *
433 * Beware: Something in the Internet is very sensitive to the ordering of 397 * Beware: Something in the Internet is very sensitive to the ordering of
@@ -446,27 +410,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
446{ 410{
447 u16 options = opts->options; /* mungable copy */ 411 u16 options = opts->options; /* mungable copy */
448 412
449 /* Having both authentication and cookies for security is redundant,
450 * and there's certainly not enough room. Instead, the cookie-less
451 * extension variant is proposed.
452 *
453 * Consider the pessimal case with authentication. The options
454 * could look like:
455 * COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40
456 */
457 if (unlikely(OPTION_MD5 & options)) { 413 if (unlikely(OPTION_MD5 & options)) {
458 if (unlikely(OPTION_COOKIE_EXTENSION & options)) { 414 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
459 *ptr++ = htonl((TCPOPT_COOKIE << 24) | 415 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
460 (TCPOLEN_COOKIE_BASE << 16) |
461 (TCPOPT_MD5SIG << 8) |
462 TCPOLEN_MD5SIG);
463 } else {
464 *ptr++ = htonl((TCPOPT_NOP << 24) |
465 (TCPOPT_NOP << 16) |
466 (TCPOPT_MD5SIG << 8) |
467 TCPOLEN_MD5SIG);
468 }
469 options &= ~OPTION_COOKIE_EXTENSION;
470 /* overload cookie hash location */ 416 /* overload cookie hash location */
471 opts->hash_location = (__u8 *)ptr; 417 opts->hash_location = (__u8 *)ptr;
472 ptr += 4; 418 ptr += 4;
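With the cookie-extension branch gone, the MD5 case always emits one fixed 32-bit word, NOP, NOP, kind 19 (MD5SIG), length 18, before reserving the 16-byte digest. A self-contained check of that packing (option constants restated locally; values per RFC 2385):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

#define TCPOPT_NOP	1
#define TCPOPT_MD5SIG	19
#define TCPOLEN_MD5SIG	18

int main(void)
{
	uint32_t word = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			      (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
	const uint8_t *b = (const uint8_t *)&word;

	/* on the wire: 01 01 13 12 */
	printf("%02x %02x %02x %02x\n", b[0], b[1], b[2], b[3]);
	return 0;
}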
@@ -495,44 +441,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
495 *ptr++ = htonl(opts->tsecr); 441 *ptr++ = htonl(opts->tsecr);
496 } 442 }
497 443
498 /* Specification requires after timestamp, so do it now.
499 *
500 * Consider the pessimal case without authentication. The options
501 * could look like:
502 * MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40
503 */
504 if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
505 __u8 *cookie_copy = opts->hash_location;
506 u8 cookie_size = opts->hash_size;
507
508 /* 8-bit multiple handled in tcp_cookie_size_check() above,
509 * and elsewhere.
510 */
511 if (0x2 & cookie_size) {
512 __u8 *p = (__u8 *)ptr;
513
514 /* 16-bit multiple */
515 *p++ = TCPOPT_COOKIE;
516 *p++ = TCPOLEN_COOKIE_BASE + cookie_size;
517 *p++ = *cookie_copy++;
518 *p++ = *cookie_copy++;
519 ptr++;
520 cookie_size -= 2;
521 } else {
522 /* 32-bit multiple */
523 *ptr++ = htonl(((TCPOPT_NOP << 24) |
524 (TCPOPT_NOP << 16) |
525 (TCPOPT_COOKIE << 8) |
526 TCPOLEN_COOKIE_BASE) +
527 cookie_size);
528 }
529
530 if (cookie_size > 0) {
531 memcpy(ptr, cookie_copy, cookie_size);
532 ptr += (cookie_size / 4);
533 }
534 }
535
536 if (unlikely(OPTION_SACK_ADVERTISE & options)) { 444 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
537 *ptr++ = htonl((TCPOPT_NOP << 24) | 445 *ptr++ = htonl((TCPOPT_NOP << 24) |
538 (TCPOPT_NOP << 16) | 446 (TCPOPT_NOP << 16) |
@@ -591,11 +499,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
591 struct tcp_md5sig_key **md5) 499 struct tcp_md5sig_key **md5)
592{ 500{
593 struct tcp_sock *tp = tcp_sk(sk); 501 struct tcp_sock *tp = tcp_sk(sk);
594 struct tcp_cookie_values *cvp = tp->cookie_values;
595 unsigned int remaining = MAX_TCP_OPTION_SPACE; 502 unsigned int remaining = MAX_TCP_OPTION_SPACE;
596 u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
597 tcp_cookie_size_check(cvp->cookie_desired) :
598 0;
599 struct tcp_fastopen_request *fastopen = tp->fastopen_req; 503 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
600 504
601#ifdef CONFIG_TCP_MD5SIG 505#ifdef CONFIG_TCP_MD5SIG
@@ -647,52 +551,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
647 tp->syn_fastopen = 1; 551 tp->syn_fastopen = 1;
648 } 552 }
649 } 553 }
650 /* Note that timestamps are required by the specification.
651 *
652 * Odd numbers of bytes are prohibited by the specification, ensuring
653 * that the cookie is 16-bit aligned, and the resulting cookie pair is
654 * 32-bit aligned.
655 */
656 if (*md5 == NULL &&
657 (OPTION_TS & opts->options) &&
658 cookie_size > 0) {
659 int need = TCPOLEN_COOKIE_BASE + cookie_size;
660
661 if (0x2 & need) {
662 /* 32-bit multiple */
663 need += 2; /* NOPs */
664
665 if (need > remaining) {
666 /* try shrinking cookie to fit */
667 cookie_size -= 2;
668 need -= 4;
669 }
670 }
671 while (need > remaining && TCP_COOKIE_MIN <= cookie_size) {
672 cookie_size -= 4;
673 need -= 4;
674 }
675 if (TCP_COOKIE_MIN <= cookie_size) {
676 opts->options |= OPTION_COOKIE_EXTENSION;
677 opts->hash_location = (__u8 *)&cvp->cookie_pair[0];
678 opts->hash_size = cookie_size;
679
680 /* Remember for future incarnations. */
681 cvp->cookie_desired = cookie_size;
682
683 if (cvp->cookie_desired != cvp->cookie_pair_size) {
684 /* Currently use random bytes as a nonce,
685 * assuming these are completely unpredictable
686 * by hostile users of the same system.
687 */
688 get_random_bytes(&cvp->cookie_pair[0],
689 cookie_size);
690 cvp->cookie_pair_size = cookie_size;
691 }
692 554
693 remaining -= need;
694 }
695 }
696 return MAX_TCP_OPTION_SPACE - remaining; 555 return MAX_TCP_OPTION_SPACE - remaining;
697} 556}
698 557
@@ -702,14 +561,10 @@ static unsigned int tcp_synack_options(struct sock *sk,
702 unsigned int mss, struct sk_buff *skb, 561 unsigned int mss, struct sk_buff *skb,
703 struct tcp_out_options *opts, 562 struct tcp_out_options *opts,
704 struct tcp_md5sig_key **md5, 563 struct tcp_md5sig_key **md5,
705 struct tcp_extend_values *xvp,
706 struct tcp_fastopen_cookie *foc) 564 struct tcp_fastopen_cookie *foc)
707{ 565{
708 struct inet_request_sock *ireq = inet_rsk(req); 566 struct inet_request_sock *ireq = inet_rsk(req);
709 unsigned int remaining = MAX_TCP_OPTION_SPACE; 567 unsigned int remaining = MAX_TCP_OPTION_SPACE;
710 u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
711 xvp->cookie_plus :
712 0;
713 568
714#ifdef CONFIG_TCP_MD5SIG 569#ifdef CONFIG_TCP_MD5SIG
715 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); 570 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
@@ -757,28 +612,7 @@ static unsigned int tcp_synack_options(struct sock *sk,
757 remaining -= need; 612 remaining -= need;
758 } 613 }
759 } 614 }
760 /* Similar rationale to tcp_syn_options() applies here, too. 615
761 * If the <SYN> options fit, the same options should fit now!
762 */
763 if (*md5 == NULL &&
764 ireq->tstamp_ok &&
765 cookie_plus > TCPOLEN_COOKIE_BASE) {
766 int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */
767
768 if (0x2 & need) {
769 /* 32-bit multiple */
770 need += 2; /* NOPs */
771 }
772 if (need <= remaining) {
773 opts->options |= OPTION_COOKIE_EXTENSION;
774 opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE;
775 remaining -= need;
776 } else {
777 /* There's no error return, so flag it. */
778 xvp->cookie_out_never = 1; /* true */
779 opts->hash_size = 0;
780 }
781 }
782 return MAX_TCP_OPTION_SPACE - remaining; 616 return MAX_TCP_OPTION_SPACE - remaining;
783} 617}
784 618
@@ -953,7 +787,7 @@ void __init tcp_tasklet_init(void)
 953 * We can't xmit new skbs from this context, as we might already 787
954 * hold qdisc lock. 788 * hold qdisc lock.
955 */ 789 */
956static void tcp_wfree(struct sk_buff *skb) 790void tcp_wfree(struct sk_buff *skb)
957{ 791{
958 struct sock *sk = skb->sk; 792 struct sock *sk = skb->sk;
959 struct tcp_sock *tp = tcp_sk(sk); 793 struct tcp_sock *tp = tcp_sk(sk);
@@ -1012,6 +846,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1012 __net_timestamp(skb); 846 __net_timestamp(skb);
1013 847
1014 if (likely(clone_it)) { 848 if (likely(clone_it)) {
849 const struct sk_buff *fclone = skb + 1;
850
851 if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
852 fclone->fclone == SKB_FCLONE_CLONE))
853 NET_INC_STATS_BH(sock_net(sk),
 854 LINUX_MIB_TCPSPURIOUSRTXHOSTQUEUES);
855
1015 if (unlikely(skb_cloned(skb))) 856 if (unlikely(skb_cloned(skb)))
1016 skb = pskb_copy(skb, gfp_mask); 857 skb = pskb_copy(skb, gfp_mask);
1017 else 858 else
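
The new counter works because of the fast-clone allocation layout: an skb from the fclone cache has its companion clone placed immediately behind it in memory, so "skb + 1" reaches it directly, and a companion still marked SKB_FCLONE_CLONE means a lower layer (qdisc or driver queue) has not yet released the previous transmit. A minimal sketch of that layout, assuming kernel headers; the pair struct name is invented for illustration (the kernel carves this shape out of the fclone kmem cache rather than declaring it):

	#include <linux/skbuff.h>

	/* Hypothetical name, for illustration only. */
	struct skb_fclone_pair {
		struct sk_buff orig;	/* orig.fclone == SKB_FCLONE_ORIG */
		struct sk_buff clone;	/* clone.fclone == SKB_FCLONE_CLONE
					 * while a qdisc or driver queue
					 * still holds the clone; read as
					 * (skb + 1)->fclone above */
	};
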
@@ -1632,11 +1473,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
1632 if (nonagle & TCP_NAGLE_PUSH) 1473 if (nonagle & TCP_NAGLE_PUSH)
1633 return true; 1474 return true;
1634 1475
1635 /* Don't use the nagle rule for urgent data (or for the final FIN). 1476 /* Don't use the nagle rule for urgent data (or for the final FIN). */
1636 * Nagle can be ignored during F-RTO too (see RFC4138). 1477 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1637 */
1638 if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
1639 (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1640 return true; 1478 return true;
1641 1479
1642 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) 1480 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
@@ -1961,6 +1799,9 @@ static int tcp_mtu_probe(struct sock *sk)
1961 * snd_up-64k-mss .. snd_up cannot be large. However, taking into 1799 * snd_up-64k-mss .. snd_up cannot be large. However, taking into
1962 * account rare use of URG, this is not a big flaw. 1800 * account rare use of URG, this is not a big flaw.
1963 * 1801 *
1802 * Send at most one packet when push_one > 0. Temporarily ignore
1803 * cwnd limit to force at most one packet out when push_one == 2.
 1804 *
1964 * Returns true, if no segments are in flight and we have queued segments, 1805 * Returns true, if no segments are in flight and we have queued segments,
1965 * but cannot send anything now because of SWS or another problem. 1806 * but cannot send anything now because of SWS or another problem.
1966 */ 1807 */
@@ -1996,8 +1837,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1996 goto repair; /* Skip network transmission */ 1837 goto repair; /* Skip network transmission */
1997 1838
1998 cwnd_quota = tcp_cwnd_test(tp, skb); 1839 cwnd_quota = tcp_cwnd_test(tp, skb);
1999 if (!cwnd_quota) 1840 if (!cwnd_quota) {
2000 break; 1841 if (push_one == 2)
1842 /* Force out a loss probe pkt. */
1843 cwnd_quota = 1;
1844 else
1845 break;
1846 }
2001 1847
2002 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) 1848 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
2003 break; 1849 break;
@@ -2051,10 +1897,129 @@ repair:
2051 if (likely(sent_pkts)) { 1897 if (likely(sent_pkts)) {
2052 if (tcp_in_cwnd_reduction(sk)) 1898 if (tcp_in_cwnd_reduction(sk))
2053 tp->prr_out += sent_pkts; 1899 tp->prr_out += sent_pkts;
1900
1901 /* Send one loss probe per tail loss episode. */
1902 if (push_one != 2)
1903 tcp_schedule_loss_probe(sk);
2054 tcp_cwnd_validate(sk); 1904 tcp_cwnd_validate(sk);
2055 return false; 1905 return false;
2056 } 1906 }
2057 return !tp->packets_out && tcp_send_head(sk); 1907 return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
1908}
1909
1910bool tcp_schedule_loss_probe(struct sock *sk)
1911{
1912 struct inet_connection_sock *icsk = inet_csk(sk);
1913 struct tcp_sock *tp = tcp_sk(sk);
1914 u32 timeout, tlp_time_stamp, rto_time_stamp;
1915 u32 rtt = tp->srtt >> 3;
1916
1917 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
1918 return false;
1919 /* No consecutive loss probes. */
1920 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
1921 tcp_rearm_rto(sk);
1922 return false;
1923 }
1924 /* Don't do any loss probe on a Fast Open connection before 3WHS
1925 * finishes.
1926 */
1927 if (sk->sk_state == TCP_SYN_RECV)
1928 return false;
1929
1930 /* TLP is only scheduled when next timer event is RTO. */
1931 if (icsk->icsk_pending != ICSK_TIME_RETRANS)
1932 return false;
1933
1934 /* Schedule a loss probe in 2*RTT for SACK capable connections
1935 * in Open state, that are either limited by cwnd or application.
1936 */
1937 if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
1938 !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
1939 return false;
1940
1941 if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
1942 tcp_send_head(sk))
1943 return false;
1944
1945 /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
1946 * for delayed ack when there's one outstanding packet.
1947 */
1948 timeout = rtt << 1;
1949 if (tp->packets_out == 1)
1950 timeout = max_t(u32, timeout,
1951 (rtt + (rtt >> 1) + TCP_DELACK_MAX));
1952 timeout = max_t(u32, timeout, msecs_to_jiffies(10));
1953
1954 /* If RTO is shorter, just schedule TLP in its place. */
1955 tlp_time_stamp = tcp_time_stamp + timeout;
1956 rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
1957 if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
1958 s32 delta = rto_time_stamp - tcp_time_stamp;
1959 if (delta > 0)
1960 timeout = delta;
1961 }
1962
1963 inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
1964 TCP_RTO_MAX);
1965 return true;
1966}
1967
1968/* When probe timeout (PTO) fires, send a new segment if one exists, else
1969 * retransmit the last segment.
1970 */
1971void tcp_send_loss_probe(struct sock *sk)
1972{
1973 struct tcp_sock *tp = tcp_sk(sk);
1974 struct sk_buff *skb;
1975 int pcount;
1976 int mss = tcp_current_mss(sk);
1977 int err = -1;
1978
1979 if (tcp_send_head(sk) != NULL) {
1980 err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
1981 goto rearm_timer;
1982 }
1983
1984 /* At most one outstanding TLP retransmission. */
1985 if (tp->tlp_high_seq)
1986 goto rearm_timer;
1987
1988 /* Retransmit last segment. */
1989 skb = tcp_write_queue_tail(sk);
1990 if (WARN_ON(!skb))
1991 goto rearm_timer;
1992
1993 pcount = tcp_skb_pcount(skb);
1994 if (WARN_ON(!pcount))
1995 goto rearm_timer;
1996
1997 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
1998 if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss)))
1999 goto rearm_timer;
2000 skb = tcp_write_queue_tail(sk);
2001 }
2002
2003 if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
2004 goto rearm_timer;
2005
2006 /* Probe with zero data doesn't trigger fast recovery. */
2007 if (skb->len > 0)
2008 err = __tcp_retransmit_skb(sk, skb);
2009
2010 /* Record snd_nxt for loss detection. */
2011 if (likely(!err))
2012 tp->tlp_high_seq = tp->snd_nxt;
2013
2014rearm_timer:
2015 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2016 inet_csk(sk)->icsk_rto,
2017 TCP_RTO_MAX);
2018
2019 if (likely(!err))
2020 NET_INC_STATS_BH(sock_net(sk),
2021 LINUX_MIB_TCPLOSSPROBES);
2022 return;
2058} 2023}
2059 2024
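
Putting the scheduling arithmetic above in one place: the probe timeout (PTO) is 2*RTT, stretched to 1.5*RTT plus the delayed-ACK maximum when exactly one packet is outstanding, floored at 10 ms, and never allowed to fire later than the pending RTO. A standalone sketch of that computation (invented function name, milliseconds instead of jiffies):

	#include <stdio.h>

	static unsigned int tlp_timeout_ms(unsigned int rtt,
					   unsigned int packets_out,
					   unsigned int delack_max,
					   unsigned int to_rto)
	{
		unsigned int timeout = 2 * rtt;

		if (packets_out == 1 && timeout < rtt + rtt / 2 + delack_max)
			timeout = rtt + rtt / 2 + delack_max; /* cover delayed ACK */
		if (timeout < 10)
			timeout = 10;		/* minimum probe delay */
		if (timeout > to_rto)
			timeout = to_rto;	/* RTO would fire first: use it */
		return timeout;
	}

	int main(void)
	{
		/* rtt 100 ms, one packet in flight, 200 ms delack, 600 ms to RTO */
		printf("PTO = %u ms\n", tlp_timeout_ms(100, 1, 200, 600)); /* 350 */
		return 0;
	}

tcp_send_loss_probe() then records snd_nxt in tp->tlp_high_seq so that at most one probe retransmission is outstanding per tail-loss episode; the tcp_timer.c hunk further down clears the field once a real RTO takes over.
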
2060/* Push out any pending frames which were held back due to 2025/* Push out any pending frames which were held back due to
@@ -2679,32 +2644,24 @@ int tcp_send_synack(struct sock *sk)
2679 * sk: listener socket 2644 * sk: listener socket
2680 * dst: dst entry attached to the SYNACK 2645 * dst: dst entry attached to the SYNACK
2681 * req: request_sock pointer 2646 * req: request_sock pointer
2682 * rvp: request_values pointer
2683 * 2647 *
2684 * Allocate one skb and build a SYNACK packet. 2648 * Allocate one skb and build a SYNACK packet.
2685 * @dst is consumed : Caller should not use it again. 2649 * @dst is consumed : Caller should not use it again.
2686 */ 2650 */
2687struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, 2651struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2688 struct request_sock *req, 2652 struct request_sock *req,
2689 struct request_values *rvp,
2690 struct tcp_fastopen_cookie *foc) 2653 struct tcp_fastopen_cookie *foc)
2691{ 2654{
2692 struct tcp_out_options opts; 2655 struct tcp_out_options opts;
2693 struct tcp_extend_values *xvp = tcp_xv(rvp);
2694 struct inet_request_sock *ireq = inet_rsk(req); 2656 struct inet_request_sock *ireq = inet_rsk(req);
2695 struct tcp_sock *tp = tcp_sk(sk); 2657 struct tcp_sock *tp = tcp_sk(sk);
2696 const struct tcp_cookie_values *cvp = tp->cookie_values;
2697 struct tcphdr *th; 2658 struct tcphdr *th;
2698 struct sk_buff *skb; 2659 struct sk_buff *skb;
2699 struct tcp_md5sig_key *md5; 2660 struct tcp_md5sig_key *md5;
2700 int tcp_header_size; 2661 int tcp_header_size;
2701 int mss; 2662 int mss;
2702 int s_data_desired = 0;
2703 2663
2704 if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) 2664 skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC));
2705 s_data_desired = cvp->s_data_desired;
2706 skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired,
2707 sk_gfp_atomic(sk, GFP_ATOMIC));
2708 if (unlikely(!skb)) { 2665 if (unlikely(!skb)) {
2709 dst_release(dst); 2666 dst_release(dst);
2710 return NULL; 2667 return NULL;
@@ -2747,9 +2704,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2747 else 2704 else
2748#endif 2705#endif
2749 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2706 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2750 tcp_header_size = tcp_synack_options(sk, req, mss, 2707 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5,
2751 skb, &opts, &md5, xvp, foc) 2708 foc) + sizeof(*th);
2752 + sizeof(*th);
2753 2709
2754 skb_push(skb, tcp_header_size); 2710 skb_push(skb, tcp_header_size);
2755 skb_reset_transport_header(skb); 2711 skb_reset_transport_header(skb);
@@ -2767,40 +2723,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2767 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, 2723 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
2768 TCPHDR_SYN | TCPHDR_ACK); 2724 TCPHDR_SYN | TCPHDR_ACK);
2769 2725
2770 if (OPTION_COOKIE_EXTENSION & opts.options) {
2771 if (s_data_desired) {
2772 u8 *buf = skb_put(skb, s_data_desired);
2773
2774 /* copy data directly from the listening socket. */
2775 memcpy(buf, cvp->s_data_payload, s_data_desired);
2776 TCP_SKB_CB(skb)->end_seq += s_data_desired;
2777 }
2778
2779 if (opts.hash_size > 0) {
2780 __u32 workspace[SHA_WORKSPACE_WORDS];
2781 u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS];
2782 u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1];
2783
2784 /* Secret recipe depends on the Timestamp, (future)
2785 * Sequence and Acknowledgment Numbers, Initiator
2786 * Cookie, and others handled by IP variant caller.
2787 */
2788 *tail-- ^= opts.tsval;
2789 *tail-- ^= tcp_rsk(req)->rcv_isn + 1;
2790 *tail-- ^= TCP_SKB_CB(skb)->seq + 1;
2791
2792 /* recommended */
2793 *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
2794 *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */
2795
2796 sha_transform((__u32 *)&xvp->cookie_bakery[0],
2797 (char *)mess,
2798 &workspace[0]);
2799 opts.hash_location =
2800 (__u8 *)&xvp->cookie_bakery[0];
2801 }
2802 }
2803
2804 th->seq = htonl(TCP_SKB_CB(skb)->seq); 2726 th->seq = htonl(TCP_SKB_CB(skb)->seq);
2805 /* XXX data is queued and acked as is. No buffer/window check */ 2727 /* XXX data is queued and acked as is. No buffer/window check */
2806 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); 2728 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b78aac30c498..4b85e6f636c9 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -342,10 +342,6 @@ void tcp_retransmit_timer(struct sock *sk)
342 struct tcp_sock *tp = tcp_sk(sk); 342 struct tcp_sock *tp = tcp_sk(sk);
343 struct inet_connection_sock *icsk = inet_csk(sk); 343 struct inet_connection_sock *icsk = inet_csk(sk);
344 344
345 if (tp->early_retrans_delayed) {
346 tcp_resume_early_retransmit(sk);
347 return;
348 }
349 if (tp->fastopen_rsk) { 345 if (tp->fastopen_rsk) {
350 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && 346 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
351 sk->sk_state != TCP_FIN_WAIT1); 347 sk->sk_state != TCP_FIN_WAIT1);
@@ -360,6 +356,8 @@ void tcp_retransmit_timer(struct sock *sk)
360 356
361 WARN_ON(tcp_write_queue_empty(sk)); 357 WARN_ON(tcp_write_queue_empty(sk));
362 358
359 tp->tlp_high_seq = 0;
360
363 if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && 361 if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
364 !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { 362 !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
365 /* Receiver dastardly shrinks window. Our retransmits 363 /* Receiver dastardly shrinks window. Our retransmits
@@ -418,11 +416,7 @@ void tcp_retransmit_timer(struct sock *sk)
418 NET_INC_STATS_BH(sock_net(sk), mib_idx); 416 NET_INC_STATS_BH(sock_net(sk), mib_idx);
419 } 417 }
420 418
421 if (tcp_use_frto(sk)) { 419 tcp_enter_loss(sk, 0);
422 tcp_enter_frto(sk);
423 } else {
424 tcp_enter_loss(sk, 0);
425 }
426 420
427 if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) { 421 if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
428 /* Retransmission failed because of local congestion, 422 /* Retransmission failed because of local congestion,
@@ -495,13 +489,20 @@ void tcp_write_timer_handler(struct sock *sk)
495 } 489 }
496 490
497 event = icsk->icsk_pending; 491 event = icsk->icsk_pending;
498 icsk->icsk_pending = 0;
499 492
500 switch (event) { 493 switch (event) {
494 case ICSK_TIME_EARLY_RETRANS:
495 tcp_resume_early_retransmit(sk);
496 break;
497 case ICSK_TIME_LOSS_PROBE:
498 tcp_send_loss_probe(sk);
499 break;
501 case ICSK_TIME_RETRANS: 500 case ICSK_TIME_RETRANS:
501 icsk->icsk_pending = 0;
502 tcp_retransmit_timer(sk); 502 tcp_retransmit_timer(sk);
503 break; 503 break;
504 case ICSK_TIME_PROBE0: 504 case ICSK_TIME_PROBE0:
505 icsk->icsk_pending = 0;
505 tcp_probe_timer(sk); 506 tcp_probe_timer(sk);
506 break; 507 break;
507 } 508 }
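
Note how icsk_pending is now cleared per-case rather than unconditionally before the switch: the early-retransmit and loss-probe handlers re-arm the xmit timer themselves (tcp_send_loss_probe() above ends in inet_csk_reset_xmit_timer(), which installs the next pending event), so only the RETRANS and PROBE0 paths consume the event here.
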
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 1b91bf48e277..76a1e23259e1 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -236,7 +236,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
236 tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); 236 tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
237 break; 237 break;
238 238
239 case CA_EVENT_FRTO: 239 case CA_EVENT_LOSS:
240 tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); 240 tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
241 /* Update RTT_min when next ack arrives */ 241 /* Update RTT_min when next ack arrives */
242 w->reset_rtt_min = 1; 242 w->reset_rtt_min = 1;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0a073a263720..2722db024a0b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -902,9 +902,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
902 ipc.addr = inet->inet_saddr; 902 ipc.addr = inet->inet_saddr;
903 903
904 ipc.oif = sk->sk_bound_dev_if; 904 ipc.oif = sk->sk_bound_dev_if;
905 err = sock_tx_timestamp(sk, &ipc.tx_flags); 905
906 if (err) 906 sock_tx_timestamp(sk, &ipc.tx_flags);
907 return err; 907
908 if (msg->msg_controllen) { 908 if (msg->msg_controllen) {
909 err = ip_cmsg_send(sock_net(sk), msg, &ipc); 909 err = ip_cmsg_send(sock_net(sk), msg, &ipc);
910 if (err) 910 if (err)
@@ -2279,31 +2279,88 @@ void __init udp_init(void)
2279 2279
2280int udp4_ufo_send_check(struct sk_buff *skb) 2280int udp4_ufo_send_check(struct sk_buff *skb)
2281{ 2281{
2282 const struct iphdr *iph; 2282 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
2283 struct udphdr *uh;
2284
2285 if (!pskb_may_pull(skb, sizeof(*uh)))
2286 return -EINVAL; 2283 return -EINVAL;
2287 2284
2288 iph = ip_hdr(skb); 2285 if (likely(!skb->encapsulation)) {
2289 uh = udp_hdr(skb); 2286 const struct iphdr *iph;
2287 struct udphdr *uh;
2288
2289 iph = ip_hdr(skb);
2290 uh = udp_hdr(skb);
2290 2291
2291 uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, 2292 uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
2292 IPPROTO_UDP, 0); 2293 IPPROTO_UDP, 0);
2293 skb->csum_start = skb_transport_header(skb) - skb->head; 2294 skb->csum_start = skb_transport_header(skb) - skb->head;
2294 skb->csum_offset = offsetof(struct udphdr, check); 2295 skb->csum_offset = offsetof(struct udphdr, check);
2295 skb->ip_summed = CHECKSUM_PARTIAL; 2296 skb->ip_summed = CHECKSUM_PARTIAL;
2297 }
2296 return 0; 2298 return 0;
2297} 2299}
2298 2300
2301static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
2302 netdev_features_t features)
2303{
2304 struct sk_buff *segs = ERR_PTR(-EINVAL);
2305 int mac_len = skb->mac_len;
2306 int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
2307 int outer_hlen;
2308 netdev_features_t enc_features;
2309
2310 if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
2311 goto out;
2312
2313 skb->encapsulation = 0;
2314 __skb_pull(skb, tnl_hlen);
2315 skb_reset_mac_header(skb);
2316 skb_set_network_header(skb, skb_inner_network_offset(skb));
2317 skb->mac_len = skb_inner_network_offset(skb);
2318
2319 /* segment inner packet. */
2320 enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
2321 segs = skb_mac_gso_segment(skb, enc_features);
2322 if (!segs || IS_ERR(segs))
2323 goto out;
2324
2325 outer_hlen = skb_tnl_header_len(skb);
2326 skb = segs;
2327 do {
2328 struct udphdr *uh;
2329 int udp_offset = outer_hlen - tnl_hlen;
2330
2331 skb->mac_len = mac_len;
2332
2333 skb_push(skb, outer_hlen);
2334 skb_reset_mac_header(skb);
2335 skb_set_network_header(skb, mac_len);
2336 skb_set_transport_header(skb, udp_offset);
2337 uh = udp_hdr(skb);
2338 uh->len = htons(skb->len - udp_offset);
2339
2340 /* csum segment if tunnel sets skb with csum. */
2341 if (unlikely(uh->check)) {
2342 struct iphdr *iph = ip_hdr(skb);
2343
2344 uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
2345 skb->len - udp_offset,
2346 IPPROTO_UDP, 0);
2347 uh->check = csum_fold(skb_checksum(skb, udp_offset,
2348 skb->len - udp_offset, 0));
2349 if (uh->check == 0)
2350 uh->check = CSUM_MANGLED_0;
2351
2352 }
2353 skb->ip_summed = CHECKSUM_NONE;
2354 } while ((skb = skb->next));
2355out:
2356 return segs;
2357}
2358
2299struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, 2359struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
2300 netdev_features_t features) 2360 netdev_features_t features)
2301{ 2361{
2302 struct sk_buff *segs = ERR_PTR(-EINVAL); 2362 struct sk_buff *segs = ERR_PTR(-EINVAL);
2303 unsigned int mss; 2363 unsigned int mss;
2304 int offset;
2305 __wsum csum;
2306
2307 mss = skb_shinfo(skb)->gso_size; 2364 mss = skb_shinfo(skb)->gso_size;
2308 if (unlikely(skb->len <= mss)) 2365 if (unlikely(skb->len <= mss))
2309 goto out; 2366 goto out;
@@ -2313,6 +2370,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
2313 int type = skb_shinfo(skb)->gso_type; 2370 int type = skb_shinfo(skb)->gso_type;
2314 2371
2315 if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | 2372 if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
2373 SKB_GSO_UDP_TUNNEL |
2316 SKB_GSO_GRE) || 2374 SKB_GSO_GRE) ||
2317 !(type & (SKB_GSO_UDP)))) 2375 !(type & (SKB_GSO_UDP))))
2318 goto out; 2376 goto out;
@@ -2323,20 +2381,27 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
2323 goto out; 2381 goto out;
2324 } 2382 }
2325 2383
2326 /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
2327 * do checksum of UDP packets sent as multiple IP fragments.
2328 */
2329 offset = skb_checksum_start_offset(skb);
2330 csum = skb_checksum(skb, offset, skb->len - offset, 0);
2331 offset += skb->csum_offset;
2332 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
2333 skb->ip_summed = CHECKSUM_NONE;
2334
2335 /* Fragment the skb. IP headers of the fragments are updated in 2384 /* Fragment the skb. IP headers of the fragments are updated in
2336 * inet_gso_segment() 2385 * inet_gso_segment()
2337 */ 2386 */
2338 segs = skb_segment(skb, features); 2387 if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
2388 segs = skb_udp_tunnel_segment(skb, features);
2389 else {
2390 int offset;
2391 __wsum csum;
2392
2393 /* Do software UFO. Complete and fill in the UDP checksum as
2394 * HW cannot do checksum of UDP packets sent as multiple
2395 * IP fragments.
2396 */
2397 offset = skb_checksum_start_offset(skb);
2398 csum = skb_checksum(skb, offset, skb->len - offset, 0);
2399 offset += skb->csum_offset;
2400 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
2401 skb->ip_summed = CHECKSUM_NONE;
2402
2403 segs = skb_segment(skb, features);
2404 }
2339out: 2405out:
2340 return segs; 2406 return segs;
2341} 2407}
2342
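
The offset bookkeeping in skb_udp_tunnel_segment() is easiest to see with numbers. A hedged example, assuming an Ethernet outer frame with outer IPv4 + UDP and an 8-byte tunnel header (the figures are illustrative, not from the patch):

	#include <stdio.h>

	int main(void)
	{
		int mac_len = 14, outer_ip = 20, outer_udp = 8, tun_hdr = 8;

		/* transport header .. inner mac header, pulled before inner GSO */
		int tnl_hlen = outer_udp + tun_hdr;		/* 16 */
		/* outer mac header .. inner mac header, re-pushed per segment */
		int outer_hlen = mac_len + outer_ip + tnl_hlen;	/* 50 */
		/* where the outer UDP header lands after skb_push(outer_hlen) */
		int udp_offset = outer_hlen - tnl_hlen;		/* 34 */

		printf("tnl_hlen=%d outer_hlen=%d udp_offset=%d\n",
		       tnl_hlen, outer_hlen, udp_offset);
		return 0;
	}

udp_offset = 34 is exactly outer MAC + outer IP, which is why skb_set_transport_header(skb, udp_offset) follows skb_set_network_header(skb, mac_len) in the segment loop above.
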
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 505b30ad9182..7927db0a9279 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -25,7 +25,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
25 return 0; 25 return 0;
26 26
27 return inet_sk_diag_fill(sk, NULL, skb, req, 27 return inet_sk_diag_fill(sk, NULL, skb, req,
28 sk_user_ns(NETLINK_CB(cb->skb).ssk), 28 sk_user_ns(NETLINK_CB(cb->skb).sk),
29 NETLINK_CB(cb->skb).portid, 29 NETLINK_CB(cb->skb).portid,
30 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); 30 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
31} 31}
@@ -64,14 +64,14 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
64 goto out; 64 goto out;
65 65
66 err = -ENOMEM; 66 err = -ENOMEM;
67 rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + 67 rep = nlmsg_new(sizeof(struct inet_diag_msg) +
68 sizeof(struct inet_diag_meminfo) + 68 sizeof(struct inet_diag_meminfo) + 64,
69 64)), GFP_KERNEL); 69 GFP_KERNEL);
70 if (!rep) 70 if (!rep)
71 goto out; 71 goto out;
72 72
73 err = inet_sk_diag_fill(sk, NULL, rep, req, 73 err = inet_sk_diag_fill(sk, NULL, rep, req,
74 sk_user_ns(NETLINK_CB(in_skb).ssk), 74 sk_user_ns(NETLINK_CB(in_skb).sk),
75 NETLINK_CB(in_skb).portid, 75 NETLINK_CB(in_skb).portid,
76 nlh->nlmsg_seq, 0, nlh); 76 nlh->nlmsg_seq, 0, nlh);
77 if (err < 0) { 77 if (err < 0) {
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index fe5189e2e114..eb1dd4d643f2 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -103,8 +103,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
103 103
104 top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family); 104 top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);
105 105
106 /* DS disclosed */ 106 /* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
107 top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos, 107 if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
108 top_iph->tos = 0;
109 else
110 top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
111 top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
108 XFRM_MODE_SKB_CB(skb)->tos); 112 XFRM_MODE_SKB_CB(skb)->tos);
109 113
110 flags = x->props.flags; 114 flags = x->props.flags;
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ed0b9e2e797a..11b13ea69db4 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -156,6 +156,7 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION
156config IPV6_SIT 156config IPV6_SIT
157 tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)" 157 tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)"
158 select INET_TUNNEL 158 select INET_TUNNEL
159 select NET_IP_TUNNEL
159 select IPV6_NDISC_NODETYPE 160 select IPV6_NDISC_NODETYPE
160 default y 161 default y
161 ---help--- 162 ---help---
@@ -201,6 +202,7 @@ config IPV6_TUNNEL
201config IPV6_GRE 202config IPV6_GRE
202 tristate "IPv6: GRE tunnel" 203 tristate "IPv6: GRE tunnel"
203 select IPV6_TUNNEL 204 select IPV6_TUNNEL
205 select NET_IP_TUNNEL
204 ---help--- 206 ---help---
205 Tunneling means encapsulating data of one protocol type within 207 Tunneling means encapsulating data of one protocol type within
206 another protocol and sending it over a channel that understands the 208 another protocol and sending it over a channel that understands the
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index dae802c0af7c..d1ab6ab29a55 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -70,6 +70,7 @@
70#include <net/snmp.h> 70#include <net/snmp.h>
71 71
72#include <net/af_ieee802154.h> 72#include <net/af_ieee802154.h>
73#include <net/firewire.h>
73#include <net/ipv6.h> 74#include <net/ipv6.h>
74#include <net/protocol.h> 75#include <net/protocol.h>
75#include <net/ndisc.h> 76#include <net/ndisc.h>
@@ -419,6 +420,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
419 ipv6_regen_rndid((unsigned long) ndev); 420 ipv6_regen_rndid((unsigned long) ndev);
420 } 421 }
421#endif 422#endif
423 ndev->token = in6addr_any;
422 424
423 if (netif_running(dev) && addrconf_qdisc_ok(dev)) 425 if (netif_running(dev) && addrconf_qdisc_ok(dev))
424 ndev->if_flags |= IF_READY; 426 ndev->if_flags |= IF_READY;
@@ -542,8 +544,7 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
542}; 544};
543 545
544static int inet6_netconf_get_devconf(struct sk_buff *in_skb, 546static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
545 struct nlmsghdr *nlh, 547 struct nlmsghdr *nlh)
546 void *arg)
547{ 548{
548 struct net *net = sock_net(in_skb->sk); 549 struct net *net = sock_net(in_skb->sk);
549 struct nlattr *tb[NETCONFA_MAX+1]; 550 struct nlattr *tb[NETCONFA_MAX+1];
@@ -603,6 +604,77 @@ errout:
603 return err; 604 return err;
604} 605}
605 606
607static int inet6_netconf_dump_devconf(struct sk_buff *skb,
608 struct netlink_callback *cb)
609{
610 struct net *net = sock_net(skb->sk);
611 int h, s_h;
612 int idx, s_idx;
613 struct net_device *dev;
614 struct inet6_dev *idev;
615 struct hlist_head *head;
616
617 s_h = cb->args[0];
618 s_idx = idx = cb->args[1];
619
620 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
621 idx = 0;
622 head = &net->dev_index_head[h];
623 rcu_read_lock();
624 cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^
625 net->dev_base_seq;
626 hlist_for_each_entry_rcu(dev, head, index_hlist) {
627 if (idx < s_idx)
628 goto cont;
629 idev = __in6_dev_get(dev);
630 if (!idev)
631 goto cont;
632
633 if (inet6_netconf_fill_devconf(skb, dev->ifindex,
634 &idev->cnf,
635 NETLINK_CB(cb->skb).portid,
636 cb->nlh->nlmsg_seq,
637 RTM_NEWNETCONF,
638 NLM_F_MULTI,
639 -1) <= 0) {
640 rcu_read_unlock();
641 goto done;
642 }
643 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
644cont:
645 idx++;
646 }
647 rcu_read_unlock();
648 }
649 if (h == NETDEV_HASHENTRIES) {
650 if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
651 net->ipv6.devconf_all,
652 NETLINK_CB(cb->skb).portid,
653 cb->nlh->nlmsg_seq,
654 RTM_NEWNETCONF, NLM_F_MULTI,
655 -1) <= 0)
656 goto done;
657 else
658 h++;
659 }
660 if (h == NETDEV_HASHENTRIES + 1) {
661 if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
662 net->ipv6.devconf_dflt,
663 NETLINK_CB(cb->skb).portid,
664 cb->nlh->nlmsg_seq,
665 RTM_NEWNETCONF, NLM_F_MULTI,
666 -1) <= 0)
667 goto done;
668 else
669 h++;
670 }
671done:
672 cb->args[0] = h;
673 cb->args[1] = idx;
674
675 return skb->len;
676}
677
606#ifdef CONFIG_SYSCTL 678#ifdef CONFIG_SYSCTL
607static void dev_forward_change(struct inet6_dev *idev) 679static void dev_forward_change(struct inet6_dev *idev)
608{ 680{
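
The dump above follows the standard rtnetlink resume pattern: the callback is invoked repeatedly until it returns 0, and cb->args[] is the only state that survives between invocations, so the hash bucket and the per-bucket index are reloaded on entry and checkpointed on every exit. A stripped-down sketch of that contract (kernel context assumed, names invented):

	#include <linux/netlink.h>
	#include <linux/skbuff.h>

	static int dump_resume_sketch(struct sk_buff *skb,
				      struct netlink_callback *cb)
	{
		int h = cb->args[0];	/* bucket to resume from */
		int idx = cb->args[1];	/* entry within that bucket */

		/* ... emit entries starting at (h, idx), bumping h and idx
		 * as we go, and stop as soon as a fill call reports the
		 * skb full ... */

		cb->args[0] = h;	/* checkpoint for the next call */
		cb->args[1] = idx;
		return skb->len;	/* non-zero: netlink calls us again */
	}
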
@@ -804,6 +876,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
804 ifa->prefix_len = pfxlen; 876 ifa->prefix_len = pfxlen;
805 ifa->flags = flags | IFA_F_TENTATIVE; 877 ifa->flags = flags | IFA_F_TENTATIVE;
806 ifa->cstamp = ifa->tstamp = jiffies; 878 ifa->cstamp = ifa->tstamp = jiffies;
879 ifa->tokenized = false;
807 880
808 ifa->rt = rt; 881 ifa->rt = rt;
809 882
@@ -1666,6 +1739,20 @@ static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev)
1666 return 0; 1739 return 0;
1667} 1740}
1668 1741
1742static int addrconf_ifid_ieee1394(u8 *eui, struct net_device *dev)
1743{
1744 union fwnet_hwaddr *ha;
1745
1746 if (dev->addr_len != FWNET_ALEN)
1747 return -1;
1748
1749 ha = (union fwnet_hwaddr *)dev->dev_addr;
1750
1751 memcpy(eui, &ha->uc.uniq_id, sizeof(ha->uc.uniq_id));
1752 eui[0] ^= 2;
1753 return 0;
1754}
1755
1669static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev) 1756static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev)
1670{ 1757{
1671 /* XXX: inherit EUI-64 from other interface -- yoshfuji */ 1758 /* XXX: inherit EUI-64 from other interface -- yoshfuji */
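
The only transformation addrconf_ifid_ieee1394() needs is the EUI-64 universal/local bit flip from RFC 4291: the IEEE 1394 unique ID is already 64 bits wide, so bit 1 of the first byte is inverted and the rest is copied verbatim. A two-line illustration:

	#include <stdio.h>

	int main(void)
	{
		unsigned char eui0 = 0x00; /* first byte of the node's unique ID */

		eui0 ^= 2;		/* flip the universal/local bit */
		printf("%02x\n", eui0);	/* 00 -> 02: now a "universal" IID */
		return 0;
	}
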
@@ -1730,6 +1817,8 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
1730 return addrconf_ifid_gre(eui, dev); 1817 return addrconf_ifid_gre(eui, dev);
1731 case ARPHRD_IEEE802154: 1818 case ARPHRD_IEEE802154:
1732 return addrconf_ifid_eui64(eui, dev); 1819 return addrconf_ifid_eui64(eui, dev);
1820 case ARPHRD_IEEE1394:
1821 return addrconf_ifid_ieee1394(eui, dev);
1733 } 1822 }
1734 return -1; 1823 return -1;
1735} 1824}
@@ -2044,11 +2133,19 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
2044 struct inet6_ifaddr *ifp; 2133 struct inet6_ifaddr *ifp;
2045 struct in6_addr addr; 2134 struct in6_addr addr;
2046 int create = 0, update_lft = 0; 2135 int create = 0, update_lft = 0;
2136 bool tokenized = false;
2047 2137
2048 if (pinfo->prefix_len == 64) { 2138 if (pinfo->prefix_len == 64) {
2049 memcpy(&addr, &pinfo->prefix, 8); 2139 memcpy(&addr, &pinfo->prefix, 8);
2050 if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && 2140
2051 ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { 2141 if (!ipv6_addr_any(&in6_dev->token)) {
2142 read_lock_bh(&in6_dev->lock);
2143 memcpy(addr.s6_addr + 8,
2144 in6_dev->token.s6_addr + 8, 8);
2145 read_unlock_bh(&in6_dev->lock);
2146 tokenized = true;
2147 } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
2148 ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
2052 in6_dev_put(in6_dev); 2149 in6_dev_put(in6_dev);
2053 return; 2150 return;
2054 } 2151 }
@@ -2089,6 +2186,7 @@ ok:
2089 2186
2090 update_lft = create = 1; 2187 update_lft = create = 1;
2091 ifp->cstamp = jiffies; 2188 ifp->cstamp = jiffies;
2189 ifp->tokenized = tokenized;
2092 addrconf_dad_start(ifp); 2190 addrconf_dad_start(ifp);
2093 } 2191 }
2094 2192
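
For a tokenized interface the resulting address is thus a straight 8+8 byte concatenation, which is also why the feature only applies when the advertised prefix is exactly /64. A hedged illustration with made-up values:

	#include <string.h>

	int main(void)
	{
		/* upper half: RA prefix 2001:db8:1:2::/64 */
		unsigned char prefix[16] = { 0x20, 0x01, 0x0d, 0xb8, 0, 1, 0, 2 };
		/* lower half: configured token ::a:b:c:d (bytes 0..7 unused) */
		unsigned char token[16]  = { [9] = 0x0a, [11] = 0x0b,
					     [13] = 0x0c, [15] = 0x0d };
		unsigned char addr[16];

		memcpy(addr, prefix, 8);	/* prefix supplies bits 0..63 */
		memcpy(addr + 8, token + 8, 8);	/* token supplies bits 64..127 */
		/* addr == 2001:db8:1:2:a:b:c:d */
		return 0;
	}
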
@@ -2598,7 +2696,8 @@ static void addrconf_dev_config(struct net_device *dev)
2598 (dev->type != ARPHRD_FDDI) && 2696 (dev->type != ARPHRD_FDDI) &&
2599 (dev->type != ARPHRD_ARCNET) && 2697 (dev->type != ARPHRD_ARCNET) &&
2600 (dev->type != ARPHRD_INFINIBAND) && 2698 (dev->type != ARPHRD_INFINIBAND) &&
2601 (dev->type != ARPHRD_IEEE802154)) { 2699 (dev->type != ARPHRD_IEEE802154) &&
2700 (dev->type != ARPHRD_IEEE1394)) {
2602 /* Alas, we support only Ethernet autoconfiguration. */ 2701 /* Alas, we support only Ethernet autoconfiguration. */
2603 return; 2702 return;
2604 } 2703 }
@@ -3535,7 +3634,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
3535}; 3634};
3536 3635
3537static int 3636static int
3538inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 3637inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
3539{ 3638{
3540 struct net *net = sock_net(skb->sk); 3639 struct net *net = sock_net(skb->sk);
3541 struct ifaddrmsg *ifm; 3640 struct ifaddrmsg *ifm;
@@ -3601,7 +3700,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
3601} 3700}
3602 3701
3603static int 3702static int
3604inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 3703inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
3605{ 3704{
3606 struct net *net = sock_net(skb->sk); 3705 struct net *net = sock_net(skb->sk);
3607 struct ifaddrmsg *ifm; 3706 struct ifaddrmsg *ifm;
@@ -3832,6 +3931,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
3832 NLM_F_MULTI); 3931 NLM_F_MULTI);
3833 if (err <= 0) 3932 if (err <= 0)
3834 break; 3933 break;
3934 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
3835 } 3935 }
3836 break; 3936 break;
3837 } 3937 }
@@ -3889,6 +3989,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
3889 s_ip_idx = ip_idx = cb->args[2]; 3989 s_ip_idx = ip_idx = cb->args[2];
3890 3990
3891 rcu_read_lock(); 3991 rcu_read_lock();
3992 cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ net->dev_base_seq;
3892 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 3993 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
3893 idx = 0; 3994 idx = 0;
3894 head = &net->dev_index_head[h]; 3995 head = &net->dev_index_head[h];
@@ -3940,8 +4041,7 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
3940 return inet6_dump_addr(skb, cb, type); 4041 return inet6_dump_addr(skb, cb, type);
3941} 4042}
3942 4043
3943static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, 4044static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3944 void *arg)
3945{ 4045{
3946 struct net *net = sock_net(in_skb->sk); 4046 struct net *net = sock_net(in_skb->sk);
3947 struct ifaddrmsg *ifm; 4047 struct ifaddrmsg *ifm;
@@ -4074,7 +4174,8 @@ static inline size_t inet6_ifla6_size(void)
4074 + nla_total_size(sizeof(struct ifla_cacheinfo)) 4174 + nla_total_size(sizeof(struct ifla_cacheinfo))
4075 + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ 4175 + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
4076 + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ 4176 + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
4077 + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */ 4177 + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
4178 + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */
4078} 4179}
4079 4180
4080static inline size_t inet6_if_nlmsg_size(void) 4181static inline size_t inet6_if_nlmsg_size(void)
@@ -4161,6 +4262,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
4161 goto nla_put_failure; 4262 goto nla_put_failure;
4162 snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); 4263 snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
4163 4264
4265 nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
4266 if (nla == NULL)
4267 goto nla_put_failure;
4268 read_lock_bh(&idev->lock);
4269 memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
4270 read_unlock_bh(&idev->lock);
4271
4164 return 0; 4272 return 0;
4165 4273
4166nla_put_failure: 4274nla_put_failure:
@@ -4188,6 +4296,80 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
4188 return 0; 4296 return 0;
4189} 4297}
4190 4298
4299static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
4300{
4301 struct inet6_ifaddr *ifp;
4302 struct net_device *dev = idev->dev;
4303 bool update_rs = false;
4304
4305 if (token == NULL)
4306 return -EINVAL;
4307 if (ipv6_addr_any(token))
4308 return -EINVAL;
4309 if (dev->flags & (IFF_LOOPBACK | IFF_NOARP))
4310 return -EINVAL;
4311 if (!ipv6_accept_ra(idev))
4312 return -EINVAL;
4313 if (idev->cnf.rtr_solicits <= 0)
4314 return -EINVAL;
4315
4316 write_lock_bh(&idev->lock);
4317
4318 BUILD_BUG_ON(sizeof(token->s6_addr) != 16);
4319 memcpy(idev->token.s6_addr + 8, token->s6_addr + 8, 8);
4320
4321 write_unlock_bh(&idev->lock);
4322
4323 if (!idev->dead && (idev->if_flags & IF_READY)) {
4324 struct in6_addr ll_addr;
4325
4326 ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE |
4327 IFA_F_OPTIMISTIC);
4328
4329 /* If we're not ready, then normal ifup will take care
4330 * of this. Otherwise, we need to request our rs here.
4331 */
4332 ndisc_send_rs(dev, &ll_addr, &in6addr_linklocal_allrouters);
4333 update_rs = true;
4334 }
4335
4336 write_lock_bh(&idev->lock);
4337
4338 if (update_rs)
4339 idev->if_flags |= IF_RS_SENT;
4340
4341 /* Well, that's kinda nasty ... */
4342 list_for_each_entry(ifp, &idev->addr_list, if_list) {
4343 spin_lock(&ifp->lock);
4344 if (ifp->tokenized) {
4345 ifp->valid_lft = 0;
4346 ifp->prefered_lft = 0;
4347 }
4348 spin_unlock(&ifp->lock);
4349 }
4350
4351 write_unlock_bh(&idev->lock);
4352 return 0;
4353}
4354
4355static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
4356{
4357 int err = -EINVAL;
4358 struct inet6_dev *idev = __in6_dev_get(dev);
4359 struct nlattr *tb[IFLA_INET6_MAX + 1];
4360
4361 if (!idev)
4362 return -EAFNOSUPPORT;
4363
4364 if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0)
4365 BUG();
4366
4367 if (tb[IFLA_INET6_TOKEN])
4368 err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]));
4369
4370 return err;
4371}
4372
4191static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, 4373static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
4192 u32 portid, u32 seq, int event, unsigned int flags) 4374 u32 portid, u32 seq, int event, unsigned int flags)
4193{ 4375{
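
For completeness: this setter is reached via RTM_SETLINK with an AF_INET6 nest inside IFLA_AF_SPEC carrying IFLA_INET6_TOKEN, and, assuming an iproute2 new enough to know about tokens, corresponds to something like `ip token set ::a:b:c:d dev eth0` on the command line.
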
@@ -4366,6 +4548,8 @@ errout:
4366 4548
4367static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) 4549static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
4368{ 4550{
4551 struct net *net = dev_net(ifp->idev->dev);
4552
4369 inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); 4553 inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
4370 4554
4371 switch (event) { 4555 switch (event) {
@@ -4391,6 +4575,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
4391 dst_free(&ifp->rt->dst); 4575 dst_free(&ifp->rt->dst);
4392 break; 4576 break;
4393 } 4577 }
4578 atomic_inc(&net->ipv6.dev_addr_genid);
4394} 4579}
4395 4580
4396static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) 4581static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -4871,6 +5056,7 @@ static struct rtnl_af_ops inet6_ops = {
4871 .family = AF_INET6, 5056 .family = AF_INET6,
4872 .fill_link_af = inet6_fill_link_af, 5057 .fill_link_af = inet6_fill_link_af,
4873 .get_link_af_size = inet6_get_link_af_size, 5058 .get_link_af_size = inet6_get_link_af_size,
5059 .set_link_af = inet6_set_link_af,
4874}; 5060};
4875 5061
4876/* 5062/*
@@ -4943,7 +5129,7 @@ int __init addrconf_init(void)
4943 __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, 5129 __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
4944 inet6_dump_ifacaddr, NULL); 5130 inet6_dump_ifacaddr, NULL);
4945 __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, 5131 __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
4946 NULL, NULL); 5132 inet6_netconf_dump_devconf, NULL);
4947 5133
4948 ipv6_addr_label_rtnl_register(); 5134 ipv6_addr_label_rtnl_register();
4949 5135
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index aad64352cb60..f083a583a05c 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -414,8 +414,7 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
414 [IFAL_LABEL] = { .len = sizeof(u32), }, 414 [IFAL_LABEL] = { .len = sizeof(u32), },
415}; 415};
416 416
417static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, 417static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)
418 void *arg)
419{ 418{
420 struct net *net = sock_net(skb->sk); 419 struct net *net = sock_net(skb->sk);
421 struct ifaddrlblmsg *ifal; 420 struct ifaddrlblmsg *ifal;
@@ -436,10 +435,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
436 435
437 if (!tb[IFAL_ADDRESS]) 436 if (!tb[IFAL_ADDRESS])
438 return -EINVAL; 437 return -EINVAL;
439
440 pfx = nla_data(tb[IFAL_ADDRESS]); 438 pfx = nla_data(tb[IFAL_ADDRESS]);
441 if (!pfx)
442 return -EINVAL;
443 439
444 if (!tb[IFAL_LABEL]) 440 if (!tb[IFAL_LABEL])
445 return -EINVAL; 441 return -EINVAL;
@@ -533,8 +529,7 @@ static inline int ip6addrlbl_msgsize(void)
533 + nla_total_size(4); /* IFAL_LABEL */ 529 + nla_total_size(4); /* IFAL_LABEL */
534} 530}
535 531
536static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, 532static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh)
537 void *arg)
538{ 533{
539 struct net *net = sock_net(in_skb->sk); 534 struct net *net = sock_net(in_skb->sk);
540 struct ifaddrlblmsg *ifal; 535 struct ifaddrlblmsg *ifal;
@@ -561,10 +556,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
561 556
562 if (!tb[IFAL_ADDRESS]) 557 if (!tb[IFAL_ADDRESS])
563 return -EINVAL; 558 return -EINVAL;
564
565 addr = nla_data(tb[IFAL_ADDRESS]); 559 addr = nla_data(tb[IFAL_ADDRESS]);
566 if (!addr)
567 return -EINVAL;
568 560
569 rcu_read_lock(); 561 rcu_read_lock();
570 p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); 562 p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 6b793bfc0e10..ab5c7ad482cd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -49,7 +49,6 @@
49#include <net/udp.h> 49#include <net/udp.h>
50#include <net/udplite.h> 50#include <net/udplite.h>
51#include <net/tcp.h> 51#include <net/tcp.h>
52#include <net/ipip.h>
53#include <net/protocol.h> 52#include <net/protocol.h>
54#include <net/inet_common.h> 53#include <net/inet_common.h>
55#include <net/route.h> 54#include <net/route.h>
@@ -323,7 +322,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
323 struct net_device *dev = NULL; 322 struct net_device *dev = NULL;
324 323
325 rcu_read_lock(); 324 rcu_read_lock();
326 if (addr_type & IPV6_ADDR_LINKLOCAL) { 325 if (__ipv6_addr_needs_scope_id(addr_type)) {
327 if (addr_len >= sizeof(struct sockaddr_in6) && 326 if (addr_len >= sizeof(struct sockaddr_in6) &&
328 addr->sin6_scope_id) { 327 addr->sin6_scope_id) {
329 /* Override any existing binding, if another one 328 /* Override any existing binding, if another one
@@ -471,8 +470,8 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
471 470
472 sin->sin6_port = inet->inet_sport; 471 sin->sin6_port = inet->inet_sport;
473 } 472 }
474 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) 473 sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
475 sin->sin6_scope_id = sk->sk_bound_dev_if; 474 sk->sk_bound_dev_if);
476 *uaddr_len = sizeof(*sin); 475 *uaddr_len = sizeof(*sin);
477 return 0; 476 return 0;
478} 477}
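
The recurring pattern in these conversions is that scope-id filling moves out of each call site into helpers keyed on __ipv6_addr_needs_scope_id(). A hedged guess at its shape, with illustrative flag values (the kernel derives the multicast scope from the address type itself):

	#include <stdbool.h>
	#include <stdio.h>

	#define ADDR_LINKLOCAL 0x0020	/* illustrative bit values */
	#define ADDR_MULTICAST 0x0002

	static bool needs_scope_id(int addr_type, bool mcast_scope_link_or_less)
	{
		/* link-local unicast always needs an interface index;
		 * multicast does whenever its scope is link-local or
		 * narrower; everything else keeps sin6_scope_id == 0 */
		return (addr_type & ADDR_LINKLOCAL) ||
		       ((addr_type & ADDR_MULTICAST) && mcast_scope_link_or_less);
	}

	int main(void)
	{
		printf("%d\n", needs_scope_id(ADDR_LINKLOCAL, false)); /* 1 */
		return 0;
	}
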
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index f5a54782a340..4b56cbbc7890 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -124,7 +124,7 @@ ipv4_connected:
124 goto out; 124 goto out;
125 } 125 }
126 126
127 if (addr_type&IPV6_ADDR_LINKLOCAL) { 127 if (__ipv6_addr_needs_scope_id(addr_type)) {
128 if (addr_len >= sizeof(struct sockaddr_in6) && 128 if (addr_len >= sizeof(struct sockaddr_in6) &&
129 usin->sin6_scope_id) { 129 usin->sin6_scope_id) {
130 if (sk->sk_bound_dev_if && 130 if (sk->sk_bound_dev_if &&
@@ -355,18 +355,19 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
355 sin->sin6_family = AF_INET6; 355 sin->sin6_family = AF_INET6;
356 sin->sin6_flowinfo = 0; 356 sin->sin6_flowinfo = 0;
357 sin->sin6_port = serr->port; 357 sin->sin6_port = serr->port;
358 sin->sin6_scope_id = 0;
359 if (skb->protocol == htons(ETH_P_IPV6)) { 358 if (skb->protocol == htons(ETH_P_IPV6)) {
360 const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset), 359 const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset),
361 struct ipv6hdr, daddr); 360 struct ipv6hdr, daddr);
362 sin->sin6_addr = ip6h->daddr; 361 sin->sin6_addr = ip6h->daddr;
363 if (np->sndflow) 362 if (np->sndflow)
364 sin->sin6_flowinfo = ip6_flowinfo(ip6h); 363 sin->sin6_flowinfo = ip6_flowinfo(ip6h);
365 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) 364 sin->sin6_scope_id =
366 sin->sin6_scope_id = IP6CB(skb)->iif; 365 ipv6_iface_scope_id(&sin->sin6_addr,
366 IP6CB(skb)->iif);
367 } else { 367 } else {
368 ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset), 368 ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset),
369 &sin->sin6_addr); 369 &sin->sin6_addr);
370 sin->sin6_scope_id = 0;
370 } 371 }
371 } 372 }
372 373
@@ -376,18 +377,19 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
376 if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { 377 if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
377 sin->sin6_family = AF_INET6; 378 sin->sin6_family = AF_INET6;
378 sin->sin6_flowinfo = 0; 379 sin->sin6_flowinfo = 0;
379 sin->sin6_scope_id = 0;
380 if (skb->protocol == htons(ETH_P_IPV6)) { 380 if (skb->protocol == htons(ETH_P_IPV6)) {
381 sin->sin6_addr = ipv6_hdr(skb)->saddr; 381 sin->sin6_addr = ipv6_hdr(skb)->saddr;
382 if (np->rxopt.all) 382 if (np->rxopt.all)
383 ip6_datagram_recv_ctl(sk, msg, skb); 383 ip6_datagram_recv_ctl(sk, msg, skb);
384 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) 384 sin->sin6_scope_id =
385 sin->sin6_scope_id = IP6CB(skb)->iif; 385 ipv6_iface_scope_id(&sin->sin6_addr,
386 IP6CB(skb)->iif);
386 } else { 387 } else {
387 struct inet_sock *inet = inet_sk(sk); 388 struct inet_sock *inet = inet_sk(sk);
388 389
389 ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, 390 ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
390 &sin->sin6_addr); 391 &sin->sin6_addr);
392 sin->sin6_scope_id = 0;
391 if (inet->cmsg_flags) 393 if (inet->cmsg_flags)
392 ip_cmsg_recv(msg, skb); 394 ip_cmsg_recv(msg, skb);
393 } 395 }
@@ -592,7 +594,9 @@ int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
592 sin6.sin6_addr = ipv6_hdr(skb)->daddr; 594 sin6.sin6_addr = ipv6_hdr(skb)->daddr;
593 sin6.sin6_port = ports[1]; 595 sin6.sin6_port = ports[1];
594 sin6.sin6_flowinfo = 0; 596 sin6.sin6_flowinfo = 0;
595 sin6.sin6_scope_id = 0; 597 sin6.sin6_scope_id =
598 ipv6_iface_scope_id(&ipv6_hdr(skb)->daddr,
599 opt->iif);
596 600
597 put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6); 601 put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
598 } 602 }
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index fff5bdd8b680..71b900c3f4ff 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -434,7 +434,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
434 * Source addr check 434 * Source addr check
435 */ 435 */
436 436
437 if (addr_type & IPV6_ADDR_LINKLOCAL) 437 if (__ipv6_addr_needs_scope_id(addr_type))
438 iif = skb->dev->ifindex; 438 iif = skb->dev->ifindex;
439 439
440 /* 440 /*
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 9bfab19ff3c0..e4311cbc8b4e 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -54,6 +54,10 @@ int inet6_csk_bind_conflict(const struct sock *sk,
54 if (ipv6_rcv_saddr_equal(sk, sk2)) 54 if (ipv6_rcv_saddr_equal(sk, sk2))
55 break; 55 break;
56 } 56 }
57 if (!relax && reuse && sk2->sk_reuse &&
58 sk2->sk_state != TCP_LISTEN &&
59 ipv6_rcv_saddr_equal(sk, sk2))
60 break;
57 } 61 }
58 } 62 }
59 63
@@ -169,10 +173,8 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
169 sin6->sin6_port = inet_sk(sk)->inet_dport; 173 sin6->sin6_port = inet_sk(sk)->inet_dport;
170 /* We do not store received flowlabel for TCP */ 174 /* We do not store received flowlabel for TCP */
171 sin6->sin6_flowinfo = 0; 175 sin6->sin6_flowinfo = 0;
172 sin6->sin6_scope_id = 0; 176 sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
173 if (sk->sk_bound_dev_if && 177 sk->sk_bound_dev_if);
174 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
175 sin6->sin6_scope_id = sk->sk_bound_dev_if;
176} 178}
177 179
178EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); 180EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index b973ed3d06cf..46e88433ec7d 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -144,7 +144,9 @@ static void ip6_fl_gc(unsigned long dummy)
144 spin_lock(&ip6_fl_lock); 144 spin_lock(&ip6_fl_lock);
145 145
146 for (i=0; i<=FL_HASH_MASK; i++) { 146 for (i=0; i<=FL_HASH_MASK; i++) {
147 struct ip6_flowlabel *fl, **flp; 147 struct ip6_flowlabel *fl;
148 struct ip6_flowlabel __rcu **flp;
149
148 flp = &fl_ht[i]; 150 flp = &fl_ht[i];
149 while ((fl = rcu_dereference_protected(*flp, 151 while ((fl = rcu_dereference_protected(*flp,
150 lockdep_is_held(&ip6_fl_lock))) != NULL) { 152 lockdep_is_held(&ip6_fl_lock))) != NULL) {
@@ -179,7 +181,9 @@ static void __net_exit ip6_fl_purge(struct net *net)
179 181
180 spin_lock(&ip6_fl_lock); 182 spin_lock(&ip6_fl_lock);
181 for (i = 0; i <= FL_HASH_MASK; i++) { 183 for (i = 0; i <= FL_HASH_MASK; i++) {
182 struct ip6_flowlabel *fl, **flp; 184 struct ip6_flowlabel *fl;
185 struct ip6_flowlabel __rcu **flp;
186
183 flp = &fl_ht[i]; 187 flp = &fl_ht[i];
184 while ((fl = rcu_dereference_protected(*flp, 188 while ((fl = rcu_dereference_protected(*flp,
185 lockdep_is_held(&ip6_fl_lock))) != NULL) { 189 lockdep_is_held(&ip6_fl_lock))) != NULL) {
@@ -506,7 +510,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
506 struct ipv6_pinfo *np = inet6_sk(sk); 510 struct ipv6_pinfo *np = inet6_sk(sk);
507 struct in6_flowlabel_req freq; 511 struct in6_flowlabel_req freq;
508 struct ipv6_fl_socklist *sfl1=NULL; 512 struct ipv6_fl_socklist *sfl1=NULL;
509 struct ipv6_fl_socklist *sfl, **sflp; 513 struct ipv6_fl_socklist *sfl;
514 struct ipv6_fl_socklist __rcu **sflp;
510 struct ip6_flowlabel *fl, *fl1 = NULL; 515 struct ip6_flowlabel *fl, *fl1 = NULL;
511 516
512 517
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index e4efffe2522e..d3ddd8400354 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -38,6 +38,7 @@
38 38
39#include <net/sock.h> 39#include <net/sock.h>
40#include <net/ip.h> 40#include <net/ip.h>
41#include <net/ip_tunnels.h>
41#include <net/icmp.h> 42#include <net/icmp.h>
42#include <net/protocol.h> 43#include <net/protocol.h>
43#include <net/addrconf.h> 44#include <net/addrconf.h>
@@ -110,46 +111,6 @@ static u32 HASH_ADDR(const struct in6_addr *addr)
110#define tunnels_l tunnels[1] 111#define tunnels_l tunnels[1]
111#define tunnels_wc tunnels[0] 112#define tunnels_wc tunnels[0]
112 113
113static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
114 struct rtnl_link_stats64 *tot)
115{
116 int i;
117
118 for_each_possible_cpu(i) {
119 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
120 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
121 unsigned int start;
122
123 do {
124 start = u64_stats_fetch_begin_bh(&tstats->syncp);
125 rx_packets = tstats->rx_packets;
126 tx_packets = tstats->tx_packets;
127 rx_bytes = tstats->rx_bytes;
128 tx_bytes = tstats->tx_bytes;
129 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
130
131 tot->rx_packets += rx_packets;
132 tot->tx_packets += tx_packets;
133 tot->rx_bytes += rx_bytes;
134 tot->tx_bytes += tx_bytes;
135 }
136
137 tot->multicast = dev->stats.multicast;
138 tot->rx_crc_errors = dev->stats.rx_crc_errors;
139 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
140 tot->rx_length_errors = dev->stats.rx_length_errors;
141 tot->rx_frame_errors = dev->stats.rx_frame_errors;
142 tot->rx_errors = dev->stats.rx_errors;
143
144 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
145 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
146 tot->tx_dropped = dev->stats.tx_dropped;
147 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
148 tot->tx_errors = dev->stats.tx_errors;
149
150 return tot;
151}
152
153/* Given src, dst and key, find appropriate for input tunnel. */ 114/* Given src, dst and key, find appropriate for input tunnel. */
154 115
155static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, 116static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
@@ -667,7 +628,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
667 struct net_device_stats *stats = &tunnel->dev->stats; 628 struct net_device_stats *stats = &tunnel->dev->stats;
668 int err = -1; 629 int err = -1;
669 u8 proto; 630 u8 proto;
670 int pkt_len;
671 struct sk_buff *new_skb; 631 struct sk_buff *new_skb;
672 632
673 if (dev->type == ARPHRD_ETHER) 633 if (dev->type == ARPHRD_ETHER)
@@ -801,23 +761,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
801 } 761 }
802 } 762 }
803 763
804 nf_reset(skb); 764 ip6tunnel_xmit(skb, dev);
805 pkt_len = skb->len;
806 err = ip6_local_out(skb);
807
808 if (net_xmit_eval(err) == 0) {
809 struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats);
810
811 tstats->tx_bytes += pkt_len;
812 tstats->tx_packets++;
813 } else {
814 stats->tx_errors++;
815 stats->tx_aborted_errors++;
816 }
817
818 if (ndst) 765 if (ndst)
819 ip6_tnl_dst_store(tunnel, ndst); 766 ip6_tnl_dst_store(tunnel, ndst);
820
821 return 0; 767 return 0;
822tx_err_link_failure: 768tx_err_link_failure:
823 stats->tx_carrier_errors++; 769 stats->tx_carrier_errors++;
@@ -1271,7 +1217,7 @@ static const struct net_device_ops ip6gre_netdev_ops = {
1271 .ndo_start_xmit = ip6gre_tunnel_xmit, 1217 .ndo_start_xmit = ip6gre_tunnel_xmit,
1272 .ndo_do_ioctl = ip6gre_tunnel_ioctl, 1218 .ndo_do_ioctl = ip6gre_tunnel_ioctl,
1273 .ndo_change_mtu = ip6gre_tunnel_change_mtu, 1219 .ndo_change_mtu = ip6gre_tunnel_change_mtu,
1274 .ndo_get_stats64 = ip6gre_get_stats64, 1220 .ndo_get_stats64 = ip_tunnel_get_stats64,
1275}; 1221};
1276 1222
1277static void ip6gre_dev_free(struct net_device *dev) 1223static void ip6gre_dev_free(struct net_device *dev)
@@ -1520,7 +1466,7 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
1520 .ndo_set_mac_address = eth_mac_addr, 1466 .ndo_set_mac_address = eth_mac_addr,
1521 .ndo_validate_addr = eth_validate_addr, 1467 .ndo_validate_addr = eth_validate_addr,
1522 .ndo_change_mtu = ip6gre_tunnel_change_mtu, 1468 .ndo_change_mtu = ip6gre_tunnel_change_mtu,
1523 .ndo_get_stats64 = ip6gre_get_stats64, 1469 .ndo_get_stats64 = ip_tunnel_get_stats64,
1524}; 1470};
1525 1471
1526static void ip6gre_tap_setup(struct net_device *dev) 1472static void ip6gre_tap_setup(struct net_device *dev)
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 8234c1dcdf72..71b766ee821d 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -92,14 +92,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
92 u8 *prevhdr; 92 u8 *prevhdr;
93 int offset = 0; 93 int offset = 0;
94 94
95 if (!(features & NETIF_F_V6_CSUM))
96 features &= ~NETIF_F_SG;
97
98 if (unlikely(skb_shinfo(skb)->gso_type & 95 if (unlikely(skb_shinfo(skb)->gso_type &
99 ~(SKB_GSO_UDP | 96 ~(SKB_GSO_UDP |
100 SKB_GSO_DODGY | 97 SKB_GSO_DODGY |
101 SKB_GSO_TCP_ECN | 98 SKB_GSO_TCP_ECN |
102 SKB_GSO_GRE | 99 SKB_GSO_GRE |
100 SKB_GSO_UDP_TUNNEL |
103 SKB_GSO_TCPV6 | 101 SKB_GSO_TCPV6 |
104 0))) 102 0)))
105 goto out; 103 goto out;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 155eccfa7760..d2eedf192330 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1224,11 +1224,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1224 } 1224 }
1225 1225
1226 /* For UDP, check if TX timestamp is enabled */ 1226 /* For UDP, check if TX timestamp is enabled */
1227 if (sk->sk_type == SOCK_DGRAM) { 1227 if (sk->sk_type == SOCK_DGRAM)
1228 err = sock_tx_timestamp(sk, &tx_flags); 1228 sock_tx_timestamp(sk, &tx_flags);
1229 if (err)
1230 goto error;
1231 }
1232 1229
1233 /* 1230 /*
1234 * Let's try using as much space as possible. 1231 * Let's try using as much space as possible.
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index fff83cbc197f..1e55866cead7 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -47,6 +47,7 @@
 
 #include <net/icmp.h>
 #include <net/ip.h>
+#include <net/ip_tunnels.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
@@ -955,7 +956,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	unsigned int max_headroom = sizeof(struct ipv6hdr);
 	u8 proto;
 	int err = -1;
-	int pkt_len;
 
 	if (!fl6->flowi6_mark)
 		dst = ip6_tnl_dst_check(t);
@@ -1035,19 +1035,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	ipv6h->nexthdr = proto;
 	ipv6h->saddr = fl6->saddr;
 	ipv6h->daddr = fl6->daddr;
-	nf_reset(skb);
-	pkt_len = skb->len;
-	err = ip6_local_out(skb);
-
-	if (net_xmit_eval(err) == 0) {
-		struct pcpu_tstats *tstats = this_cpu_ptr(t->dev->tstats);
-
-		tstats->tx_bytes += pkt_len;
-		tstats->tx_packets++;
-	} else {
-		stats->tx_errors++;
-		stats->tx_aborted_errors++;
-	}
+	ip6tunnel_xmit(skb, dev);
 	if (ndst)
 		ip6_tnl_dst_store(t, ndst);
 	return 0;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 96bfb4e4b820..241fb8ad9fcf 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -842,9 +842,9 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
 	if (ipv6_hdr(skb)->version == 0) {
 		struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 		nlh->nlmsg_type = NLMSG_ERROR;
-		nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+		nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 		skb_trim(skb, nlh->nlmsg_len);
-		((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
+		((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
 		rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 	} else
 		kfree_skb(skb);
@@ -1100,13 +1100,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
 	if (ipv6_hdr(skb)->version == 0) {
 		struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 
-		if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
+		if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
 			nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
 		} else {
 			nlh->nlmsg_type = NLMSG_ERROR;
-			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 			skb_trim(skb, nlh->nlmsg_len);
-			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
+			((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
 		}
 		rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 	} else
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 76ef4353d518..2712ab22a174 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -610,8 +610,6 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
 		}
 	}
 #endif
-	if (!dev->addr_len)
-		send_sllao = 0;
 	if (send_sllao)
 		optlen += ndisc_opt_addr_space(dev);
 
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 429089cb073d..72836f40b730 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -1,3 +1,9 @@
+/*
+ * IPv6 specific functions of netfilter core
+ *
+ * Rusty Russell (C) 2000 -- This code is GPL.
+ * Patrick McHardy (C) 2006-2012
+ */
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/ipv6.h>
@@ -29,7 +35,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
 		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 		LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
 		dst_release(dst);
-		return -EINVAL;
+		return dst->error;
 	}
 
 	/* Drop old route. */
@@ -43,7 +49,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
 		skb_dst_set(skb, NULL);
 		dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0);
 		if (IS_ERR(dst))
-			return -1;
+			return PTR_ERR(dst);
 		skb_dst_set(skb, dst);
 	}
 #endif
@@ -53,7 +59,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
 	if (skb_headroom(skb) < hh_len &&
 	    pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
 			     0, GFP_ATOMIC))
-		return -1;
+		return -ENOMEM;
 
 	return 0;
 }
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index c72532a60d88..4433ab40e7de 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -105,7 +105,7 @@ config IP6_NF_MATCH_MH
 
 config IP6_NF_MATCH_RPFILTER
 	tristate '"rpfilter" reverse path filter match support'
-	depends on NETFILTER_ADVANCED
+	depends on NETFILTER_ADVANCED && (IP6_NF_MANGLE || IP6_NF_RAW)
 	---help---
 	  This option allows you to match packets whose replies would
 	  go out via the interface the packet came in.
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 341b54ade72c..44400c216dc6 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -284,6 +285,7 @@ static void trace_packet(const struct sk_buff *skb,
 	const char *hookname, *chainname, *comment;
 	const struct ip6t_entry *iter;
 	unsigned int rulenum = 0;
+	struct net *net = dev_net(in ? in : out);
 
 	table_base = private->entries[smp_processor_id()];
 	root = get_entry(table_base, private->hook_entry[hook]);
@@ -296,7 +298,7 @@ static void trace_packet(const struct sk_buff *skb,
 			  &chainname, &comment, &rulenum) != 0)
 			break;
 
-		nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo,
+		nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo,
 			      "TRACE: %s:%s:%s:%u ",
 			      tablename, chainname, comment, rulenum);
 }
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
index cb631143721c..590f767db5d4 100644
--- a/net/ipv6/netfilter/ip6t_NPT.c
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -18,9 +18,8 @@
 static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
 {
 	struct ip6t_npt_tginfo *npt = par->targinfo;
-	__wsum src_sum = 0, dst_sum = 0;
 	struct in6_addr pfx;
-	unsigned int i;
+	__wsum src_sum, dst_sum;
 
 	if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
 		return -EINVAL;
@@ -33,12 +32,8 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
 	if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6))
 		return -EINVAL;
 
-	for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) {
-		src_sum = csum_add(src_sum,
-			(__force __wsum)npt->src_pfx.in6.s6_addr16[i]);
-		dst_sum = csum_add(dst_sum,
-			(__force __wsum)npt->dst_pfx.in6.s6_addr16[i]);
-	}
+	src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0);
+	dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0);
 
 	npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum));
 	return 0;
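[editor's note] The conversion above replaces a hand-rolled per-s6_addr16 loop with a single csum_partial() pass over the whole in6_addr. A standalone userspace sketch (plain C, standing in for the kernel's csum helpers) showing that the two accumulation styles fold to the same 16-bit value:

/* npt_csum_sketch.c -- illustrative, not the kernel implementation */
#include <stdint.h>
#include <stdio.h>

/* flat ones'-complement accumulation over a buffer, 16 bits at a time,
 * playing the role of csum_partial() */
static uint32_t csum(const uint16_t *p, size_t words, uint32_t sum)
{
	while (words--)
		sum += *p++;
	return sum;
}

/* fold carries back in, playing the role of csum_fold() */
static uint16_t fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

int main(void)
{
	uint16_t pfx[8] = { 0x0120, 0x0db8, 0xbeef, 0, 0, 0, 0, 0 };
	uint32_t loop_sum = 0;
	size_t i;

	/* old style: accumulate each 16-bit word individually */
	for (i = 0; i < 8; i++)
		loop_sum += pfx[i];

	/* new style: one pass over the whole address -- same result */
	printf("loop=0x%04x flat=0x%04x\n",
	       (unsigned)fold(loop_sum), (unsigned)fold(csum(pfx, 8, 0)));
	return 0;
}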
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index ed3b427b2841..70f9abc0efe9 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -7,6 +7,8 @@
  * Authors:
  *	Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
  *
+ * Copyright (c) 2005-2007 Patrick McHardy <kaber@trash.net>
+ *
  * Based on net/ipv4/netfilter/ipt_REJECT.c
  *
  * This program is free software; you can redistribute it and/or
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 6134a1ebfb1b..e075399d8b72 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -38,7 +38,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
 	struct in6_addr saddr, daddr;
 	u_int8_t hop_limit;
 	u_int32_t flowlabel, mark;
-
+	int err;
 #if 0
 	/* root is playing with raw sockets. */
 	if (skb->len < sizeof(struct iphdr) ||
@@ -65,8 +65,11 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
 		     !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) ||
 		     skb->mark != mark ||
 		     ipv6_hdr(skb)->hop_limit != hop_limit ||
-		     flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
-		return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
+		     flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
+		err = ip6_route_me_harder(skb);
+		if (err < 0)
+			ret = NF_DROP_ERR(err);
+	}
 
 	return ret;
 }
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index e0e788d25b14..6383f90efda8 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -179,6 +179,7 @@ nf_nat_ipv6_out(unsigned int hooknum,
 #ifdef CONFIG_XFRM
 	const struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
+	int err;
 #endif
 	unsigned int ret;
 
@@ -197,9 +198,11 @@ nf_nat_ipv6_out(unsigned int hooknum,
 				      &ct->tuplehash[!dir].tuple.dst.u3) ||
 		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
 		     ct->tuplehash[dir].tuple.src.u.all !=
-		     ct->tuplehash[!dir].tuple.dst.u.all))
-			if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
-				ret = NF_DROP;
+		     ct->tuplehash[!dir].tuple.dst.u.all)) {
+			err = nf_xfrm_me_harder(skb, AF_INET6);
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
+		}
 	}
 #endif
 	return ret;
@@ -215,6 +218,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum,
 	const struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	unsigned int ret;
+	int err;
 
 	/* root is playing with raw sockets. */
 	if (skb->len < sizeof(struct ipv6hdr))
@@ -227,16 +231,19 @@ nf_nat_ipv6_local_fn(unsigned int hooknum,
 
 		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
 				      &ct->tuplehash[!dir].tuple.src.u3)) {
-			if (ip6_route_me_harder(skb))
-				ret = NF_DROP;
+			err = ip6_route_me_harder(skb);
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
 		}
 #ifdef CONFIG_XFRM
 		else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
 			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
 			 ct->tuplehash[dir].tuple.dst.u.all !=
-			 ct->tuplehash[!dir].tuple.src.u.all)
-			if (nf_xfrm_me_harder(skb, AF_INET6))
-				ret = NF_DROP;
+			 ct->tuplehash[!dir].tuple.src.u.all) {
+			err = nf_xfrm_me_harder(skb, AF_INET6);
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
+		}
 #endif
 	}
 	return ret;
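[editor's note] These hunks switch from a bare NF_DROP to NF_DROP_ERR() so the errno from the routing or xfrm lookup reaches the local sender instead of being flattened into a generic drop. A hedged sketch of the packing idea only (verdict in the low bits, negated errno above them), not the kernel's actual macro:

/* drop_err_sketch.c -- illustrative encoding, assumed layout */
#include <stdio.h>

#define VERDICT_DROP 0u
#define DROP_ERR(err) (((unsigned int)(-(err)) << 16) | VERDICT_DROP)

int main(void)
{
	unsigned int v = DROP_ERR(-12 /* -ENOMEM */);

	/* a consumer can recover both pieces from one return value */
	printf("verdict=%u errno=-%u\n", v & 0xffffu, v >> 16);
	return 0;
}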
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 2b6c226f5198..97bcf2bae857 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -330,12 +330,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 	       sizeof(sin6.sin6_addr));
 
 	nf_ct_put(ct);
-
-	if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
-		sin6.sin6_scope_id = sk->sk_bound_dev_if;
-	else
-		sin6.sin6_scope_id = 0;
-
+	sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr,
+						 sk->sk_bound_dev_if);
 	return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
 }
 
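[editor's note] This change, and the raw.c and udp.c hunks further down, collapse the open-coded link-local test into the new ipv6_iface_scope_id() helper. A userspace approximation of its contract, assuming only the link-local case matters (the kernel helper may cover further scoped address types):

/* scope_id_sketch.c -- approximation of ipv6_iface_scope_id() */
#include <netinet/in.h>
#include <stdio.h>

static unsigned int iface_scope_id(const struct in6_addr *addr, int iface)
{
	/* hand back the interface index only when the address actually
	 * needs a scope to be unambiguous; 0 otherwise */
	return IN6_IS_ADDR_LINKLOCAL(addr) ? (unsigned int)iface : 0;
}

int main(void)
{
	struct in6_addr ll = { { { 0xfe, 0x80, } } }; /* fe80:: */
	struct in6_addr g  = { { { 0x20, 0x01, } } }; /* 2001:: */

	printf("%u %u\n", iface_scope_id(&ll, 3), iface_scope_id(&g, 3));
	return 0;
}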
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 24df3dde0076..b3807c5cb888 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -131,7 +131,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
 			 type + 128);
 		nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
 		if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6))
-			nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL,
+				      NULL, NULL,
 				      "nf_ct_icmpv6: invalid new with type %d ",
 				      type + 128);
 		return false;
@@ -203,7 +204,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
 	icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
 	if (icmp6h == NULL) {
 		if (LOG_INVALID(net, IPPROTO_ICMPV6))
-			nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_icmpv6: short packet ");
 		return -NF_ACCEPT;
 	}
@@ -211,7 +212,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
 		if (LOG_INVALID(net, IPPROTO_ICMPV6))
-			nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_icmpv6: ICMPv6 checksum failed ");
 		return -NF_ACCEPT;
 	}
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 6700069949dd..dffdc1a389c5 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -41,6 +41,7 @@
 #include <net/rawv6.h>
 #include <net/ndisc.h>
 #include <net/addrconf.h>
+#include <net/inet_ecn.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 #include <linux/sysctl.h>
 #include <linux/netfilter.h>
@@ -138,6 +139,11 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
 }
 #endif
 
+static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
+{
+	return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
+}
+
 static unsigned int nf_hashfn(struct inet_frag_queue *q)
 {
 	const struct frag_queue *nq;
@@ -166,7 +172,7 @@ static void nf_ct_frag6_expire(unsigned long data)
 /* Creation primitives. */
 static inline struct frag_queue *fq_find(struct net *net, __be32 id,
 					 u32 user, struct in6_addr *src,
-					 struct in6_addr *dst)
+					 struct in6_addr *dst, u8 ecn)
 {
 	struct inet_frag_queue *q;
 	struct ip6_create_arg arg;
@@ -176,6 +182,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
 	arg.user = user;
 	arg.src = src;
 	arg.dst = dst;
+	arg.ecn = ecn;
 
 	read_lock_bh(&nf_frags.lock);
 	hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
@@ -196,6 +203,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 	struct sk_buff *prev, *next;
 	unsigned int payload_len;
 	int offset, end;
+	u8 ecn;
 
 	if (fq->q.last_in & INET_FRAG_COMPLETE) {
 		pr_debug("Already completed\n");
@@ -213,6 +221,8 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 		return -1;
 	}
 
+	ecn = ip6_frag_ecn(ipv6_hdr(skb));
+
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		const unsigned char *nh = skb_network_header(skb);
 		skb->csum = csum_sub(skb->csum,
@@ -317,6 +327,7 @@ found:
 	}
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
+	fq->ecn |= ecn;
 	if (payload_len > fq->q.max_size)
 		fq->q.max_size = payload_len;
 	add_frag_mem_limit(&fq->q, skb->truesize);
@@ -352,12 +363,17 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
 {
 	struct sk_buff *fp, *op, *head = fq->q.fragments;
 	int payload_len;
+	u8 ecn;
 
 	inet_frag_kill(&fq->q, &nf_frags);
 
 	WARN_ON(head == NULL);
 	WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
 
+	ecn = ip_frag_ecn_table[fq->ecn];
+	if (unlikely(ecn == 0xff))
+		goto out_fail;
+
 	/* Unfragmented part is taken from the first segment. */
 	payload_len = ((head->data - skb_network_header(head)) -
 		       sizeof(struct ipv6hdr) + fq->q.len -
@@ -428,6 +444,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
 	head->dev = dev;
 	head->tstamp = fq->q.stamp;
 	ipv6_hdr(head)->payload_len = htons(payload_len);
+	ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
 	IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
 
 	/* Yes, and fold redundant checksum back. 8) */
@@ -572,7 +589,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
 	inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
 	local_bh_enable();
 
-	fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr);
+	fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
+		     ip6_frag_ecn(hdr));
 	if (fq == NULL) {
 		pr_debug("Can't find and can't create new queue\n");
 		goto ret_orig;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 330b5e7b7df6..eedff8ccded5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -263,7 +263,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (addr_type != IPV6_ADDR_ANY) {
 		struct net_device *dev = NULL;
 
-		if (addr_type & IPV6_ADDR_LINKLOCAL) {
+		if (__ipv6_addr_needs_scope_id(addr_type)) {
 			if (addr_len >= sizeof(struct sockaddr_in6) &&
 			    addr->sin6_scope_id) {
 				/* Override any existing binding, if another
@@ -498,9 +498,8 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 		sin6->sin6_port = 0;
 		sin6->sin6_addr = ipv6_hdr(skb)->saddr;
 		sin6->sin6_flowinfo = 0;
-		sin6->sin6_scope_id = 0;
-		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-			sin6->sin6_scope_id = IP6CB(skb)->iif;
+		sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
+							  IP6CB(skb)->iif);
 	}
 
 	sock_recv_ts_and_drops(msg, sk, skb);
@@ -802,7 +801,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 
 		if (addr_len >= sizeof(struct sockaddr_in6) &&
 		    sin6->sin6_scope_id &&
-		    ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
+		    __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
 			fl6.flowi6_oif = sin6->sin6_scope_id;
 	} else {
 		if (sk->sk_state != TCP_ESTABLISHED)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 0ba10e53a629..790d9f4b8b0b 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -58,6 +58,7 @@
 #include <net/ndisc.h>
 #include <net/addrconf.h>
 #include <net/inet_frag.h>
+#include <net/inet_ecn.h>
 
 struct ip6frag_skb_cb
 {
@@ -67,6 +68,10 @@ struct ip6frag_skb_cb
 
 #define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb))
 
+static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
+{
+	return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
+}
 
 static struct inet_frags ip6_frags;
 
@@ -119,6 +124,7 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)
 	fq->user = arg->user;
 	fq->saddr = *arg->src;
 	fq->daddr = *arg->dst;
+	fq->ecn = arg->ecn;
 }
 EXPORT_SYMBOL(ip6_frag_init);
 
@@ -173,7 +179,8 @@ static void ip6_frag_expire(unsigned long data)
 }
 
 static __inline__ struct frag_queue *
-fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6_addr *dst)
+fq_find(struct net *net, __be32 id, const struct in6_addr *src,
+	const struct in6_addr *dst, u8 ecn)
 {
 	struct inet_frag_queue *q;
 	struct ip6_create_arg arg;
@@ -183,6 +190,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6
 	arg.user = IP6_DEFRAG_LOCAL_DELIVER;
 	arg.src = src;
 	arg.dst = dst;
+	arg.ecn = ecn;
 
 	read_lock(&ip6_frags.lock);
 	hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd);
@@ -202,6 +210,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	struct net_device *dev;
 	int offset, end;
 	struct net *net = dev_net(skb_dst(skb)->dev);
+	u8 ecn;
 
 	if (fq->q.last_in & INET_FRAG_COMPLETE)
 		goto err;
@@ -219,6 +228,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 		return -1;
 	}
 
+	ecn = ip6_frag_ecn(ipv6_hdr(skb));
+
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		const unsigned char *nh = skb_network_header(skb);
 		skb->csum = csum_sub(skb->csum,
@@ -319,6 +330,7 @@ found:
 	}
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
+	fq->ecn |= ecn;
 	add_frag_mem_limit(&fq->q, skb->truesize);
 
 	/* The first fragment.
@@ -370,9 +382,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	int payload_len;
 	unsigned int nhoff;
 	int sum_truesize;
+	u8 ecn;
 
 	inet_frag_kill(&fq->q, &ip6_frags);
 
+	ecn = ip_frag_ecn_table[fq->ecn];
+	if (unlikely(ecn == 0xff))
+		goto out_fail;
+
 	/* Make the one we just received the head. */
 	if (prev) {
 		head = prev->next;
@@ -471,6 +488,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	head->dev = dev;
 	head->tstamp = fq->q.stamp;
 	ipv6_hdr(head)->payload_len = htons(payload_len);
+	ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
 	IP6CB(head)->nhoff = nhoff;
 
 	/* Yes, and fold redundant checksum back. 8) */
@@ -534,7 +552,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 			 IPSTATS_MIB_REASMFAILS, evicted);
 
-	fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr);
+	fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
+		     ip6_frag_ecn(hdr));
 	if (fq != NULL) {
 		int ret;
 
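[editor's note] Both reassembly paths above now record one bit per fragment, 1 << (2-bit ECN field), into fq->ecn, and fold that bitmask through ip_frag_ecn_table when rebuilding the header, aborting on inconsistent combinations (table value 0xff). A simplified standalone model of the rule; the real table lives in the shared ipv4 fragment code and this fold function only approximates it:

/* frag_ecn_sketch.c -- simplified model, not ip_frag_ecn_table */
#include <stdio.h>

enum { NOT_ECT = 0, ECT_1 = 1, ECT_0 = 2, CE = 3 };	/* 2-bit ECN field */

static unsigned int frag_ecn_bit(unsigned int dsfield)
{
	return 1u << (dsfield & 0x3);	/* as in ip6_frag_ecn() */
}

/* returns the ECN bits to OR into the reassembled header, or -1 to
 * drop: mixing not-ECT fragments with ECT/CE ones is invalid */
static int fold_ecn(unsigned int mask)
{
	if (mask & (1u << NOT_ECT))
		return mask == (1u << NOT_ECT) ? NOT_ECT : -1;
	return (mask & (1u << CE)) ? CE : 0;
}

int main(void)
{
	unsigned int fq_ecn = frag_ecn_bit(ECT_0) | frag_ecn_bit(CE);

	printf("%d\n", fold_ecn(fq_ecn));			/* 3: mark CE */
	printf("%d\n", fold_ecn(frag_ecn_bit(NOT_ECT) |
				frag_ecn_bit(CE)));		/* -1: drop */
	return 0;
}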
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e5fe0041adfa..ad0aa6b0b86a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2355,7 +2355,7 @@ beginning:
 	return last_err;
 }
 
-static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
 {
 	struct fib6_config cfg;
 	int err;
@@ -2370,7 +2370,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
 	return ip6_route_del(&cfg);
 }
 
-static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
 {
 	struct fib6_config cfg;
 	int err;
@@ -2562,7 +2562,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 		     prefix, 0, NLM_F_MULTI);
 }
 
-static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
 {
 	struct net *net = sock_net(in_skb->sk);
 	struct nlattr *tb[RTA_MAX+1];
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 02f96dcbcf02..335363478bbf 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -49,7 +49,7 @@
 #include <net/ip.h>
 #include <net/udp.h>
 #include <net/icmp.h>
-#include <net/ipip.h>
+#include <net/ip_tunnels.h>
 #include <net/inet_ecn.h>
 #include <net/xfrm.h>
 #include <net/dsfield.h>
@@ -87,41 +87,6 @@ struct sit_net {
 	struct net_device *fb_tunnel_dev;
 };
 
-static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev,
-						   struct rtnl_link_stats64 *tot)
-{
-	int i;
-
-	for_each_possible_cpu(i) {
-		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
-		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
-		unsigned int start;
-
-		do {
-			start = u64_stats_fetch_begin_bh(&tstats->syncp);
-			rx_packets = tstats->rx_packets;
-			tx_packets = tstats->tx_packets;
-			rx_bytes = tstats->rx_bytes;
-			tx_bytes = tstats->tx_bytes;
-		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
-
-		tot->rx_packets += rx_packets;
-		tot->tx_packets += tx_packets;
-		tot->rx_bytes += rx_bytes;
-		tot->tx_bytes += tx_bytes;
-	}
-
-	tot->rx_errors = dev->stats.rx_errors;
-	tot->rx_frame_errors = dev->stats.rx_frame_errors;
-	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
-	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
-	tot->tx_dropped = dev->stats.tx_dropped;
-	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
-	tot->tx_errors = dev->stats.tx_errors;
-
-	return tot;
-}
-
 /*
  * Must be invoked with rcu_read_lock
  */
@@ -899,6 +864,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	if ((iph->ttl = tiph->ttl) == 0)
 		iph->ttl = iph6->hop_limit;
 
+	skb->ip_summed = CHECKSUM_NONE;
+	ip_select_ident(iph, skb_dst(skb), NULL);
 	iptunnel_xmit(skb, dev);
 	return NETDEV_TX_OK;
 
@@ -1200,7 +1167,7 @@ static const struct net_device_ops ipip6_netdev_ops = {
 	.ndo_start_xmit = ipip6_tunnel_xmit,
 	.ndo_do_ioctl = ipip6_tunnel_ioctl,
 	.ndo_change_mtu = ipip6_tunnel_change_mtu,
-	.ndo_get_stats64= ipip6_get_stats64,
+	.ndo_get_stats64 = ip_tunnel_get_stats64,
 };
 
 static void ipip6_dev_free(struct net_device *dev)
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 8a0848b60b35..d5dda20bd717 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -149,7 +149,6 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie)
 struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_options_received tcp_opt;
-	const u8 *hash_location;
 	struct inet_request_sock *ireq;
 	struct inet6_request_sock *ireq6;
 	struct tcp_request_sock *treq;
@@ -177,7 +176,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	/* check for timestamp cookie support */
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
+	tcp_parse_options(skb, &tcp_opt, 0, NULL);
 
 	if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
 		goto out;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 46a5be85be87..e51bd1a58264 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -462,7 +462,6 @@ out:
 static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct flowi6 *fl6,
 			      struct request_sock *req,
-			      struct request_values *rvp,
 			      u16 queue_mapping)
 {
 	struct inet6_request_sock *treq = inet6_rsk(req);
@@ -474,7 +473,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
 		goto done;
 
-	skb = tcp_make_synack(sk, dst, req, rvp, NULL);
+	skb = tcp_make_synack(sk, dst, req, NULL);
 
 	if (skb) {
 		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
@@ -489,13 +488,12 @@ done:
 	return err;
 }
 
-static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
-			     struct request_values *rvp)
+static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
 {
 	struct flowi6 fl6;
 	int res;
 
-	res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
+	res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0);
 	if (!res)
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
 	return res;
@@ -948,9 +946,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
  */
 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcp_extend_values tmp_ext;
 	struct tcp_options_received tmp_opt;
-	const u8 *hash_location;
 	struct request_sock *req;
 	struct inet6_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -988,50 +984,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 	tmp_opt.user_mss = tp->rx_opt.user_mss;
-	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
-
-	if (tmp_opt.cookie_plus > 0 &&
-	    tmp_opt.saw_tstamp &&
-	    !tp->rx_opt.cookie_out_never &&
-	    (sysctl_tcp_cookie_size > 0 ||
-	     (tp->cookie_values != NULL &&
-	      tp->cookie_values->cookie_desired > 0))) {
-		u8 *c;
-		u32 *d;
-		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
-		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
-
-		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
-			goto drop_and_free;
-
-		/* Secret recipe starts with IP addresses */
-		d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
-		*mess++ ^= *d++;
-		*mess++ ^= *d++;
-		*mess++ ^= *d++;
-		*mess++ ^= *d++;
-		d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
-		*mess++ ^= *d++;
-		*mess++ ^= *d++;
-		*mess++ ^= *d++;
-		*mess++ ^= *d++;
-
-		/* plus variable length Initiator Cookie */
-		c = (u8 *)mess;
-		while (l-- > 0)
-			*c++ ^= *hash_location++;
-
-		want_cookie = false;	/* not our kind of cookie */
-		tmp_ext.cookie_out_never = 0; /* false */
-		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
-	} else if (!tp->rx_opt.cookie_in_always) {
-		/* redundant indications, but ensure initialization. */
-		tmp_ext.cookie_out_never = 1; /* true */
-		tmp_ext.cookie_plus = 0;
-	} else {
-		goto drop_and_free;
-	}
-	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
+	tcp_parse_options(skb, &tmp_opt, 0, NULL);
 
 	if (want_cookie && !tmp_opt.saw_tstamp)
 		tcp_clear_options(&tmp_opt);
@@ -1109,7 +1062,6 @@ have_isn:
 		goto drop_and_release;
 
 	if (tcp_v6_send_synack(sk, dst, &fl6, req,
-			       (struct request_values *)&tmp_ext,
 			       skb_get_queue_mapping(skb)) ||
 	    want_cookie)
 		goto drop_and_free;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d8e5e852fc7a..da6019b63730 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -450,15 +450,16 @@ try_again:
 		sin6->sin6_family = AF_INET6;
 		sin6->sin6_port = udp_hdr(skb)->source;
 		sin6->sin6_flowinfo = 0;
-		sin6->sin6_scope_id = 0;
 
-		if (is_udp4)
+		if (is_udp4) {
 			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
 					       &sin6->sin6_addr);
-		else {
+			sin6->sin6_scope_id = 0;
+		} else {
 			sin6->sin6_addr = ipv6_hdr(skb)->saddr;
-			if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-				sin6->sin6_scope_id = IP6CB(skb)->iif;
+			sin6->sin6_scope_id =
+				ipv6_iface_scope_id(&sin6->sin6_addr,
+						    IP6CB(skb)->iif);
 		}
 
 	}
@@ -1118,7 +1119,7 @@ do_udp_sendmsg:
 
 		if (addr_len >= sizeof(struct sockaddr_in6) &&
 		    sin6->sin6_scope_id &&
-		    ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
+		    __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
 			fl6.flowi6_oif = sin6->sin6_scope_id;
 	} else {
 		if (sk->sk_state != TCP_ESTABLISHED)
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index cf05cf073c51..3bb3a891a424 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -21,6 +21,10 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
 	const struct ipv6hdr *ipv6h;
 	struct udphdr *uh;
 
+	/* UDP Tunnel offload on ipv6 is not yet supported. */
+	if (skb->encapsulation)
+		return -EINVAL;
+
 	if (!pskb_may_pull(skb, sizeof(*uh)))
 		return -EINVAL;
 
@@ -56,7 +60,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		/* Packet is from an untrusted source, reset gso_segs. */
 		int type = skb_shinfo(skb)->gso_type;
 
-		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+		if (unlikely(type & ~(SKB_GSO_UDP |
+				      SKB_GSO_DODGY |
+				      SKB_GSO_UDP_TUNNEL |
 				      SKB_GSO_GRE) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 9bf6a74a71d2..4770d515c2c8 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -49,8 +49,11 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	       sizeof(top_iph->flow_lbl));
 	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
 
-	dsfield = XFRM_MODE_SKB_CB(skb)->tos;
-	dsfield = INET_ECN_encapsulate(dsfield, dsfield);
+	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
+		dsfield = 0;
+	else
+		dsfield = XFRM_MODE_SKB_CB(skb)->tos;
+	dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
 	if (x->props.flags & XFRM_STATE_NOECN)
 		dsfield &= ~INET_ECN_MASK;
 	ipv6_change_dsfield(top_iph, 0, dsfield);
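[editor's note] The new XFRM_SA_XFLAG_DONT_ENCAP_DSCP flag zeroes the DSCP copied into the outer IPv6 header, while INET_ECN_encapsulate() still derives the outer ECN bits from the inner header. A rough userspace model of that split; ecn_encapsulate() here only approximates the real helper in include/net/inet_ecn.h:

/* dscp_ecn_sketch.c -- simplified model of the outer-header TOS logic */
#include <stdio.h>

#define ECN_MASK 0x3u

static unsigned int ecn_encapsulate(unsigned int outer, unsigned int inner)
{
	unsigned int ecn = inner & ECN_MASK;

	/* an inner CE mark is not copied outward; the outer header
	 * starts as ECT(0) and must earn its own CE mark in transit */
	if (ecn == 3)
		ecn = 2;	/* ECT(0) */
	return (outer & ~ECN_MASK) | ecn;
}

int main(void)
{
	unsigned int inner_tos = (0x2eu << 2) | 3;	/* EF DSCP, CE */

	printf("keep DSCP: 0x%02x\n", ecn_encapsulate(inner_tos, inner_tos));
	printf("zero DSCP: 0x%02x\n", ecn_encapsulate(0, inner_tos));
	return 0;
}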
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index e493b3397ae3..0578d4fa00a9 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -305,8 +305,7 @@ static void irda_connect_response(struct irda_sock *self)
 
 	IRDA_DEBUG(2, "%s()\n", __func__);
 
-	skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER,
-			GFP_ATOMIC);
+	skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, GFP_KERNEL);
 	if (skb == NULL) {
 		IRDA_DEBUG(0, "%s() Unable to allocate sk_buff!\n",
 			   __func__);
@@ -1120,7 +1119,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol,
 	}
 
 	/* Allocate networking socket */
-	sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto);
+	sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto);
 	if (sk == NULL)
 		return -ENOMEM;
 
diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c
index 52079f19bbbe..b797daac063c 100644
--- a/net/irda/ircomm/ircomm_core.c
+++ b/net/irda/ircomm/ircomm_core.c
@@ -117,7 +117,7 @@ struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line)
 
 	IRDA_ASSERT(ircomm != NULL, return NULL;);
 
-	self = kzalloc(sizeof(struct ircomm_cb), GFP_ATOMIC);
+	self = kzalloc(sizeof(struct ircomm_cb), GFP_KERNEL);
 	if (self == NULL)
 		return NULL;
 
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 206ce6db2c36..ae691651b721 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1461,7 +1461,8 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
 		return iucv_accept_poll(sk);
 
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
-		mask |= POLLERR;
+		mask |= POLLERR |
+			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
 
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLRDHUP;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 8aecf5df6656..6984c3a353cd 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1777,7 +1777,7 @@ int l2tp_session_delete(struct l2tp_session *session)
 	if (session->session_close != NULL)
 		(*session->session_close)(session);
 	if (session->deref)
-		(*session->ref)(session);
+		(*session->deref)(session);
 	l2tp_session_dec_refcount(session);
 	return 0;
 }
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index a6893602f87a..c50c19402588 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -175,7 +175,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 	 * add it to the device after the station.
 	 */
 	if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) {
-		ieee80211_key_free(sdata->local, key);
+		ieee80211_key_free_unused(key);
 		err = -ENOENT;
 		goto out_unlock;
 	}
@@ -214,8 +214,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 	}
 
 	err = ieee80211_key_link(key, sdata, sta);
-	if (err)
-		ieee80211_key_free(sdata->local, key);
 
  out_unlock:
 	mutex_unlock(&sdata->local->sta_mtx);
@@ -254,7 +252,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
 		goto out_unlock;
 	}
 
-	__ieee80211_key_free(key);
+	ieee80211_key_free(key, true);
 
 	ret = 0;
 out_unlock:
@@ -445,12 +443,14 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	struct ieee80211_local *local = sdata->local;
 	struct timespec uptime;
+	u64 packets = 0;
+	int ac;
 
 	sinfo->generation = sdata->local->sta_generation;
 
 	sinfo->filled = STATION_INFO_INACTIVE_TIME |
-			STATION_INFO_RX_BYTES |
-			STATION_INFO_TX_BYTES |
+			STATION_INFO_RX_BYTES64 |
+			STATION_INFO_TX_BYTES64 |
 			STATION_INFO_RX_PACKETS |
 			STATION_INFO_TX_PACKETS |
 			STATION_INFO_TX_RETRIES |
@@ -467,10 +467,14 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 	sinfo->connected_time = uptime.tv_sec - sta->last_connected;
 
 	sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
+	sinfo->tx_bytes = 0;
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		sinfo->tx_bytes += sta->tx_bytes[ac];
+		packets += sta->tx_packets[ac];
+	}
+	sinfo->tx_packets = packets;
 	sinfo->rx_bytes = sta->rx_bytes;
-	sinfo->tx_bytes = sta->tx_bytes;
 	sinfo->rx_packets = sta->rx_packets;
-	sinfo->tx_packets = sta->tx_packets;
 	sinfo->tx_retries = sta->tx_retry_count;
 	sinfo->tx_failed = sta->tx_retry_failed;
 	sinfo->rx_dropped_misc = sta->rx_dropped;
@@ -598,8 +602,8 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy,
 	data[i++] += sta->rx_fragments;		\
 	data[i++] += sta->rx_dropped;		\
 						\
-	data[i++] += sta->tx_packets;		\
-	data[i++] += sta->tx_bytes;		\
+	data[i++] += sinfo.tx_packets;		\
+	data[i++] += sinfo.tx_bytes;		\
 	data[i++] += sta->tx_fragments;		\
 	data[i++] += sta->tx_filtered_count;	\
 	data[i++] += sta->tx_retry_failed;	\
@@ -621,13 +625,14 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy,
 	if (!(sta && !WARN_ON(sta->sdata->dev != dev)))
 		goto do_survey;
 
+	sinfo.filled = 0;
+	sta_set_sinfo(sta, &sinfo);
+
 	i = 0;
 	ADD_STA_STATS(sta);
 
 	data[i++] = sta->sta_state;
 
-	sinfo.filled = 0;
-	sta_set_sinfo(sta, &sinfo);
 
 	if (sinfo.filled & STATION_INFO_TX_BITRATE)
 		data[i] = 100000 *
@@ -1035,9 +1040,12 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
 	sta_info_flush_defer(vlan);
 	sta_info_flush_defer(sdata);
 	rcu_barrier();
-	list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
+	list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) {
 		sta_info_flush_cleanup(vlan);
+		ieee80211_free_keys(vlan);
+	}
 	sta_info_flush_cleanup(sdata);
+	ieee80211_free_keys(sdata);
 
 	sdata->vif.bss_conf.enable_beacon = false;
 	clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
@@ -1177,6 +1185,18 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 			mask |= BIT(NL80211_STA_FLAG_ASSOCIATED);
 		if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED))
 			set |= BIT(NL80211_STA_FLAG_ASSOCIATED);
+	} else if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+		/*
+		 * TDLS -- everything follows authorized, but
+		 * only becoming authorized is possible, not
+		 * going back
+		 */
+		if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) {
+			set |= BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+			       BIT(NL80211_STA_FLAG_ASSOCIATED);
+			mask |= BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+				BIT(NL80211_STA_FLAG_ASSOCIATED);
+		}
 	}
 
 	ret = sta_apply_auth_flags(local, sta, mask, set);
@@ -1261,7 +1281,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 	if (ieee80211_vif_is_mesh(&sdata->vif)) {
 #ifdef CONFIG_MAC80211_MESH
 		u32 changed = 0;
-		if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) {
+
+		if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) {
 			switch (params->plink_state) {
 			case NL80211_PLINK_ESTAB:
 				if (sta->plink_state != NL80211_PLINK_ESTAB)
@@ -1292,15 +1313,18 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 				/* nothing */
 				break;
 			}
-		} else {
-			switch (params->plink_action) {
-			case PLINK_ACTION_OPEN:
-				changed |= mesh_plink_open(sta);
-				break;
-			case PLINK_ACTION_BLOCK:
-				changed |= mesh_plink_block(sta);
-				break;
-			}
+		}
+
+		switch (params->plink_action) {
+		case NL80211_PLINK_ACTION_NO_ACTION:
+			/* nothing */
+			break;
+		case NL80211_PLINK_ACTION_OPEN:
+			changed |= mesh_plink_open(sta);
+			break;
+		case NL80211_PLINK_ACTION_BLOCK:
+			changed |= mesh_plink_block(sta);
+			break;
 		}
 
 		if (params->local_pm)
@@ -1346,8 +1370,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
 	 * defaults -- if userspace wants something else we'll
 	 * change it accordingly in sta_apply_parameters()
 	 */
-	sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
-	sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
+	if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) {
+		sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
+		sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
+	}
 
 	err = sta_apply_parameters(local, sta, params);
 	if (err) {
@@ -1356,8 +1382,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
 	}
 
 	/*
-	 * for TDLS, rate control should be initialized only when supported
-	 * rates are known.
+	 * for TDLS, rate control should be initialized only when
+	 * rates are known and station is marked authorized
 	 */
 	if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER))
 		rate_control_rate_init(sta);
@@ -1394,50 +1420,67 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
 }
 
 static int ieee80211_change_station(struct wiphy *wiphy,
-				    struct net_device *dev,
-				    u8 *mac,
+				    struct net_device *dev, u8 *mac,
 				    struct station_parameters *params)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct sta_info *sta;
 	struct ieee80211_sub_if_data *vlansdata;
+	enum cfg80211_station_type statype;
 	int err;
 
 	mutex_lock(&local->sta_mtx);
 
 	sta = sta_info_get_bss(sdata, mac);
 	if (!sta) {
-		mutex_unlock(&local->sta_mtx);
-		return -ENOENT;
+		err = -ENOENT;
+		goto out_err;
 	}
 
-	/* in station mode, some updates are only valid with TDLS */
-	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
-	    (params->supported_rates || params->ht_capa || params->vht_capa ||
-	     params->sta_modify_mask ||
-	     (params->sta_flags_mask & BIT(NL80211_STA_FLAG_WME))) &&
-	    !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
-		mutex_unlock(&local->sta_mtx);
-		return -EINVAL;
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_MESH_POINT:
+		if (sdata->u.mesh.user_mpm)
+			statype = CFG80211_STA_MESH_PEER_USER;
+		else
+			statype = CFG80211_STA_MESH_PEER_KERNEL;
+		break;
+	case NL80211_IFTYPE_ADHOC:
+		statype = CFG80211_STA_IBSS;
+		break;
+	case NL80211_IFTYPE_STATION:
+		if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+			statype = CFG80211_STA_AP_STA;
+			break;
+		}
+		if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+			statype = CFG80211_STA_TDLS_PEER_ACTIVE;
+		else
+			statype = CFG80211_STA_TDLS_PEER_SETUP;
+		break;
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_AP_VLAN:
+		statype = CFG80211_STA_AP_CLIENT;
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		goto out_err;
 	}
 
+	err = cfg80211_check_station_change(wiphy, params, statype);
+	if (err)
+		goto out_err;
+
 	if (params->vlan && params->vlan != sta->sdata->dev) {
 		bool prev_4addr = false;
 		bool new_4addr = false;
 
 		vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
 
-		if (vlansdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
-		    vlansdata->vif.type != NL80211_IFTYPE_AP) {
-			mutex_unlock(&local->sta_mtx);
-			return -EINVAL;
-		}
-
 		if (params->vlan->ieee80211_ptr->use_4addr) {
 			if (vlansdata->u.vlan.sta) {
-				mutex_unlock(&local->sta_mtx);
-				return -EBUSY;
+				err = -EBUSY;
+				goto out_err;
 			}
 
 			rcu_assign_pointer(vlansdata->u.vlan.sta, sta);
@@ -1464,12 +1507,12 @@ static int ieee80211_change_station(struct wiphy *wiphy,
 	}
 
 	err = sta_apply_parameters(local, sta, params);
-	if (err) {
-		mutex_unlock(&local->sta_mtx);
-		return err;
-	}
+	if (err)
+		goto out_err;
 
-	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && params->supported_rates)
+	/* When peer becomes authorized, init rate control as well */
+	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
+	    test_sta_flag(sta, WLAN_STA_AUTHORIZED))
 		rate_control_rate_init(sta);
 
 	mutex_unlock(&local->sta_mtx);
@@ -1479,7 +1522,11 @@ static int ieee80211_change_station(struct wiphy *wiphy,
 		ieee80211_recalc_ps(local, -1);
 		ieee80211_recalc_ps_vif(sdata);
 	}
+
 	return 0;
+out_err:
+	mutex_unlock(&local->sta_mtx);
+	return err;
 }
 
 #ifdef CONFIG_MAC80211_MESH
@@ -1687,6 +1734,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
1687 ifmsh->mesh_sp_id = setup->sync_method; 1734 ifmsh->mesh_sp_id = setup->sync_method;
1688 ifmsh->mesh_pp_id = setup->path_sel_proto; 1735 ifmsh->mesh_pp_id = setup->path_sel_proto;
1689 ifmsh->mesh_pm_id = setup->path_metric; 1736 ifmsh->mesh_pm_id = setup->path_metric;
1737 ifmsh->user_mpm = setup->user_mpm;
1690 ifmsh->security = IEEE80211_MESH_SEC_NONE; 1738 ifmsh->security = IEEE80211_MESH_SEC_NONE;
1691 if (setup->is_authenticated) 1739 if (setup->is_authenticated)
1692 ifmsh->security |= IEEE80211_MESH_SEC_AUTHED; 1740 ifmsh->security |= IEEE80211_MESH_SEC_AUTHED;
@@ -1730,8 +1778,11 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy,
1730 conf->dot11MeshTTL = nconf->dot11MeshTTL; 1778 conf->dot11MeshTTL = nconf->dot11MeshTTL;
1731 if (_chg_mesh_attr(NL80211_MESHCONF_ELEMENT_TTL, mask)) 1779 if (_chg_mesh_attr(NL80211_MESHCONF_ELEMENT_TTL, mask))
1732 conf->element_ttl = nconf->element_ttl; 1780 conf->element_ttl = nconf->element_ttl;
1733 if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) 1781 if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) {
1782 if (ifmsh->user_mpm)
1783 return -EBUSY;
1734 conf->auto_open_plinks = nconf->auto_open_plinks; 1784 conf->auto_open_plinks = nconf->auto_open_plinks;
1785 }
1735 if (_chg_mesh_attr(NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, mask)) 1786 if (_chg_mesh_attr(NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, mask))
1736 conf->dot11MeshNbrOffsetMaxNeighbor = 1787 conf->dot11MeshNbrOffsetMaxNeighbor =
1737 nconf->dot11MeshNbrOffsetMaxNeighbor; 1788 nconf->dot11MeshNbrOffsetMaxNeighbor;
@@ -2371,7 +2422,8 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
2371 struct ieee80211_sub_if_data *sdata, 2422 struct ieee80211_sub_if_data *sdata,
2372 struct ieee80211_channel *channel, 2423 struct ieee80211_channel *channel,
2373 unsigned int duration, u64 *cookie, 2424 unsigned int duration, u64 *cookie,
2374 struct sk_buff *txskb) 2425 struct sk_buff *txskb,
2426 enum ieee80211_roc_type type)
2375{ 2427{
2376 struct ieee80211_roc_work *roc, *tmp; 2428 struct ieee80211_roc_work *roc, *tmp;
2377 bool queued = false; 2429 bool queued = false;
@@ -2390,6 +2442,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
2390 roc->duration = duration; 2442 roc->duration = duration;
2391 roc->req_duration = duration; 2443 roc->req_duration = duration;
2392 roc->frame = txskb; 2444 roc->frame = txskb;
2445 roc->type = type;
2393 roc->mgmt_tx_cookie = (unsigned long)txskb; 2446 roc->mgmt_tx_cookie = (unsigned long)txskb;
2394 roc->sdata = sdata; 2447 roc->sdata = sdata;
2395 INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); 2448 INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work);
@@ -2420,7 +2473,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
2420 if (!duration) 2473 if (!duration)
2421 duration = 10; 2474 duration = 10;
2422 2475
2423 ret = drv_remain_on_channel(local, sdata, channel, duration); 2476 ret = drv_remain_on_channel(local, sdata, channel, duration, type);
2424 if (ret) { 2477 if (ret) {
2425 kfree(roc); 2478 kfree(roc);
2426 return ret; 2479 return ret;
@@ -2439,10 +2492,13 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
2439 * 2492 *
2440 * If it hasn't started yet, just increase the duration 2493 * If it hasn't started yet, just increase the duration
2441 * and add the new one to the list of dependents. 2494 * and add the new one to the list of dependents.
2495 * If the type of the new ROC has higher priority, modify the
2496 * type of the previous one to match that of the new one.
2442 */ 2497 */
2443 if (!tmp->started) { 2498 if (!tmp->started) {
2444 list_add_tail(&roc->list, &tmp->dependents); 2499 list_add_tail(&roc->list, &tmp->dependents);
2445 tmp->duration = max(tmp->duration, roc->duration); 2500 tmp->duration = max(tmp->duration, roc->duration);
2501 tmp->type = max(tmp->type, roc->type);
2446 queued = true; 2502 queued = true;
2447 break; 2503 break;
2448 } 2504 }
@@ -2454,16 +2510,18 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
2454 /* 2510 /*
2455 * In the offloaded ROC case, if it hasn't begun, add 2511 * In the offloaded ROC case, if it hasn't begun, add
2456 * this new one to the dependent list to be handled 2512 * this new one to the dependent list to be handled
2457 * when the the master one begins. If it has begun, 2513 * when the master one begins. If it has begun,
2458 * check that there's still a minimum time left and 2514 * check that there's still a minimum time left and
2459 * if so, start this one, transmitting the frame, but 2515 * if so, start this one, transmitting the frame, but
2460 * add it to the list directly after this one with a 2516 * add it to the list directly after this one with
2461 * a reduced time so we'll ask the driver to execute 2517 * a reduced time so we'll ask the driver to execute
2462 * it right after finishing the previous one, in the 2518 * it right after finishing the previous one, in the
2463 * hope that it'll also be executed right afterwards, 2519 * hope that it'll also be executed right afterwards,
2464 * effectively extending the old one. 2520 * effectively extending the old one.
2465 * If there's no minimum time left, just add it to the 2521 * If there's no minimum time left, just add it to the
2466 * normal list. 2522 * normal list.
2523 * TODO: the ROC type is ignored here, assuming that it
2524 * is better to immediately use the current ROC.
2467 */ 2525 */
2468 if (!tmp->hw_begun) { 2526 if (!tmp->hw_begun) {
2469 list_add_tail(&roc->list, &tmp->dependents); 2527 list_add_tail(&roc->list, &tmp->dependents);
@@ -2557,7 +2615,8 @@ static int ieee80211_remain_on_channel(struct wiphy *wiphy,
2557 2615
2558 mutex_lock(&local->mtx); 2616 mutex_lock(&local->mtx);
2559 ret = ieee80211_start_roc_work(local, sdata, chan, 2617 ret = ieee80211_start_roc_work(local, sdata, chan,
2560 duration, cookie, NULL); 2618 duration, cookie, NULL,
2619 IEEE80211_ROC_TYPE_NORMAL);
2561 mutex_unlock(&local->mtx); 2620 mutex_unlock(&local->mtx);
2562 2621
2563 return ret; 2622 return ret;
@@ -2792,7 +2851,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
2792 2851
2793 /* This will handle all kinds of coalescing and immediate TX */ 2852 /* This will handle all kinds of coalescing and immediate TX */
2794 ret = ieee80211_start_roc_work(local, sdata, chan, 2853 ret = ieee80211_start_roc_work(local, sdata, chan,
2795 wait, cookie, skb); 2854 wait, cookie, skb,
2855 IEEE80211_ROC_TYPE_MGMT_TX);
2796 if (ret) 2856 if (ret)
2797 kfree_skb(skb); 2857 kfree_skb(skb);
2798 out_unlock: 2858 out_unlock:
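
The hunks above thread an ieee80211_roc_type priority through every remain-on-channel path, and the coalescing rule is easy to miss in the diff: a new request that piggybacks on one that has not started yet donates both its duration and its priority to the pending request. A minimal standalone C sketch of just that rule (illustrative names only, not the kernel code):

/*
 * Toy model of ROC coalescing: the pending request absorbs the
 * longer duration and the higher-priority type of the newcomer.
 */
#include <stdio.h>

enum roc_type { ROC_NORMAL = 0, ROC_MGMT_TX = 1 }; /* higher = more urgent */

struct roc_req {
    unsigned int duration;
    enum roc_type type;
};

static void coalesce(struct roc_req *pending, const struct roc_req *fresh)
{
    /* mirrors tmp->duration = max(...) and tmp->type = max(...) above */
    if (fresh->duration > pending->duration)
        pending->duration = fresh->duration;
    if (fresh->type > pending->type)
        pending->type = fresh->type;
}

int main(void)
{
    struct roc_req pending = { .duration = 10, .type = ROC_NORMAL };
    struct roc_req fresh = { .duration = 50, .type = ROC_MGMT_TX };

    coalesce(&pending, &fresh);
    printf("duration=%u type=%d\n", pending.duration, pending.type);
    return 0;
}
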
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index c3a3082b72e5..1521cabad3d6 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -295,7 +295,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
     char buf[50];
     struct ieee80211_key *key;
 
-    if (!sdata->debugfs.dir)
+    if (!sdata->vif.debugfs_dir)
         return;
 
     lockdep_assert_held(&sdata->local->key_mtx);
@@ -311,7 +311,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
         sprintf(buf, "../keys/%d", key->debugfs.cnt);
         sdata->debugfs.default_unicast_key =
             debugfs_create_symlink("default_unicast_key",
-                                   sdata->debugfs.dir, buf);
+                                   sdata->vif.debugfs_dir, buf);
     }
 
     if (sdata->debugfs.default_multicast_key) {
@@ -325,7 +325,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
         sprintf(buf, "../keys/%d", key->debugfs.cnt);
         sdata->debugfs.default_multicast_key =
             debugfs_create_symlink("default_multicast_key",
-                                   sdata->debugfs.dir, buf);
+                                   sdata->vif.debugfs_dir, buf);
     }
 }
 
@@ -334,7 +334,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)
     char buf[50];
     struct ieee80211_key *key;
 
-    if (!sdata->debugfs.dir)
+    if (!sdata->vif.debugfs_dir)
         return;
 
     key = key_mtx_dereference(sdata->local,
@@ -343,7 +343,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)
         sprintf(buf, "../keys/%d", key->debugfs.cnt);
         sdata->debugfs.default_mgmt_key =
             debugfs_create_symlink("default_mgmt_key",
-                                   sdata->debugfs.dir, buf);
+                                   sdata->vif.debugfs_dir, buf);
     } else
         ieee80211_debugfs_key_remove_mgmt_default(sdata);
 }
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 059bbb82e84f..ddb426867904 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -521,7 +521,7 @@ IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration,
 #endif
 
 #define DEBUGFS_ADD_MODE(name, mode) \
-    debugfs_create_file(#name, mode, sdata->debugfs.dir, \
+    debugfs_create_file(#name, mode, sdata->vif.debugfs_dir, \
                         sdata, &name##_ops);
 
 #define DEBUGFS_ADD(name) DEBUGFS_ADD_MODE(name, 0400)
@@ -577,7 +577,7 @@ static void add_mesh_files(struct ieee80211_sub_if_data *sdata)
 static void add_mesh_stats(struct ieee80211_sub_if_data *sdata)
 {
     struct dentry *dir = debugfs_create_dir("mesh_stats",
-                                            sdata->debugfs.dir);
+                                            sdata->vif.debugfs_dir);
 #define MESHSTATS_ADD(name)\
     debugfs_create_file(#name, 0400, dir, sdata, &name##_ops);
 
@@ -594,7 +594,7 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata)
 static void add_mesh_config(struct ieee80211_sub_if_data *sdata)
 {
     struct dentry *dir = debugfs_create_dir("mesh_config",
-                                            sdata->debugfs.dir);
+                                            sdata->vif.debugfs_dir);
 
 #define MESHPARAMS_ADD(name) \
     debugfs_create_file(#name, 0600, dir, sdata, &name##_ops);
@@ -631,7 +631,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata)
 
 static void add_files(struct ieee80211_sub_if_data *sdata)
 {
-    if (!sdata->debugfs.dir)
+    if (!sdata->vif.debugfs_dir)
         return;
 
     DEBUGFS_ADD(flags);
@@ -673,21 +673,21 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata)
     char buf[10+IFNAMSIZ];
 
     sprintf(buf, "netdev:%s", sdata->name);
-    sdata->debugfs.dir = debugfs_create_dir(buf,
+    sdata->vif.debugfs_dir = debugfs_create_dir(buf,
         sdata->local->hw.wiphy->debugfsdir);
-    if (sdata->debugfs.dir)
+    if (sdata->vif.debugfs_dir)
         sdata->debugfs.subdir_stations = debugfs_create_dir("stations",
-            sdata->debugfs.dir);
+            sdata->vif.debugfs_dir);
     add_files(sdata);
 }
 
 void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata)
 {
-    if (!sdata->debugfs.dir)
+    if (!sdata->vif.debugfs_dir)
         return;
 
-    debugfs_remove_recursive(sdata->debugfs.dir);
-    sdata->debugfs.dir = NULL;
+    debugfs_remove_recursive(sdata->vif.debugfs_dir);
+    sdata->vif.debugfs_dir = NULL;
 }
 
 void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata)
@@ -695,7 +695,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata)
     struct dentry *dir;
     char buf[10 + IFNAMSIZ];
 
-    dir = sdata->debugfs.dir;
+    dir = sdata->vif.debugfs_dir;
 
     if (!dir)
         return;
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index c7591f73dbc3..4f841fe559df 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -325,6 +325,36 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf,
 }
 STA_OPS(ht_capa);
 
+static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
+                                 size_t count, loff_t *ppos)
+{
+    char buf[128], *p = buf;
+    struct sta_info *sta = file->private_data;
+    struct ieee80211_sta_vht_cap *vhtc = &sta->sta.vht_cap;
+
+    p += scnprintf(p, sizeof(buf) + buf - p, "VHT %ssupported\n",
+                   vhtc->vht_supported ? "" : "not ");
+    if (vhtc->vht_supported) {
+        p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.8x\n", vhtc->cap);
+
+        p += scnprintf(p, sizeof(buf)+buf-p, "RX MCS: %.4x\n",
+                       le16_to_cpu(vhtc->vht_mcs.rx_mcs_map));
+        if (vhtc->vht_mcs.rx_highest)
+            p += scnprintf(p, sizeof(buf)+buf-p,
+                           "MCS RX highest: %d Mbps\n",
+                           le16_to_cpu(vhtc->vht_mcs.rx_highest));
+        p += scnprintf(p, sizeof(buf)+buf-p, "TX MCS: %.4x\n",
+                       le16_to_cpu(vhtc->vht_mcs.tx_mcs_map));
+        if (vhtc->vht_mcs.tx_highest)
+            p += scnprintf(p, sizeof(buf)+buf-p,
+                           "MCS TX highest: %d Mbps\n",
+                           le16_to_cpu(vhtc->vht_mcs.tx_highest));
+    }
+
+    return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+}
+STA_OPS(vht_capa);
+
 static ssize_t sta_current_tx_rate_read(struct file *file, char __user *userbuf,
                                         size_t count, loff_t *ppos)
 {
@@ -405,6 +435,7 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
     DEBUGFS_ADD(dev);
     DEBUGFS_ADD(last_signal);
     DEBUGFS_ADD(ht_capa);
+    DEBUGFS_ADD(vht_capa);
     DEBUGFS_ADD(last_ack_signal);
     DEBUGFS_ADD(current_tx_rate);
     DEBUGFS_ADD(last_rx_rate);
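
The new sta_vht_capa_read() relies on the cursor-advance idiom p += scnprintf(p, sizeof(buf) + buf - p, ...): the size argument is always the space remaining between the cursor and the end of buf. A self-contained userspace sketch of the same idiom, with a clamped snprintf standing in for the kernel's scnprintf() (which already returns only the bytes actually written):

#include <stdarg.h>
#include <stdio.h>

/* behaves like the kernel's scnprintf(): never reports more than fits */
static int scnprintf_like(char *buf, size_t size, const char *fmt, ...)
{
    va_list args;
    int n;

    if (size == 0)
        return 0;

    va_start(args, fmt);
    n = vsnprintf(buf, size, fmt, args);
    va_end(args);

    if (n < 0)
        return 0;
    return (size_t)n >= size ? (int)size - 1 : n;
}

int main(void)
{
    char buf[64], *p = buf;

    /* each call advances p; the remaining space shrinks accordingly */
    p += scnprintf_like(p, sizeof(buf) + buf - p, "VHT %ssupported\n", "");
    p += scnprintf_like(p, sizeof(buf) + buf - p, "cap: %#.8x\n", 0x33c07932u);
    fwrite(buf, 1, p - buf, stdout);
    return 0;
}
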
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index ee56d0779d8b..169664c122e2 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -241,6 +241,22 @@ static inline u64 drv_prepare_multicast(struct ieee80211_local *local,
     return ret;
 }
 
+static inline void drv_set_multicast_list(struct ieee80211_local *local,
+                                          struct ieee80211_sub_if_data *sdata,
+                                          struct netdev_hw_addr_list *mc_list)
+{
+    bool allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI;
+
+    trace_drv_set_multicast_list(local, sdata, mc_list->count);
+
+    check_sdata_in_driver(sdata);
+
+    if (local->ops->set_multicast_list)
+        local->ops->set_multicast_list(&local->hw, &sdata->vif,
+                                       allmulti, mc_list);
+    trace_drv_return_void(local);
+}
+
 static inline void drv_configure_filter(struct ieee80211_local *local,
                                         unsigned int changed_flags,
                                         unsigned int *total_flags,
@@ -531,43 +547,6 @@ static inline void drv_sta_remove_debugfs(struct ieee80211_local *local,
     local->ops->sta_remove_debugfs(&local->hw, &sdata->vif,
                                    sta, dir);
 }
-
-static inline
-void drv_add_interface_debugfs(struct ieee80211_local *local,
-                               struct ieee80211_sub_if_data *sdata)
-{
-    might_sleep();
-
-    check_sdata_in_driver(sdata);
-
-    if (!local->ops->add_interface_debugfs)
-        return;
-
-    local->ops->add_interface_debugfs(&local->hw, &sdata->vif,
-                                      sdata->debugfs.dir);
-}
-
-static inline
-void drv_remove_interface_debugfs(struct ieee80211_local *local,
-                                  struct ieee80211_sub_if_data *sdata)
-{
-    might_sleep();
-
-    check_sdata_in_driver(sdata);
-
-    if (!local->ops->remove_interface_debugfs)
-        return;
-
-    local->ops->remove_interface_debugfs(&local->hw, &sdata->vif,
-                                         sdata->debugfs.dir);
-}
-#else
-static inline
-void drv_add_interface_debugfs(struct ieee80211_local *local,
-                               struct ieee80211_sub_if_data *sdata) {}
-static inline
-void drv_remove_interface_debugfs(struct ieee80211_local *local,
-                                  struct ieee80211_sub_if_data *sdata) {}
 #endif
 
 static inline __must_check
@@ -741,13 +720,14 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local)
         local->ops->rfkill_poll(&local->hw);
 }
 
-static inline void drv_flush(struct ieee80211_local *local, bool drop)
+static inline void drv_flush(struct ieee80211_local *local,
+                             u32 queues, bool drop)
 {
     might_sleep();
 
-    trace_drv_flush(local, drop);
+    trace_drv_flush(local, queues, drop);
     if (local->ops->flush)
-        local->ops->flush(&local->hw, drop);
+        local->ops->flush(&local->hw, queues, drop);
     trace_drv_return_void(local);
 }
 
@@ -787,15 +767,16 @@ static inline int drv_get_antenna(struct ieee80211_local *local,
 static inline int drv_remain_on_channel(struct ieee80211_local *local,
                                         struct ieee80211_sub_if_data *sdata,
                                         struct ieee80211_channel *chan,
-                                        unsigned int duration)
+                                        unsigned int duration,
+                                        enum ieee80211_roc_type type)
 {
     int ret;
 
     might_sleep();
 
-    trace_drv_remain_on_channel(local, sdata, chan, duration);
+    trace_drv_remain_on_channel(local, sdata, chan, duration, type);
     ret = local->ops->remain_on_channel(&local->hw, &sdata->vif,
-                                        chan, duration);
+                                        chan, duration, type);
     trace_drv_return_int(local, ret);
 
     return ret;
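
All of the drv_*() changes above follow the same wrapper shape: trace the call, treat the driver op as optional, call it if present, trace the return. A compilable toy model of that shape (simplified stand-in types; printf stands in for the tracepoints; this is a sketch of the pattern, not the mac80211 code):

#include <stdio.h>

struct hw;  /* opaque, pointer only */

struct ops {
    void (*set_multicast_list)(struct hw *hw, int allmulti, int count);
};

struct local {
    struct hw *hw;
    const struct ops *ops;
};

static void drv_set_multicast_list(struct local *local, int allmulti, int count)
{
    printf("trace: set_multicast_list count=%d\n", count); /* trace entry */

    if (local->ops->set_multicast_list)                    /* op is optional */
        local->ops->set_multicast_list(local->hw, allmulti, count);

    printf("trace: return void\n");                        /* trace exit */
}

static void demo_op(struct hw *hw, int allmulti, int count)
{
    (void)hw;
    printf("driver: allmulti=%d, %d addresses\n", allmulti, count);
}

int main(void)
{
    const struct ops with_op = { .set_multicast_list = demo_op };
    const struct ops without_op = { 0 };
    struct local a = { 0, &with_op }, b = { 0, &without_op };

    drv_set_multicast_list(&a, 0, 3);
    drv_set_multicast_list(&b, 1, 5); /* silently skipped, still traced */
    return 0;
}
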
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 0db25d4bb223..af8cee06e4f3 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -40,13 +40,6 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
     if (!ht_cap->ht_supported)
         return;
 
-    if (sdata->vif.type != NL80211_IFTYPE_STATION) {
-        /* AP interfaces call this code when adding new stations,
-         * so just silently ignore non station interfaces.
-         */
-        return;
-    }
-
     /* NOTE: If you add more over-rides here, update register_hw
      * ht_capa_mod_msk logic in main.c as well.
      * And, if this method can ever change ht_cap.ht_supported, fix
@@ -97,7 +90,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
                                        const struct ieee80211_ht_cap *ht_cap_ie,
                                        struct sta_info *sta)
 {
-    struct ieee80211_sta_ht_cap ht_cap;
+    struct ieee80211_sta_ht_cap ht_cap, own_cap;
     u8 ampdu_info, tx_mcs_set_cap;
     int i, max_tx_streams;
     bool changed;
@@ -111,6 +104,18 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
 
     ht_cap.ht_supported = true;
 
+    own_cap = sband->ht_cap;
+
+    /*
+     * If user has specified capability over-rides, take care
+     * of that if the station we're setting up is the AP that
+     * we advertised a restricted capability set to. Override
+     * our own capabilities and then use those below.
+     */
+    if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+        !test_sta_flag(sta, WLAN_STA_TDLS_PEER))
+        ieee80211_apply_htcap_overrides(sdata, &own_cap);
+
     /*
      * The bits listed in this expression should be
      * the same for the peer and us, if the station
@@ -118,21 +123,20 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
      * we mask them out.
      */
     ht_cap.cap = le16_to_cpu(ht_cap_ie->cap_info) &
-        (sband->ht_cap.cap |
-         ~(IEEE80211_HT_CAP_LDPC_CODING |
-           IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
-           IEEE80211_HT_CAP_GRN_FLD |
-           IEEE80211_HT_CAP_SGI_20 |
-           IEEE80211_HT_CAP_SGI_40 |
-           IEEE80211_HT_CAP_DSSSCCK40));
+        (own_cap.cap | ~(IEEE80211_HT_CAP_LDPC_CODING |
+                         IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
+                         IEEE80211_HT_CAP_GRN_FLD |
+                         IEEE80211_HT_CAP_SGI_20 |
+                         IEEE80211_HT_CAP_SGI_40 |
+                         IEEE80211_HT_CAP_DSSSCCK40));
 
     /*
      * The STBC bits are asymmetric -- if we don't have
      * TX then mask out the peer's RX and vice versa.
      */
-    if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC))
+    if (!(own_cap.cap & IEEE80211_HT_CAP_TX_STBC))
         ht_cap.cap &= ~IEEE80211_HT_CAP_RX_STBC;
-    if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC))
+    if (!(own_cap.cap & IEEE80211_HT_CAP_RX_STBC))
         ht_cap.cap &= ~IEEE80211_HT_CAP_TX_STBC;
 
     ampdu_info = ht_cap_ie->ampdu_params_info;
@@ -142,7 +146,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
         (ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2;
 
     /* own MCS TX capabilities */
-    tx_mcs_set_cap = sband->ht_cap.mcs.tx_params;
+    tx_mcs_set_cap = own_cap.mcs.tx_params;
 
     /* Copy peer MCS TX capabilities, the driver might need them. */
     ht_cap.mcs.tx_params = ht_cap_ie->mcs.tx_params;
@@ -168,26 +172,20 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
      */
     for (i = 0; i < max_tx_streams; i++)
         ht_cap.mcs.rx_mask[i] =
-            sband->ht_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i];
+            own_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i];
 
     if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION)
         for (i = IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE;
              i < IEEE80211_HT_MCS_MASK_LEN; i++)
             ht_cap.mcs.rx_mask[i] =
-                sband->ht_cap.mcs.rx_mask[i] &
+                own_cap.mcs.rx_mask[i] &
                 ht_cap_ie->mcs.rx_mask[i];
 
     /* handle MCS rate 32 too */
-    if (sband->ht_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1)
+    if (own_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1)
         ht_cap.mcs.rx_mask[32/8] |= 1;
 
  apply:
-    /*
-     * If user has specified capability over-rides, take care
-     * of that here.
-     */
-    ieee80211_apply_htcap_overrides(sdata, &ht_cap);
-
     changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap));
 
     memcpy(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap));
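
The rewritten merge logic keys everything off own_cap instead of sband->ht_cap, but the masking rules themselves are unchanged: symmetric features are ANDed against our own (possibly overridden) capabilities, while the STBC pair is crossed, because the peer's RX-STBC is only useful if we can do TX-STBC and vice versa. A standalone C sketch of those two rules with made-up bit values (not the real HT capability layout):

#include <stdio.h>
#include <stdint.h>

#define CAP_SGI_20   0x0020u  /* symmetric: both sides must support it */
#define CAP_TX_STBC  0x0080u  /* asymmetric pair */
#define CAP_RX_STBC  0x0100u

static uint16_t merge_ht_cap(uint16_t own, uint16_t peer)
{
    /* AND only the symmetric bits; keep every other peer bit as-is */
    uint16_t cap = peer & (own | ~CAP_SGI_20);

    /* no TX-STBC on our side -> ignore the peer's RX-STBC, and vice versa */
    if (!(own & CAP_TX_STBC))
        cap &= ~CAP_RX_STBC;
    if (!(own & CAP_RX_STBC))
        cap &= ~CAP_TX_STBC;
    return cap;
}

int main(void)
{
    uint16_t own = CAP_SGI_20 | CAP_RX_STBC;   /* we RX but don't TX STBC */
    uint16_t peer = CAP_SGI_20 | CAP_RX_STBC | CAP_TX_STBC;

    /* peer's RX-STBC is dropped, its TX-STBC survives */
    printf("merged: %#06x\n", merge_ht_cap(own, peer));
    return 0;
}
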
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 40b71dfcc79d..539d4a11b47b 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -985,36 +985,9 @@ static void ieee80211_ibss_timer(unsigned long data)
 {
     struct ieee80211_sub_if_data *sdata =
         (struct ieee80211_sub_if_data *) data;
-    struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
-    struct ieee80211_local *local = sdata->local;
-
-    if (local->quiescing) {
-        ifibss->timer_running = true;
-        return;
-    }
-
-    ieee80211_queue_work(&local->hw, &sdata->work);
-}
-
-#ifdef CONFIG_PM
-void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata)
-{
-    struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
 
-    if (del_timer_sync(&ifibss->timer))
-        ifibss->timer_running = true;
-}
-
-void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata)
-{
-    struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
-
-    if (ifibss->timer_running) {
-        add_timer(&ifibss->timer);
-        ifibss->timer_running = false;
-    }
+    ieee80211_queue_work(&sdata->local->hw, &sdata->work);
 }
-#endif
 
 void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata)
 {
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 5672533a0832..0b09716d22ad 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -316,6 +316,7 @@ struct ieee80211_roc_work {
     u32 duration, req_duration;
     struct sk_buff *frame;
     u64 cookie, mgmt_tx_cookie;
+    enum ieee80211_roc_type type;
 };
 
 /* flags used in struct ieee80211_if_managed.flags */
@@ -401,7 +402,6 @@ struct ieee80211_if_managed {
 
     u16 aid;
 
-    unsigned long timers_running; /* used for quiesce/restart */
     bool powersave; /* powersave requested for this iface */
     bool broken_ap; /* AP is broken -- turn off powersave */
     u8 dtim_period;
@@ -480,6 +480,8 @@ struct ieee80211_if_managed {
 
     struct ieee80211_ht_cap ht_capa; /* configured ht-cap over-rides */
     struct ieee80211_ht_cap ht_capa_mask; /* Valid parts of ht_capa */
+    struct ieee80211_vht_cap vht_capa; /* configured VHT overrides */
+    struct ieee80211_vht_cap vht_capa_mask; /* Valid parts of vht_capa */
 };
 
 struct ieee80211_if_ibss {
@@ -491,8 +493,6 @@ struct ieee80211_if_ibss {
 
     u32 basic_rates;
 
-    bool timer_running;
-
     bool fixed_bssid;
     bool fixed_channel;
     bool privacy;
@@ -544,8 +544,6 @@ struct ieee80211_if_mesh {
     struct timer_list mesh_path_timer;
     struct timer_list mesh_path_root_timer;
 
-    unsigned long timers_running;
-
     unsigned long wrkq_flags;
 
     u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN];
@@ -591,6 +589,7 @@ struct ieee80211_if_mesh {
         IEEE80211_MESH_SEC_AUTHED = 0x1,
         IEEE80211_MESH_SEC_SECURED = 0x2,
     } security;
+    bool user_mpm;
     /* Extensible Synchronization Framework */
     const struct ieee80211_mesh_sync_ops *sync_ops;
     s64 sync_offset_clockdrift_max;
@@ -683,6 +682,8 @@ struct ieee80211_sub_if_data {
 
     /* count for keys needing tailroom space allocation */
     int crypto_tx_tailroom_needed_cnt;
+    int crypto_tx_tailroom_pending_dec;
+    struct delayed_work dec_tailroom_needed_wk;
 
     struct net_device *dev;
     struct ieee80211_local *local;
@@ -758,7 +759,6 @@ struct ieee80211_sub_if_data {
 
 #ifdef CONFIG_MAC80211_DEBUGFS
     struct {
-        struct dentry *dir;
         struct dentry *subdir_stations;
         struct dentry *default_unicast_key;
         struct dentry *default_multicast_key;
@@ -766,10 +766,6 @@ struct ieee80211_sub_if_data {
     } debugfs;
 #endif
 
-#ifdef CONFIG_PM
-    struct ieee80211_bss_conf suspend_bss_conf;
-#endif
-
     /* must be last, dynamically sized area in this! */
     struct ieee80211_vif vif;
 };
@@ -804,11 +800,6 @@ enum sdata_queue_type {
 enum {
     IEEE80211_RX_MSG = 1,
     IEEE80211_TX_STATUS_MSG = 2,
-    IEEE80211_EOSP_MSG = 3,
-};
-
-struct skb_eosp_msg_data {
-    u8 sta[ETH_ALEN], iface[ETH_ALEN];
 };
 
 enum queue_stop_reason {
@@ -819,6 +810,7 @@ enum queue_stop_reason {
     IEEE80211_QUEUE_STOP_REASON_SUSPEND,
     IEEE80211_QUEUE_STOP_REASON_SKB_ADD,
    IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL,
+    IEEE80211_QUEUE_STOP_REASON_FLUSH,
 };
 
 #ifdef CONFIG_MAC80211_LEDS
@@ -1137,11 +1129,6 @@ struct ieee80211_local {
 
     struct ieee80211_sub_if_data __rcu *p2p_sdata;
 
-    /* dummy netdev for use w/ NAPI */
-    struct net_device napi_dev;
-
-    struct napi_struct napi;
-
     /* virtual monitor interface */
     struct ieee80211_sub_if_data __rcu *monitor_sdata;
     struct cfg80211_chan_def monitor_chandef;
@@ -1284,8 +1271,6 @@ void
 ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
                                  const struct ieee80211_channel_sw_ie *sw_elem,
                                  struct ieee80211_bss *bss, u64 timestamp);
-void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata);
-void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
                                   struct sk_buff *skb);
@@ -1303,8 +1288,6 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
 int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
                         struct cfg80211_ibss_params *params);
 int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata);
-void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata);
-void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata);
 void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata);
 void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
                                    struct sk_buff *skb);
@@ -1443,6 +1426,8 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta);
 void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
                                  struct sta_info *sta, u8 opmode,
                                  enum ieee80211_band band, bool nss_only);
+void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata,
+                                      struct ieee80211_sta_vht_cap *vht_cap);
 
 /* Spectrum management */
 void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -1540,8 +1525,10 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
                              struct ieee80211_hdr *hdr, bool ack);
 
 void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
+                                     unsigned long queues,
                                      enum queue_stop_reason reason);
 void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
+                                     unsigned long queues,
                                      enum queue_stop_reason reason);
 void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
                                     enum queue_stop_reason reason);
@@ -1558,6 +1545,8 @@ static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local,
 {
     ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL);
 }
+void ieee80211_flush_queues(struct ieee80211_local *local,
+                            struct ieee80211_sub_if_data *sdata);
 
 void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
                          u16 transaction, u16 auth_alg, u16 status,
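
The new unsigned long queues parameters turn the stop/wake API into a per-queue bitmap, with IEEE80211_QUEUE_STOP_REASON_FLUSH added as one more reason bit. A toy userspace model of that reason-bitmap accounting, where a queue only runs again once every reason has been cleared (an illustration of the idea, not the mac80211 implementation):

#include <stdio.h>

#define NUM_QUEUES 4
#define QUEUE_MAP  ((1UL << NUM_QUEUES) - 1)  /* like IEEE80211_MAX_QUEUE_MAP */

enum stop_reason { REASON_SUSPEND, REASON_FLUSH };

static unsigned long stop_reasons[NUM_QUEUES];

static void stop_queues(unsigned long queues, enum stop_reason r)
{
    for (int q = 0; q < NUM_QUEUES; q++)
        if (queues & (1UL << q))
            stop_reasons[q] |= 1UL << r;
}

static void wake_queues(unsigned long queues, enum stop_reason r)
{
    for (int q = 0; q < NUM_QUEUES; q++) {
        if (!(queues & (1UL << q)))
            continue;
        stop_reasons[q] &= ~(1UL << r);
        if (!stop_reasons[q])       /* no reason left -> really wake */
            printf("queue %d running again\n", q);
    }
}

int main(void)
{
    stop_queues(QUEUE_MAP, REASON_FLUSH);   /* a flush stops everything */
    stop_queues(1UL << 0, REASON_SUSPEND);  /* queue 0 also suspended */
    wake_queues(QUEUE_MAP, REASON_FLUSH);   /* queues 1-3 wake, 0 stays */
    wake_queues(1UL << 0, REASON_SUSPEND);  /* now queue 0 wakes too */
    return 0;
}
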
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 9ed49ad0380f..e8a260f53c16 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -92,7 +92,7 @@ static u32 __ieee80211_idle_on(struct ieee80211_local *local)
     if (local->hw.conf.flags & IEEE80211_CONF_IDLE)
         return 0;
 
-    drv_flush(local, false);
+    ieee80211_flush_queues(local, NULL);
 
     local->hw.conf.flags |= IEEE80211_CONF_IDLE;
     return IEEE80211_CONF_CHANGE_IDLE;
@@ -499,8 +499,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
         res = drv_start(local);
         if (res)
             goto err_del_bss;
-        if (local->ops->napi_poll)
-            napi_enable(&local->napi);
         /* we're brought up, everything changes */
         hw_reconf_flags = ~0;
         ieee80211_led_radio(local, true);
@@ -573,8 +571,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
             goto err_del_interface;
     }
 
-    drv_add_interface_debugfs(local, sdata);
-
     if (sdata->vif.type == NL80211_IFTYPE_AP) {
         local->fif_pspoll++;
         local->fif_probe_req++;
@@ -852,15 +848,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
         rcu_barrier();
         sta_info_flush_cleanup(sdata);
 
-        skb_queue_purge(&sdata->skb_queue);
-
         /*
          * Free all remaining keys, there shouldn't be any,
-         * except maybe group keys in AP more or WDS?
+         * except maybe in WDS mode?
          */
         ieee80211_free_keys(sdata);
 
-        drv_remove_interface_debugfs(local, sdata);
+        /* fall through */
+    case NL80211_IFTYPE_AP:
+        skb_queue_purge(&sdata->skb_queue);
 
         if (going_down)
             drv_remove_interface(local, sdata);
@@ -871,8 +867,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
     ieee80211_recalc_ps(local, -1);
 
     if (local->open_count == 0) {
-        if (local->ops->napi_poll)
-            napi_disable(&local->napi);
         ieee80211_clear_tx_pending(local);
         ieee80211_stop_device(local);
 
@@ -935,6 +929,17 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
             atomic_dec(&local->iff_promiscs);
         sdata->flags ^= IEEE80211_SDATA_PROMISC;
     }
+
+    /*
+     * TODO: If somebody needs this on AP interfaces,
+     *       it can be enabled easily but multicast
+     *       addresses from VLANs need to be synced.
+     */
+    if (sdata->vif.type != NL80211_IFTYPE_MONITOR &&
+        sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
+        sdata->vif.type != NL80211_IFTYPE_AP)
+        drv_set_multicast_list(local, sdata, &dev->mc);
+
     spin_lock_bh(&local->filter_lock);
     __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len);
     spin_unlock_bh(&local->filter_lock);
@@ -1561,6 +1566,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
     INIT_WORK(&sdata->cleanup_stations_wk, ieee80211_cleanup_sdata_stas_wk);
     INIT_DELAYED_WORK(&sdata->dfs_cac_timer_work,
                       ieee80211_dfs_cac_timer_work);
+    INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk,
+                      ieee80211_delayed_tailroom_dec);
 
     for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
         struct ieee80211_supported_band *sband;
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index ef252eb58c36..67059b88fea5 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -248,11 +248,11 @@ void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
 }
 
 
-static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
-                                    struct sta_info *sta,
-                                    bool pairwise,
-                                    struct ieee80211_key *old,
-                                    struct ieee80211_key *new)
+static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
+                                  struct sta_info *sta,
+                                  bool pairwise,
+                                  struct ieee80211_key *old,
+                                  struct ieee80211_key *new)
 {
     int idx;
     bool defunikey, defmultikey, defmgmtkey;
@@ -397,7 +397,41 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
     return key;
 }
 
-static void __ieee80211_key_destroy(struct ieee80211_key *key)
+static void ieee80211_key_free_common(struct ieee80211_key *key)
+{
+    if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP)
+        ieee80211_aes_key_free(key->u.ccmp.tfm);
+    if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC)
+        ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
+    kfree(key);
+}
+
+static void __ieee80211_key_destroy(struct ieee80211_key *key,
+                                    bool delay_tailroom)
+{
+    if (key->local)
+        ieee80211_key_disable_hw_accel(key);
+
+    if (key->local) {
+        struct ieee80211_sub_if_data *sdata = key->sdata;
+
+        ieee80211_debugfs_key_remove(key);
+
+        if (delay_tailroom) {
+            /* see ieee80211_delayed_tailroom_dec */
+            sdata->crypto_tx_tailroom_pending_dec++;
+            schedule_delayed_work(&sdata->dec_tailroom_needed_wk,
+                                  HZ/2);
+        } else {
+            sdata->crypto_tx_tailroom_needed_cnt--;
+        }
+    }
+
+    ieee80211_key_free_common(key);
+}
+
+static void ieee80211_key_destroy(struct ieee80211_key *key,
+                                  bool delay_tailroom)
 {
     if (!key)
         return;
@@ -408,19 +442,13 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
      */
     synchronize_net();
 
-    if (key->local)
-        ieee80211_key_disable_hw_accel(key);
-
-    if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP)
-        ieee80211_aes_key_free(key->u.ccmp.tfm);
-    if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC)
-        ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
-    if (key->local) {
-        ieee80211_debugfs_key_remove(key);
-        key->sdata->crypto_tx_tailroom_needed_cnt--;
-    }
+    __ieee80211_key_destroy(key, delay_tailroom);
+}
 
-    kfree(key);
+void ieee80211_key_free_unused(struct ieee80211_key *key)
+{
+    WARN_ON(key->sdata || key->local);
+    ieee80211_key_free_common(key);
 }
 
 int ieee80211_key_link(struct ieee80211_key *key,
@@ -440,32 +468,6 @@ int ieee80211_key_link(struct ieee80211_key *key,
     key->sdata = sdata;
     key->sta = sta;
 
-    if (sta) {
-        /*
-         * some hardware cannot handle TKIP with QoS, so
-         * we indicate whether QoS could be in use.
-         */
-        if (test_sta_flag(sta, WLAN_STA_WME))
-            key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA;
-    } else {
-        if (sdata->vif.type == NL80211_IFTYPE_STATION) {
-            struct sta_info *ap;
-
-            /*
-             * We're getting a sta pointer in, so must be under
-             * appropriate locking for sta_info_get().
-             */
-
-            /* same here, the AP could be using QoS */
-            ap = sta_info_get(key->sdata, key->sdata->u.mgd.bssid);
-            if (ap) {
-                if (test_sta_flag(ap, WLAN_STA_WME))
-                    key->conf.flags |=
-                        IEEE80211_KEY_FLAG_WMM_STA;
-            }
-        }
-    }
-
     mutex_lock(&sdata->local->key_mtx);
 
     if (sta && pairwise)
@@ -477,19 +479,22 @@ int ieee80211_key_link(struct ieee80211_key *key,
 
     increment_tailroom_need_count(sdata);
 
-    __ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
-    __ieee80211_key_destroy(old_key);
+    ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
+    ieee80211_key_destroy(old_key, true);
 
     ieee80211_debugfs_key_add(key);
 
     ret = ieee80211_key_enable_hw_accel(key);
 
+    if (ret)
+        ieee80211_key_free(key, true);
+
     mutex_unlock(&sdata->local->key_mtx);
 
     return ret;
 }
 
-void __ieee80211_key_free(struct ieee80211_key *key)
+void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom)
 {
     if (!key)
         return;
@@ -498,18 +503,10 @@ void __ieee80211_key_free(struct ieee80211_key *key)
      * Replace key with nothingness if it was ever used.
      */
     if (key->sdata)
-        __ieee80211_key_replace(key->sdata, key->sta,
-                key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
-                key, NULL);
-    __ieee80211_key_destroy(key);
-}
-
-void ieee80211_key_free(struct ieee80211_local *local,
-                        struct ieee80211_key *key)
-{
-    mutex_lock(&local->key_mtx);
-    __ieee80211_key_free(key);
-    mutex_unlock(&local->key_mtx);
+        ieee80211_key_replace(key->sdata, key->sta,
+                              key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
+                              key, NULL);
+    ieee80211_key_destroy(key, delay_tailroom);
 }
 
 void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata)
@@ -566,36 +563,109 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw,
 }
 EXPORT_SYMBOL(ieee80211_iter_keys);
 
-void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata)
+void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata)
 {
-    struct ieee80211_key *key;
+    struct ieee80211_key *key, *tmp;
+    LIST_HEAD(keys);
 
-    ASSERT_RTNL();
+    cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk);
 
     mutex_lock(&sdata->local->key_mtx);
 
-    list_for_each_entry(key, &sdata->key_list, list)
-        ieee80211_key_disable_hw_accel(key);
+    sdata->crypto_tx_tailroom_needed_cnt -=
+        sdata->crypto_tx_tailroom_pending_dec;
+    sdata->crypto_tx_tailroom_pending_dec = 0;
+
+    ieee80211_debugfs_key_remove_mgmt_default(sdata);
+
+    list_for_each_entry_safe(key, tmp, &sdata->key_list, list) {
+        ieee80211_key_replace(key->sdata, key->sta,
+                key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
+                key, NULL);
+        list_add_tail(&key->list, &keys);
+    }
+
+    ieee80211_debugfs_key_update_default(sdata);
+
+    if (!list_empty(&keys)) {
+        synchronize_net();
+        list_for_each_entry_safe(key, tmp, &keys, list)
+            __ieee80211_key_destroy(key, false);
+    }
+
+    WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt ||
+                 sdata->crypto_tx_tailroom_pending_dec);
 
     mutex_unlock(&sdata->local->key_mtx);
 }
 
-void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata)
+void ieee80211_free_sta_keys(struct ieee80211_local *local,
+                             struct sta_info *sta)
 {
     struct ieee80211_key *key, *tmp;
+    LIST_HEAD(keys);
+    int i;
 
-    mutex_lock(&sdata->local->key_mtx);
+    mutex_lock(&local->key_mtx);
+    for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
+        key = key_mtx_dereference(local, sta->gtk[i]);
+        if (!key)
+            continue;
+        ieee80211_key_replace(key->sdata, key->sta,
+                key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
+                key, NULL);
+        list_add(&key->list, &keys);
+    }
 
-    ieee80211_debugfs_key_remove_mgmt_default(sdata);
+    key = key_mtx_dereference(local, sta->ptk);
+    if (key) {
+        ieee80211_key_replace(key->sdata, key->sta,
+                key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
+                key, NULL);
+        list_add(&key->list, &keys);
+    }
 
-    list_for_each_entry_safe(key, tmp, &sdata->key_list, list)
-        __ieee80211_key_free(key);
+    /*
+     * NB: the station code relies on this being
+     * done even if there aren't any keys
+     */
+    synchronize_net();
 
-    ieee80211_debugfs_key_update_default(sdata);
+    list_for_each_entry_safe(key, tmp, &keys, list)
+        __ieee80211_key_destroy(key, true);
 
-    mutex_unlock(&sdata->local->key_mtx);
+    mutex_unlock(&local->key_mtx);
 }
 
+void ieee80211_delayed_tailroom_dec(struct work_struct *wk)
+{
+    struct ieee80211_sub_if_data *sdata;
+
+    sdata = container_of(wk, struct ieee80211_sub_if_data,
+                         dec_tailroom_needed_wk.work);
+
+    /*
+     * The reason for the delayed tailroom needed decrementing is to
+     * make roaming faster: during roaming, all keys are first deleted
+     * and then new keys are installed. The first new key causes the
+     * crypto_tx_tailroom_needed_cnt to go from 0 to 1, which invokes
+     * the cost of synchronize_net() (which can be slow). Avoid this
+     * by deferring the crypto_tx_tailroom_needed_cnt decrementing on
+     * key removal for a while, so if we roam the value is larger than
+     * zero and no 0->1 transition happens.
+     *
+     * The cost is that if the AP switching was from an AP with keys
+     * to one without, we still allocate tailroom while it would no
+     * longer be needed. However, in the typical (fast) roaming case
+     * within an ESS this usually won't happen.
+     */
+
+    mutex_lock(&sdata->local->key_mtx);
+    sdata->crypto_tx_tailroom_needed_cnt -=
+        sdata->crypto_tx_tailroom_pending_dec;
+    sdata->crypto_tx_tailroom_pending_dec = 0;
+    mutex_unlock(&sdata->local->key_mtx);
+}
 
 void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid,
                                 const u8 *replay_ctr, gfp_t gfp)
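
The long comment in ieee80211_delayed_tailroom_dec() is the heart of this change: key deletion only defers the tailroom-counter decrement, so a delete-then-add burst during roaming never sees the counter touch zero, and the expensive 0->1 transition is avoided. A compilable toy model of that bookkeeping, with the delayed work item reduced to an explicit flush call (a sketch of the pattern, not the kernel code):

#include <stdio.h>

static int needed;       /* models crypto_tx_tailroom_needed_cnt */
static int pending_dec;  /* models crypto_tx_tailroom_pending_dec */

static void key_add(void)
{
    if (needed == 0)
        printf("0->1 transition: would pay synchronize_net()\n");
    needed++;
}

static void key_del(void)
{
    pending_dec++;           /* deferred: do NOT decrement needed yet */
}

static void delayed_flush(void) /* stands in for the delayed work body */
{
    needed -= pending_dec;
    pending_dec = 0;
}

int main(void)
{
    key_add();               /* first association: one 0->1 is unavoidable */
    key_del();               /* roam: old key removed... */
    key_add();               /* ...new key added; needed stays >= 1 */
    delayed_flush();         /* deferred decrement lands afterwards */
    printf("needed=%d pending=%d\n", needed, pending_dec);
    return 0;
}
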
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 382dc44ed330..e8de3e6d7804 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -129,23 +129,25 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
                                           size_t seq_len, const u8 *seq);
 /*
  * Insert a key into data structures (sdata, sta if necessary)
- * to make it used, free old key.
+ * to make it used, free old key. On failure, also free the new key.
  */
-int __must_check ieee80211_key_link(struct ieee80211_key *key,
-                                    struct ieee80211_sub_if_data *sdata,
-                                    struct sta_info *sta);
-void __ieee80211_key_free(struct ieee80211_key *key);
-void ieee80211_key_free(struct ieee80211_local *local,
-                        struct ieee80211_key *key);
+int ieee80211_key_link(struct ieee80211_key *key,
+                       struct ieee80211_sub_if_data *sdata,
+                       struct sta_info *sta);
+void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom);
+void ieee80211_key_free_unused(struct ieee80211_key *key);
 void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx,
                                bool uni, bool multi);
 void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
                                     int idx);
 void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata);
+void ieee80211_free_sta_keys(struct ieee80211_local *local,
+                             struct sta_info *sta);
 void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata);
-void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata);
 
 #define key_mtx_dereference(local, ref) \
     rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx)))
 
+void ieee80211_delayed_tailroom_dec(struct work_struct *wk);
+
 #endif /* IEEE80211_KEY_H */
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 1a8591b77a13..c6f81ecc36a1 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -100,7 +100,6 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
     int power;
     enum nl80211_channel_type channel_type;
     u32 offchannel_flag;
-    bool scanning = false;
 
     offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
     if (local->scan_channel) {
@@ -147,9 +146,6 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
         changed |= IEEE80211_CONF_CHANGE_SMPS;
     }
 
-    scanning = test_bit(SCAN_SW_SCANNING, &local->scanning) ||
-               test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning) ||
-               test_bit(SCAN_HW_SCANNING, &local->scanning);
     power = chan->max_power;
 
     rcu_read_lock();
@@ -226,8 +222,6 @@ u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
 static void ieee80211_tasklet_handler(unsigned long data)
 {
     struct ieee80211_local *local = (struct ieee80211_local *) data;
-    struct sta_info *sta, *tmp;
-    struct skb_eosp_msg_data *eosp_data;
     struct sk_buff *skb;
 
     while ((skb = skb_dequeue(&local->skb_queue)) ||
@@ -243,18 +237,6 @@ static void ieee80211_tasklet_handler(unsigned long data)
             skb->pkt_type = 0;
             ieee80211_tx_status(&local->hw, skb);
             break;
-        case IEEE80211_EOSP_MSG:
-            eosp_data = (void *)skb->cb;
-            for_each_sta_info(local, eosp_data->sta, sta, tmp) {
-                /* skip wrong virtual interface */
-                if (memcmp(eosp_data->iface,
-                           sta->sdata->vif.addr, ETH_ALEN))
-                    continue;
-                clear_sta_flag(sta, WLAN_STA_SP);
-                break;
-            }
-            dev_kfree_skb(skb);
-            break;
         default:
             WARN(1, "mac80211: Packet is of unknown type %d\n",
                  skb->pkt_type);
@@ -295,8 +277,8 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)
         "Hardware restart was requested\n");
 
     /* use this reason, ieee80211_reconfig will unblock it */
-    ieee80211_stop_queues_by_reason(hw,
-        IEEE80211_QUEUE_STOP_REASON_SUSPEND);
+    ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
+                                    IEEE80211_QUEUE_STOP_REASON_SUSPEND);
 
     /*
      * Stop all Rx during the reconfig. We don't want state changes
@@ -399,30 +381,6 @@ static int ieee80211_ifa6_changed(struct notifier_block *nb,
 }
 #endif
 
-static int ieee80211_napi_poll(struct napi_struct *napi, int budget)
-{
-    struct ieee80211_local *local =
-        container_of(napi, struct ieee80211_local, napi);
-
-    return local->ops->napi_poll(&local->hw, budget);
-}
-
-void ieee80211_napi_schedule(struct ieee80211_hw *hw)
-{
-    struct ieee80211_local *local = hw_to_local(hw);
-
-    napi_schedule(&local->napi);
-}
-EXPORT_SYMBOL(ieee80211_napi_schedule);
-
-void ieee80211_napi_complete(struct ieee80211_hw *hw)
-{
-    struct ieee80211_local *local = hw_to_local(hw);
-
-    napi_complete(&local->napi);
-}
-EXPORT_SYMBOL(ieee80211_napi_complete);
-
 /* There isn't a lot of sense in it, but you can transmit anything you like */
 static const struct ieee80211_txrx_stypes
 ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = {
@@ -501,6 +459,27 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = {
501 }, 459 },
502}; 460};
503 461
462static const struct ieee80211_vht_cap mac80211_vht_capa_mod_mask = {
463 .vht_cap_info =
464 cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC |
465 IEEE80211_VHT_CAP_SHORT_GI_80 |
466 IEEE80211_VHT_CAP_SHORT_GI_160 |
467 IEEE80211_VHT_CAP_RXSTBC_1 |
468 IEEE80211_VHT_CAP_RXSTBC_2 |
469 IEEE80211_VHT_CAP_RXSTBC_3 |
470 IEEE80211_VHT_CAP_RXSTBC_4 |
471 IEEE80211_VHT_CAP_TXSTBC |
472 IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE |
473 IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
474 IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN |
475 IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN |
476 IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK),
477 .supp_mcs = {
478 .rx_mcs_map = cpu_to_le16(~0),
479 .tx_mcs_map = cpu_to_le16(~0),
480 },
481};
482
504static const u8 extended_capabilities[] = { 483static const u8 extended_capabilities[] = {
505 0, 0, 0, 0, 0, 0, 0, 484 0, 0, 0, 0, 0, 0, 0,
506 WLAN_EXT_CAPA8_OPMODE_NOTIF, 485 WLAN_EXT_CAPA8_OPMODE_NOTIF,
@@ -572,7 +551,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
572 wiphy->features |= NL80211_FEATURE_SK_TX_STATUS | 551 wiphy->features |= NL80211_FEATURE_SK_TX_STATUS |
573 NL80211_FEATURE_SAE | 552 NL80211_FEATURE_SAE |
574 NL80211_FEATURE_HT_IBSS | 553 NL80211_FEATURE_HT_IBSS |
575 NL80211_FEATURE_VIF_TXPOWER; 554 NL80211_FEATURE_VIF_TXPOWER |
555 NL80211_FEATURE_USERSPACE_MPM;
576 556
577 if (!ops->hw_scan) 557 if (!ops->hw_scan)
578 wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | 558 wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
@@ -609,6 +589,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
609 IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH; 589 IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH;
610 local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; 590 local->user_power_level = IEEE80211_UNSET_POWER_LEVEL;
611 wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask; 591 wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask;
592 wiphy->vht_capa_mod_mask = &mac80211_vht_capa_mod_mask;
612 593
613 INIT_LIST_HEAD(&local->interfaces); 594 INIT_LIST_HEAD(&local->interfaces);
614 595
@@ -664,9 +645,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
664 skb_queue_head_init(&local->skb_queue); 645 skb_queue_head_init(&local->skb_queue);
665 skb_queue_head_init(&local->skb_queue_unreliable); 646 skb_queue_head_init(&local->skb_queue_unreliable);
666 647
667 /* init dummy netdev for use w/ NAPI */
668 init_dummy_netdev(&local->napi_dev);
669
670 ieee80211_led_names(local); 648 ieee80211_led_names(local);
671 649
672 ieee80211_roc_setup(local); 650 ieee80211_roc_setup(local);
@@ -1021,9 +999,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
1021 goto fail_ifa6; 999 goto fail_ifa6;
1022#endif 1000#endif
1023 1001
1024 netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll,
1025 local->hw.napi_weight);
1026
1027 return 0; 1002 return 0;
1028 1003
1029#if IS_ENABLED(CONFIG_IPV6) 1004#if IS_ENABLED(CONFIG_IPV6)
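Aside (not part of the patch): the wiphy->vht_capa_mod_mask added above declares which VHT capability bits userspace may override at association time; ieee80211_apply_vhtcap_overrides() then applies them. A sketch of the assumed per-bit semantics, with illustrative values:

#include <stdint.h>
#include <stdio.h>

/* Only bits set in mod_mask may be changed by the user-supplied value;
 * every other bit keeps the hardware capability. */
static uint32_t apply_cap_override(uint32_t hw_cap, uint32_t user_cap,
				   uint32_t mod_mask)
{
	return (hw_cap & ~mod_mask) | (user_cap & mod_mask);
}

int main(void)
{
	uint32_t hw_cap   = 0x00000063; /* illustrative VHT cap bits */
	uint32_t user_cap = 0x00000000; /* user clears an allowed bit */
	uint32_t mod_mask = 0x00000020; /* only this bit is overridable */

	printf("effective cap: 0x%08x\n",
	       apply_cap_override(hw_cap, user_cap, mod_mask));
	return 0;
}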
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 4749b3858695..123a300cef57 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -13,10 +13,6 @@
13#include "ieee80211_i.h" 13#include "ieee80211_i.h"
14#include "mesh.h" 14#include "mesh.h"
15 15
16#define TMR_RUNNING_HK 0
17#define TMR_RUNNING_MP 1
18#define TMR_RUNNING_MPR 2
19
20static int mesh_allocated; 16static int mesh_allocated;
21static struct kmem_cache *rm_cache; 17static struct kmem_cache *rm_cache;
22 18
@@ -50,11 +46,6 @@ static void ieee80211_mesh_housekeeping_timer(unsigned long data)
50 46
51 set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); 47 set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags);
52 48
53 if (local->quiescing) {
54 set_bit(TMR_RUNNING_HK, &ifmsh->timers_running);
55 return;
56 }
57
58 ieee80211_queue_work(&local->hw, &sdata->work); 49 ieee80211_queue_work(&local->hw, &sdata->work);
59} 50}
60 51
@@ -165,7 +156,7 @@ void mesh_sta_cleanup(struct sta_info *sta)
165 * an update. 156 * an update.
166 */ 157 */
167 changed = mesh_accept_plinks_update(sdata); 158 changed = mesh_accept_plinks_update(sdata);
168 if (sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { 159 if (!sdata->u.mesh.user_mpm) {
169 changed |= mesh_plink_deactivate(sta); 160 changed |= mesh_plink_deactivate(sta);
170 del_timer_sync(&sta->plink_timer); 161 del_timer_sync(&sta->plink_timer);
171 } 162 }
@@ -479,15 +470,8 @@ static void ieee80211_mesh_path_timer(unsigned long data)
479{ 470{
480 struct ieee80211_sub_if_data *sdata = 471 struct ieee80211_sub_if_data *sdata =
481 (struct ieee80211_sub_if_data *) data; 472 (struct ieee80211_sub_if_data *) data;
482 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
483 struct ieee80211_local *local = sdata->local;
484
485 if (local->quiescing) {
486 set_bit(TMR_RUNNING_MP, &ifmsh->timers_running);
487 return;
488 }
489 473
490 ieee80211_queue_work(&local->hw, &sdata->work); 474 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
491} 475}
492 476
493static void ieee80211_mesh_path_root_timer(unsigned long data) 477static void ieee80211_mesh_path_root_timer(unsigned long data)
@@ -495,16 +479,10 @@ static void ieee80211_mesh_path_root_timer(unsigned long data)
495 struct ieee80211_sub_if_data *sdata = 479 struct ieee80211_sub_if_data *sdata =
496 (struct ieee80211_sub_if_data *) data; 480 (struct ieee80211_sub_if_data *) data;
497 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; 481 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
498 struct ieee80211_local *local = sdata->local;
499 482
500 set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); 483 set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags);
501 484
502 if (local->quiescing) { 485 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
503 set_bit(TMR_RUNNING_MPR, &ifmsh->timers_running);
504 return;
505 }
506
507 ieee80211_queue_work(&local->hw, &sdata->work);
508} 486}
509 487
510void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) 488void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh)
@@ -622,35 +600,6 @@ static void ieee80211_mesh_rootpath(struct ieee80211_sub_if_data *sdata)
622 round_jiffies(TU_TO_EXP_TIME(interval))); 600 round_jiffies(TU_TO_EXP_TIME(interval)));
623} 601}
624 602
625#ifdef CONFIG_PM
626void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata)
627{
628 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
629
630 /* use atomic bitops in case all timers fire at the same time */
631
632 if (del_timer_sync(&ifmsh->housekeeping_timer))
633 set_bit(TMR_RUNNING_HK, &ifmsh->timers_running);
634 if (del_timer_sync(&ifmsh->mesh_path_timer))
635 set_bit(TMR_RUNNING_MP, &ifmsh->timers_running);
636 if (del_timer_sync(&ifmsh->mesh_path_root_timer))
637 set_bit(TMR_RUNNING_MPR, &ifmsh->timers_running);
638}
639
640void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata)
641{
642 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
643
644 if (test_and_clear_bit(TMR_RUNNING_HK, &ifmsh->timers_running))
645 add_timer(&ifmsh->housekeeping_timer);
646 if (test_and_clear_bit(TMR_RUNNING_MP, &ifmsh->timers_running))
647 add_timer(&ifmsh->mesh_path_timer);
648 if (test_and_clear_bit(TMR_RUNNING_MPR, &ifmsh->timers_running))
649 add_timer(&ifmsh->mesh_path_root_timer);
650 ieee80211_mesh_root_setup(ifmsh);
651}
652#endif
653
654static int 603static int
655ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) 604ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
656{ 605{
@@ -750,10 +699,8 @@ out_free:
750static int 699static int
751ieee80211_mesh_rebuild_beacon(struct ieee80211_if_mesh *ifmsh) 700ieee80211_mesh_rebuild_beacon(struct ieee80211_if_mesh *ifmsh)
752{ 701{
753 struct ieee80211_sub_if_data *sdata;
754 struct beacon_data *old_bcn; 702 struct beacon_data *old_bcn;
755 int ret; 703 int ret;
756 sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh);
757 704
758 mutex_lock(&ifmsh->mtx); 705 mutex_lock(&ifmsh->mtx);
759 706
@@ -871,8 +818,6 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
871 local->fif_other_bss--; 818 local->fif_other_bss--;
872 atomic_dec(&local->iff_allmultis); 819 atomic_dec(&local->iff_allmultis);
873 ieee80211_configure_filter(local); 820 ieee80211_configure_filter(local);
874
875 sdata->u.mesh.timers_running = 0;
876} 821}
877 822
878static void 823static void
@@ -886,9 +831,8 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,
886 struct ieee80211_mgmt *hdr; 831 struct ieee80211_mgmt *hdr;
887 struct ieee802_11_elems elems; 832 struct ieee802_11_elems elems;
888 size_t baselen; 833 size_t baselen;
889 u8 *pos, *end; 834 u8 *pos;
890 835
891 end = ((u8 *) mgmt) + len;
892 pos = mgmt->u.probe_req.variable; 836 pos = mgmt->u.probe_req.variable;
893 baselen = (u8 *) pos - (u8 *) mgmt; 837 baselen = (u8 *) pos - (u8 *) mgmt;
894 if (baselen > len) 838 if (baselen > len)
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 336c88a16687..6ffabbe99c46 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -313,8 +313,6 @@ void mesh_path_timer(unsigned long data);
313void mesh_path_flush_by_nexthop(struct sta_info *sta); 313void mesh_path_flush_by_nexthop(struct sta_info *sta);
314void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, 314void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata,
315 struct sk_buff *skb); 315 struct sk_buff *skb);
316void mesh_path_quiesce(struct ieee80211_sub_if_data *sdata);
317void mesh_path_restart(struct ieee80211_sub_if_data *sdata);
318void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata); 316void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata);
319 317
320bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt); 318bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt);
@@ -359,22 +357,12 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
359 357
360void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local); 358void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local);
361 359
362void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata);
363void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata);
364void mesh_plink_quiesce(struct sta_info *sta);
365void mesh_plink_restart(struct sta_info *sta);
366void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata); 360void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata);
367void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata); 361void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata);
368void ieee80211s_stop(void); 362void ieee80211s_stop(void);
369#else 363#else
370static inline void 364static inline void
371ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {} 365ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {}
372static inline void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata)
373{}
374static inline void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata)
375{}
376static inline void mesh_plink_quiesce(struct sta_info *sta) {}
377static inline void mesh_plink_restart(struct sta_info *sta) {}
378static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) 366static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
379{ return false; } 367{ return false; }
380static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) 368static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 07d396d57079..937e06fe8f2a 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -420,7 +420,6 @@ __mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *hw_addr)
420 return NULL; 420 return NULL;
421 421
422 sta->plink_state = NL80211_PLINK_LISTEN; 422 sta->plink_state = NL80211_PLINK_LISTEN;
423 init_timer(&sta->plink_timer);
424 423
425 sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); 424 sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
426 sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); 425 sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
@@ -437,8 +436,9 @@ mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr,
437{ 436{
438 struct sta_info *sta = NULL; 437 struct sta_info *sta = NULL;
439 438
440 /* Userspace handles peer allocation when security is enabled */ 439 /* Userspace handles station allocation */
441 if (sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) 440 if (sdata->u.mesh.user_mpm ||
441 sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED)
442 cfg80211_notify_new_peer_candidate(sdata->dev, addr, 442 cfg80211_notify_new_peer_candidate(sdata->dev, addr,
443 elems->ie_start, 443 elems->ie_start,
444 elems->total_len, 444 elems->total_len,
@@ -534,10 +534,8 @@ static void mesh_plink_timer(unsigned long data)
534 */ 534 */
535 sta = (struct sta_info *) data; 535 sta = (struct sta_info *) data;
536 536
537 if (sta->sdata->local->quiescing) { 537 if (sta->sdata->local->quiescing)
538 sta->plink_timer_was_running = true;
539 return; 538 return;
540 }
541 539
542 spin_lock_bh(&sta->lock); 540 spin_lock_bh(&sta->lock);
543 if (sta->ignore_plink_timer) { 541 if (sta->ignore_plink_timer) {
@@ -598,29 +596,6 @@ static void mesh_plink_timer(unsigned long data)
598 } 596 }
599} 597}
600 598
601#ifdef CONFIG_PM
602void mesh_plink_quiesce(struct sta_info *sta)
603{
604 if (!ieee80211_vif_is_mesh(&sta->sdata->vif))
605 return;
606
607 /* no kernel mesh sta timers have been initialized */
608 if (sta->sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE)
609 return;
610
611 if (del_timer_sync(&sta->plink_timer))
612 sta->plink_timer_was_running = true;
613}
614
615void mesh_plink_restart(struct sta_info *sta)
616{
617 if (sta->plink_timer_was_running) {
618 add_timer(&sta->plink_timer);
619 sta->plink_timer_was_running = false;
620 }
621}
622#endif
623
624static inline void mesh_plink_timer_set(struct sta_info *sta, int timeout) 599static inline void mesh_plink_timer_set(struct sta_info *sta, int timeout)
625{ 600{
626 sta->plink_timer.expires = jiffies + (HZ * timeout / 1000); 601 sta->plink_timer.expires = jiffies + (HZ * timeout / 1000);
@@ -695,6 +670,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
695 if (len < IEEE80211_MIN_ACTION_SIZE + 3) 670 if (len < IEEE80211_MIN_ACTION_SIZE + 3)
696 return; 671 return;
697 672
673 if (sdata->u.mesh.user_mpm)
674 /* userspace must register for these */
675 return;
676
698 if (is_multicast_ether_addr(mgmt->da)) { 677 if (is_multicast_ether_addr(mgmt->da)) {
699 mpl_dbg(sdata, 678 mpl_dbg(sdata,
700 "Mesh plink: ignore frame from multicast address\n"); 679 "Mesh plink: ignore frame from multicast address\n");
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 346ad4cfb013..dec42ab1fa91 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -87,9 +87,6 @@ MODULE_PARM_DESC(probe_wait_ms,
87 */ 87 */
88#define IEEE80211_SIGNAL_AVE_MIN_COUNT 4 88#define IEEE80211_SIGNAL_AVE_MIN_COUNT 4
89 89
90#define TMR_RUNNING_TIMER 0
91#define TMR_RUNNING_CHANSW 1
92
93/* 90/*
94 * All cfg80211 functions have to be called outside a locked 91 * All cfg80211 functions have to be called outside a locked
95 * section so that they can acquire a lock themselves... This 92 * section so that they can acquire a lock themselves... This
@@ -609,6 +606,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
609 BUILD_BUG_ON(sizeof(vht_cap) != sizeof(sband->vht_cap)); 606 BUILD_BUG_ON(sizeof(vht_cap) != sizeof(sband->vht_cap));
610 607
611 memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap)); 608 memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap));
609 ieee80211_apply_vhtcap_overrides(sdata, &vht_cap);
612 610
613 /* determine capability flags */ 611 /* determine capability flags */
614 cap = vht_cap.cap; 612 cap = vht_cap.cap;
@@ -1011,6 +1009,7 @@ static void ieee80211_chswitch_work(struct work_struct *work)
1011 1009
1012 /* XXX: wait for a beacon first? */ 1010 /* XXX: wait for a beacon first? */
1013 ieee80211_wake_queues_by_reason(&sdata->local->hw, 1011 ieee80211_wake_queues_by_reason(&sdata->local->hw,
1012 IEEE80211_MAX_QUEUE_MAP,
1014 IEEE80211_QUEUE_STOP_REASON_CSA); 1013 IEEE80211_QUEUE_STOP_REASON_CSA);
1015 out: 1014 out:
1016 ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; 1015 ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
@@ -1038,14 +1037,8 @@ static void ieee80211_chswitch_timer(unsigned long data)
1038{ 1037{
1039 struct ieee80211_sub_if_data *sdata = 1038 struct ieee80211_sub_if_data *sdata =
1040 (struct ieee80211_sub_if_data *) data; 1039 (struct ieee80211_sub_if_data *) data;
1041 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1042 1040
1043 if (sdata->local->quiescing) { 1041 ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work);
1044 set_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running);
1045 return;
1046 }
1047
1048 ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work);
1049} 1042}
1050 1043
1051void 1044void
@@ -1116,6 +1109,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
1116 1109
1117 if (sw_elem->mode) 1110 if (sw_elem->mode)
1118 ieee80211_stop_queues_by_reason(&sdata->local->hw, 1111 ieee80211_stop_queues_by_reason(&sdata->local->hw,
1112 IEEE80211_MAX_QUEUE_MAP,
1119 IEEE80211_QUEUE_STOP_REASON_CSA); 1113 IEEE80211_QUEUE_STOP_REASON_CSA);
1120 1114
1121 if (sdata->local->ops->channel_switch) { 1115 if (sdata->local->ops->channel_switch) {
@@ -1383,6 +1377,7 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work)
1383 } 1377 }
1384 1378
1385 ieee80211_wake_queues_by_reason(&local->hw, 1379 ieee80211_wake_queues_by_reason(&local->hw,
1380 IEEE80211_MAX_QUEUE_MAP,
1386 IEEE80211_QUEUE_STOP_REASON_PS); 1381 IEEE80211_QUEUE_STOP_REASON_PS);
1387} 1382}
1388 1383
@@ -1444,7 +1439,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
1444 else { 1439 else {
1445 ieee80211_send_nullfunc(local, sdata, 1); 1440 ieee80211_send_nullfunc(local, sdata, 1);
1446 /* Flush to get the tx status of nullfunc frame */ 1441 /* Flush to get the tx status of nullfunc frame */
1447 drv_flush(local, false); 1442 ieee80211_flush_queues(local, sdata);
1448 } 1443 }
1449 } 1444 }
1450 1445
@@ -1775,7 +1770,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
1775 1770
1776 /* flush out any pending frame (e.g. DELBA) before deauth/disassoc */ 1771 /* flush out any pending frame (e.g. DELBA) before deauth/disassoc */
1777 if (tx) 1772 if (tx)
1778 drv_flush(local, false); 1773 ieee80211_flush_queues(local, sdata);
1779 1774
1780 /* deauthenticate/disassociate now */ 1775 /* deauthenticate/disassociate now */
1781 if (tx || frame_buf) 1776 if (tx || frame_buf)
@@ -1784,7 +1779,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
1784 1779
1785 /* flush out frame */ 1780 /* flush out frame */
1786 if (tx) 1781 if (tx)
1787 drv_flush(local, false); 1782 ieee80211_flush_queues(local, sdata);
1788 1783
1789 /* clear bssid only after building the needed mgmt frames */ 1784 /* clear bssid only after building the needed mgmt frames */
1790 memset(ifmgd->bssid, 0, ETH_ALEN); 1785 memset(ifmgd->bssid, 0, ETH_ALEN);
@@ -1802,9 +1797,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
1802 sdata->vif.bss_conf.p2p_ctwindow = 0; 1797 sdata->vif.bss_conf.p2p_ctwindow = 0;
1803 sdata->vif.bss_conf.p2p_oppps = false; 1798 sdata->vif.bss_conf.p2p_oppps = false;
1804 1799
1805 /* on the next assoc, re-program HT parameters */ 1800 /* on the next assoc, re-program HT/VHT parameters */
1806 memset(&ifmgd->ht_capa, 0, sizeof(ifmgd->ht_capa)); 1801 memset(&ifmgd->ht_capa, 0, sizeof(ifmgd->ht_capa));
1807 memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask)); 1802 memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask));
1803 memset(&ifmgd->vht_capa, 0, sizeof(ifmgd->vht_capa));
1804 memset(&ifmgd->vht_capa_mask, 0, sizeof(ifmgd->vht_capa_mask));
1808 1805
1809 sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL; 1806 sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
1810 1807
@@ -1830,8 +1827,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
1830 del_timer_sync(&sdata->u.mgd.timer); 1827 del_timer_sync(&sdata->u.mgd.timer);
1831 del_timer_sync(&sdata->u.mgd.chswitch_timer); 1828 del_timer_sync(&sdata->u.mgd.chswitch_timer);
1832 1829
1833 sdata->u.mgd.timers_running = 0;
1834
1835 sdata->vif.bss_conf.dtim_period = 0; 1830 sdata->vif.bss_conf.dtim_period = 0;
1836 1831
1837 ifmgd->flags = 0; 1832 ifmgd->flags = 0;
@@ -1956,7 +1951,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
1956 ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms); 1951 ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms);
1957 run_again(ifmgd, ifmgd->probe_timeout); 1952 run_again(ifmgd, ifmgd->probe_timeout);
1958 if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) 1953 if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
1959 drv_flush(sdata->local, false); 1954 ieee80211_flush_queues(sdata->local, sdata);
1960} 1955}
1961 1956
1962static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, 1957static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
@@ -2079,6 +2074,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
2079 true, frame_buf); 2074 true, frame_buf);
2080 ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; 2075 ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
2081 ieee80211_wake_queues_by_reason(&sdata->local->hw, 2076 ieee80211_wake_queues_by_reason(&sdata->local->hw,
2077 IEEE80211_MAX_QUEUE_MAP,
2082 IEEE80211_QUEUE_STOP_REASON_CSA); 2078 IEEE80211_QUEUE_STOP_REASON_CSA);
2083 mutex_unlock(&ifmgd->mtx); 2079 mutex_unlock(&ifmgd->mtx);
2084 2080
@@ -3140,15 +3136,8 @@ static void ieee80211_sta_timer(unsigned long data)
3140{ 3136{
3141 struct ieee80211_sub_if_data *sdata = 3137 struct ieee80211_sub_if_data *sdata =
3142 (struct ieee80211_sub_if_data *) data; 3138 (struct ieee80211_sub_if_data *) data;
3143 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
3144 struct ieee80211_local *local = sdata->local;
3145
3146 if (local->quiescing) {
3147 set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running);
3148 return;
3149 }
3150 3139
3151 ieee80211_queue_work(&local->hw, &sdata->work); 3140 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
3152} 3141}
3153 3142
3154static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, 3143static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
@@ -3500,72 +3489,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
3500 } 3489 }
3501} 3490}
3502 3491
3503#ifdef CONFIG_PM
3504void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata)
3505{
3506 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
3507
3508 /*
3509 * Stop timers before deleting work items, as timers
3510 * could race and re-add the work-items. They will be
3511 * re-established on connection.
3512 */
3513 del_timer_sync(&ifmgd->conn_mon_timer);
3514 del_timer_sync(&ifmgd->bcn_mon_timer);
3515
3516 /*
3517 * we need to use atomic bitops for the running bits
3518 * only because both timers might fire at the same
3519 * time -- the code here is properly synchronised.
3520 */
3521
3522 cancel_work_sync(&ifmgd->request_smps_work);
3523
3524 cancel_work_sync(&ifmgd->monitor_work);
3525 cancel_work_sync(&ifmgd->beacon_connection_loss_work);
3526 cancel_work_sync(&ifmgd->csa_connection_drop_work);
3527 if (del_timer_sync(&ifmgd->timer))
3528 set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running);
3529
3530 if (del_timer_sync(&ifmgd->chswitch_timer))
3531 set_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running);
3532 cancel_work_sync(&ifmgd->chswitch_work);
3533}
3534
3535void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
3536{
3537 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
3538
3539 mutex_lock(&ifmgd->mtx);
3540 if (!ifmgd->associated) {
3541 mutex_unlock(&ifmgd->mtx);
3542 return;
3543 }
3544
3545 if (sdata->flags & IEEE80211_SDATA_DISCONNECT_RESUME) {
3546 sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME;
3547 mlme_dbg(sdata, "driver requested disconnect after resume\n");
3548 ieee80211_sta_connection_lost(sdata,
3549 ifmgd->associated->bssid,
3550 WLAN_REASON_UNSPECIFIED,
3551 true);
3552 mutex_unlock(&ifmgd->mtx);
3553 return;
3554 }
3555 mutex_unlock(&ifmgd->mtx);
3556
3557 if (test_and_clear_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running))
3558 add_timer(&ifmgd->timer);
3559 if (test_and_clear_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running))
3560 add_timer(&ifmgd->chswitch_timer);
3561 ieee80211_sta_reset_beacon_monitor(sdata);
3562
3563 mutex_lock(&sdata->local->mtx);
3564 ieee80211_restart_sta_timer(sdata);
3565 mutex_unlock(&sdata->local->mtx);
3566}
3567#endif
3568
3569/* interface setup */ 3492/* interface setup */
3570void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) 3493void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
3571{ 3494{
@@ -4089,6 +4012,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
4089 ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; 4012 ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
4090 } 4013 }
4091 4014
4015 if (req->flags & ASSOC_REQ_DISABLE_VHT)
4016 ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
4017
4092 /* Also disable HT if we don't support it or the AP doesn't use WMM */ 4018 /* Also disable HT if we don't support it or the AP doesn't use WMM */
4093 sband = local->hw.wiphy->bands[req->bss->channel->band]; 4019 sband = local->hw.wiphy->bands[req->bss->channel->band];
4094 if (!sband->ht_cap.ht_supported || 4020 if (!sband->ht_cap.ht_supported ||
@@ -4112,6 +4038,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
4112 memcpy(&ifmgd->ht_capa_mask, &req->ht_capa_mask, 4038 memcpy(&ifmgd->ht_capa_mask, &req->ht_capa_mask,
4113 sizeof(ifmgd->ht_capa_mask)); 4039 sizeof(ifmgd->ht_capa_mask));
4114 4040
4041 memcpy(&ifmgd->vht_capa, &req->vht_capa, sizeof(ifmgd->vht_capa));
4042 memcpy(&ifmgd->vht_capa_mask, &req->vht_capa_mask,
4043 sizeof(ifmgd->vht_capa_mask));
4044
4115 if (req->ie && req->ie_len) { 4045 if (req->ie && req->ie_len) {
4116 memcpy(assoc_data->ie, req->ie, req->ie_len); 4046 memcpy(assoc_data->ie, req->ie, req->ie_len);
4117 assoc_data->ie_len = req->ie_len; 4047 assoc_data->ie_len = req->ie_len;
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 430bd254e496..cce795871ab1 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -118,9 +118,9 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local)
118 * Stop queues and transmit all frames queued by the driver 118 * Stop queues and transmit all frames queued by the driver
119 * before sending nullfunc to enable powersave at the AP. 119 * before sending nullfunc to enable powersave at the AP.
120 */ 120 */
121 ieee80211_stop_queues_by_reason(&local->hw, 121 ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
122 IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); 122 IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL);
123 drv_flush(local, false); 123 ieee80211_flush_queues(local, NULL);
124 124
125 mutex_lock(&local->iflist_mtx); 125 mutex_lock(&local->iflist_mtx);
126 list_for_each_entry(sdata, &local->interfaces, list) { 126 list_for_each_entry(sdata, &local->interfaces, list) {
@@ -181,7 +181,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local)
181 } 181 }
182 mutex_unlock(&local->iflist_mtx); 182 mutex_unlock(&local->iflist_mtx);
183 183
184 ieee80211_wake_queues_by_reason(&local->hw, 184 ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
185 IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); 185 IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL);
186} 186}
187 187
@@ -277,7 +277,7 @@ void ieee80211_start_next_roc(struct ieee80211_local *local)
277 duration = 10; 277 duration = 10;
278 278
279 ret = drv_remain_on_channel(local, roc->sdata, roc->chan, 279 ret = drv_remain_on_channel(local, roc->sdata, roc->chan,
280 duration); 280 duration, roc->type);
281 281
282 roc->started = true; 282 roc->started = true;
283 283
@@ -382,7 +382,7 @@ void ieee80211_sw_roc_work(struct work_struct *work)
382 ieee80211_roc_notify_destroy(roc, !roc->abort); 382 ieee80211_roc_notify_destroy(roc, !roc->abort);
383 383
384 if (started) { 384 if (started) {
385 drv_flush(local, false); 385 ieee80211_flush_queues(local, NULL);
386 386
387 local->tmp_channel = NULL; 387 local->tmp_channel = NULL;
388 ieee80211_hw_config(local, 0); 388 ieee80211_hw_config(local, 0);
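Aside (not part of the patch): the queue stop/wake calls above now take a queue bitmap (IEEE80211_MAX_QUEUE_MAP selects every queue) in addition to the stop reason. A standalone model of the assumed bookkeeping, where a queue may transmit only while no stop reasons remain set on it:

#include <stdbool.h>
#include <stdio.h>

#define N_QUEUES 4
#define QUEUE_MAP_ALL ((1u << N_QUEUES) - 1) /* like IEEE80211_MAX_QUEUE_MAP */

static unsigned int stop_reasons[N_QUEUES]; /* per-queue reason bitmask */

static void stop_queues(unsigned int queue_map, unsigned int reason)
{
	for (int q = 0; q < N_QUEUES; q++)
		if (queue_map & (1u << q))
			stop_reasons[q] |= 1u << reason;
}

static void wake_queues(unsigned int queue_map, unsigned int reason)
{
	for (int q = 0; q < N_QUEUES; q++)
		if (queue_map & (1u << q))
			stop_reasons[q] &= ~(1u << reason);
}

static bool queue_can_tx(int q)
{
	return stop_reasons[q] == 0;
}

int main(void)
{
	stop_queues(QUEUE_MAP_ALL, 0); /* e.g. an offchannel stop reason */
	stop_queues(1u << 2, 1);       /* a second reason on queue 2 only */
	wake_queues(QUEUE_MAP_ALL, 0); /* offchannel operation finished */
	printf("queue 2 can tx: %d\n", queue_can_tx(2)); /* prints 0 */
	return 0;
}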
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index d0275f34bf70..3d16f4e61743 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -6,32 +6,11 @@
6#include "driver-ops.h" 6#include "driver-ops.h"
7#include "led.h" 7#include "led.h"
8 8
9/* return value indicates whether the driver should be further notified */
10static void ieee80211_quiesce(struct ieee80211_sub_if_data *sdata)
11{
12 switch (sdata->vif.type) {
13 case NL80211_IFTYPE_STATION:
14 ieee80211_sta_quiesce(sdata);
15 break;
16 case NL80211_IFTYPE_ADHOC:
17 ieee80211_ibss_quiesce(sdata);
18 break;
19 case NL80211_IFTYPE_MESH_POINT:
20 ieee80211_mesh_quiesce(sdata);
21 break;
22 default:
23 break;
24 }
25
26 cancel_work_sync(&sdata->work);
27}
28
29int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) 9int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
30{ 10{
31 struct ieee80211_local *local = hw_to_local(hw); 11 struct ieee80211_local *local = hw_to_local(hw);
32 struct ieee80211_sub_if_data *sdata; 12 struct ieee80211_sub_if_data *sdata;
33 struct sta_info *sta; 13 struct sta_info *sta;
34 struct ieee80211_chanctx *ctx;
35 14
36 if (!local->open_count) 15 if (!local->open_count)
37 goto suspend; 16 goto suspend;
@@ -51,12 +30,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
51 } 30 }
52 31
53 ieee80211_stop_queues_by_reason(hw, 32 ieee80211_stop_queues_by_reason(hw,
54 IEEE80211_QUEUE_STOP_REASON_SUSPEND); 33 IEEE80211_MAX_QUEUE_MAP,
34 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
55 35
56 /* flush out all packets */ 36 /* flush out all packets */
57 synchronize_net(); 37 synchronize_net();
58 38
59 drv_flush(local, false); 39 ieee80211_flush_queues(local, NULL);
60 40
61 local->quiescing = true; 41 local->quiescing = true;
62 /* make quiescing visible to timers everywhere */ 42 /* make quiescing visible to timers everywhere */
@@ -89,23 +69,17 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
89 mutex_unlock(&local->sta_mtx); 69 mutex_unlock(&local->sta_mtx);
90 } 70 }
91 ieee80211_wake_queues_by_reason(hw, 71 ieee80211_wake_queues_by_reason(hw,
72 IEEE80211_MAX_QUEUE_MAP,
92 IEEE80211_QUEUE_STOP_REASON_SUSPEND); 73 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
93 return err; 74 return err;
94 } else if (err > 0) { 75 } else if (err > 0) {
95 WARN_ON(err != 1); 76 WARN_ON(err != 1);
96 local->wowlan = false; 77 return err;
97 } else { 78 } else {
98 list_for_each_entry(sdata, &local->interfaces, list)
99 if (ieee80211_sdata_running(sdata))
100 ieee80211_quiesce(sdata);
101 goto suspend; 79 goto suspend;
102 } 80 }
103 } 81 }
104 82
105 /* disable keys */
106 list_for_each_entry(sdata, &local->interfaces, list)
107 ieee80211_disable_keys(sdata);
108
109 /* tear down aggregation sessions and remove STAs */ 83 /* tear down aggregation sessions and remove STAs */
110 mutex_lock(&local->sta_mtx); 84 mutex_lock(&local->sta_mtx);
111 list_for_each_entry(sta, &local->sta_list, list) { 85 list_for_each_entry(sta, &local->sta_list, list) {
@@ -117,100 +91,25 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
117 WARN_ON(drv_sta_state(local, sta->sdata, sta, 91 WARN_ON(drv_sta_state(local, sta->sdata, sta,
118 state, state - 1)); 92 state, state - 1));
119 } 93 }
120
121 mesh_plink_quiesce(sta);
122 } 94 }
123 mutex_unlock(&local->sta_mtx); 95 mutex_unlock(&local->sta_mtx);
124 96
125 /* remove all interfaces */ 97 /* remove all interfaces */
126 list_for_each_entry(sdata, &local->interfaces, list) { 98 list_for_each_entry(sdata, &local->interfaces, list) {
127 static u8 zero_addr[ETH_ALEN] = {};
128 u32 changed = 0;
129
130 if (!ieee80211_sdata_running(sdata)) 99 if (!ieee80211_sdata_running(sdata))
131 continue; 100 continue;
132
133 switch (sdata->vif.type) {
134 case NL80211_IFTYPE_AP_VLAN:
135 case NL80211_IFTYPE_MONITOR:
136 /* skip these */
137 continue;
138 case NL80211_IFTYPE_STATION:
139 if (sdata->vif.bss_conf.assoc)
140 changed = BSS_CHANGED_ASSOC |
141 BSS_CHANGED_BSSID |
142 BSS_CHANGED_IDLE;
143 break;
144 case NL80211_IFTYPE_AP:
145 case NL80211_IFTYPE_ADHOC:
146 case NL80211_IFTYPE_MESH_POINT:
147 if (sdata->vif.bss_conf.enable_beacon)
148 changed = BSS_CHANGED_BEACON_ENABLED;
149 break;
150 default:
151 break;
152 }
153
154 ieee80211_quiesce(sdata);
155
156 sdata->suspend_bss_conf = sdata->vif.bss_conf;
157 memset(&sdata->vif.bss_conf, 0, sizeof(sdata->vif.bss_conf));
158 sdata->vif.bss_conf.idle = true;
159 if (sdata->suspend_bss_conf.bssid)
160 sdata->vif.bss_conf.bssid = zero_addr;
161
162 /* disable beaconing or remove association */
163 ieee80211_bss_info_change_notify(sdata, changed);
164
165 if (sdata->vif.type == NL80211_IFTYPE_AP &&
166 rcu_access_pointer(sdata->u.ap.beacon))
167 drv_stop_ap(local, sdata);
168
169 if (local->use_chanctx) {
170 struct ieee80211_chanctx_conf *conf;
171
172 mutex_lock(&local->chanctx_mtx);
173 conf = rcu_dereference_protected(
174 sdata->vif.chanctx_conf,
175 lockdep_is_held(&local->chanctx_mtx));
176 if (conf) {
177 ctx = container_of(conf,
178 struct ieee80211_chanctx,
179 conf);
180 drv_unassign_vif_chanctx(local, sdata, ctx);
181 }
182
183 mutex_unlock(&local->chanctx_mtx);
184 }
185 drv_remove_interface(local, sdata); 101 drv_remove_interface(local, sdata);
186 } 102 }
187 103
188 sdata = rtnl_dereference(local->monitor_sdata); 104 sdata = rtnl_dereference(local->monitor_sdata);
189 if (sdata) { 105 if (sdata)
190 if (local->use_chanctx) {
191 struct ieee80211_chanctx_conf *conf;
192
193 mutex_lock(&local->chanctx_mtx);
194 conf = rcu_dereference_protected(
195 sdata->vif.chanctx_conf,
196 lockdep_is_held(&local->chanctx_mtx));
197 if (conf) {
198 ctx = container_of(conf,
199 struct ieee80211_chanctx,
200 conf);
201 drv_unassign_vif_chanctx(local, sdata, ctx);
202 }
203
204 mutex_unlock(&local->chanctx_mtx);
205 }
206
207 drv_remove_interface(local, sdata); 106 drv_remove_interface(local, sdata);
208 }
209 107
210 mutex_lock(&local->chanctx_mtx); 108 /*
211 list_for_each_entry(ctx, &local->chanctx_list, list) 109 * We disconnected on all interfaces before suspend, all channel
212 drv_remove_chanctx(local, ctx); 110 * contexts should be released.
213 mutex_unlock(&local->chanctx_mtx); 111 */
112 WARN_ON(!list_empty(&local->chanctx_list));
214 113
215 /* stop hardware - this must stop RX */ 114 /* stop hardware - this must stop RX */
216 if (local->open_count) 115 if (local->open_count)
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index eea45a2c7c35..1c36c9b4fa4a 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -55,7 +55,6 @@
55#include "rate.h" 55#include "rate.h"
56#include "rc80211_minstrel.h" 56#include "rc80211_minstrel.h"
57 57
58#define SAMPLE_COLUMNS 10
59#define SAMPLE_TBL(_mi, _idx, _col) \ 58#define SAMPLE_TBL(_mi, _idx, _col) \
60 _mi->sample_table[(_idx * SAMPLE_COLUMNS) + _col] 59 _mi->sample_table[(_idx * SAMPLE_COLUMNS) + _col]
61 60
@@ -70,16 +69,31 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix)
70 return i; 69 return i;
71} 70}
72 71
72/* find & sort topmost throughput rates */
73static inline void
74minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list)
75{
76 int j = MAX_THR_RATES;
77
78 while (j > 0 && mi->r[i].cur_tp > mi->r[tp_list[j - 1]].cur_tp)
79 j--;
80 if (j < MAX_THR_RATES - 1)
81 memmove(&tp_list[j + 1], &tp_list[j], MAX_THR_RATES - (j + 1));
82 if (j < MAX_THR_RATES)
83 tp_list[j] = i;
84}
85
73static void 86static void
74minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) 87minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
75{ 88{
76 u32 max_tp = 0, index_max_tp = 0, index_max_tp2 = 0; 89 u8 tmp_tp_rate[MAX_THR_RATES];
77 u32 max_prob = 0, index_max_prob = 0; 90 u8 tmp_prob_rate = 0;
78 u32 usecs; 91 u32 usecs;
79 u32 p;
80 int i; 92 int i;
81 93
82 mi->stats_update = jiffies; 94 for (i = 0; i < MAX_THR_RATES; i++)
95 tmp_tp_rate[i] = 0;
96
83 for (i = 0; i < mi->n_rates; i++) { 97 for (i = 0; i < mi->n_rates; i++) {
84 struct minstrel_rate *mr = &mi->r[i]; 98 struct minstrel_rate *mr = &mi->r[i];
85 99
@@ -87,27 +101,32 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
87 if (!usecs) 101 if (!usecs)
88 usecs = 1000000; 102 usecs = 1000000;
89 103
90 /* To avoid rounding issues, probabilities scale from 0 (0%) 104 if (unlikely(mr->attempts > 0)) {
91 * to 18000 (100%) */ 105 mr->sample_skipped = 0;
92 if (mr->attempts) { 106 mr->cur_prob = MINSTREL_FRAC(mr->success, mr->attempts);
93 p = (mr->success * 18000) / mr->attempts;
94 mr->succ_hist += mr->success; 107 mr->succ_hist += mr->success;
95 mr->att_hist += mr->attempts; 108 mr->att_hist += mr->attempts;
96 mr->cur_prob = p; 109 mr->probability = minstrel_ewma(mr->probability,
97 p = ((p * (100 - mp->ewma_level)) + (mr->probability * 110 mr->cur_prob,
98 mp->ewma_level)) / 100; 111 EWMA_LEVEL);
99 mr->probability = p; 112 } else
100 mr->cur_tp = p * (1000000 / usecs); 113 mr->sample_skipped++;
101 }
102 114
103 mr->last_success = mr->success; 115 mr->last_success = mr->success;
104 mr->last_attempts = mr->attempts; 116 mr->last_attempts = mr->attempts;
105 mr->success = 0; 117 mr->success = 0;
106 mr->attempts = 0; 118 mr->attempts = 0;
107 119
120 /* Update throughput per rate; reset throughput below 10% success */
121 if (mr->probability < MINSTREL_FRAC(10, 100))
122 mr->cur_tp = 0;
123 else
124 mr->cur_tp = mr->probability * (1000000 / usecs);
125
108 /* Sample less often below the 10% chance of success. 126 /* Sample less often below the 10% chance of success.
109 * Sample less often above the 95% chance of success. */ 127 * Sample less often above the 95% chance of success. */
110 if ((mr->probability > 17100) || (mr->probability < 1800)) { 128 if (mr->probability > MINSTREL_FRAC(95, 100) ||
129 mr->probability < MINSTREL_FRAC(10, 100)) {
111 mr->adjusted_retry_count = mr->retry_count >> 1; 130 mr->adjusted_retry_count = mr->retry_count >> 1;
112 if (mr->adjusted_retry_count > 2) 131 if (mr->adjusted_retry_count > 2)
113 mr->adjusted_retry_count = 2; 132 mr->adjusted_retry_count = 2;
@@ -118,35 +137,30 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
118 } 137 }
119 if (!mr->adjusted_retry_count) 138 if (!mr->adjusted_retry_count)
120 mr->adjusted_retry_count = 2; 139 mr->adjusted_retry_count = 2;
121 }
122 140
123 for (i = 0; i < mi->n_rates; i++) { 141 minstrel_sort_best_tp_rates(mi, i, tmp_tp_rate);
124 struct minstrel_rate *mr = &mi->r[i]; 142
125 if (max_tp < mr->cur_tp) { 143 /* To determine the most robust rate (max_prob_rate) used at
126 index_max_tp = i; 144 * 3rd mrr stage we distinguish between two cases:
127 max_tp = mr->cur_tp; 145 * (1) if any success probability >= 95%, out of those rates
128 } 146 * choose the maximum throughput rate as max_prob_rate
129 if (max_prob < mr->probability) { 147 * (2) if all success probabilities < 95%, the rate with
130 index_max_prob = i; 148 * highest success probability is chosen as max_prob_rate */
131 max_prob = mr->probability; 149 if (mr->probability >= MINSTREL_FRAC(95, 100)) {
150 if (mr->cur_tp >= mi->r[tmp_prob_rate].cur_tp)
151 tmp_prob_rate = i;
152 } else {
153 if (mr->probability >= mi->r[tmp_prob_rate].probability)
154 tmp_prob_rate = i;
132 } 155 }
133 } 156 }
134 157
135 max_tp = 0; 158 /* Assign the new rate set */
136 for (i = 0; i < mi->n_rates; i++) { 159 memcpy(mi->max_tp_rate, tmp_tp_rate, sizeof(mi->max_tp_rate));
137 struct minstrel_rate *mr = &mi->r[i]; 160 mi->max_prob_rate = tmp_prob_rate;
138
139 if (i == index_max_tp)
140 continue;
141 161
142 if (max_tp < mr->cur_tp) { 162 /* Reset update timer */
143 index_max_tp2 = i; 163 mi->stats_update = jiffies;
144 max_tp = mr->cur_tp;
145 }
146 }
147 mi->max_tp_rate = index_max_tp;
148 mi->max_tp_rate2 = index_max_tp2;
149 mi->max_prob_rate = index_max_prob;
150} 164}
151 165
152static void 166static void
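Aside (not part of the patch): minstrel_sort_best_tp_rates() above keeps the MAX_THR_RATES best-throughput rate indices in descending order with a single insertion step per rate. A standalone check, with made-up cur_tp values:

#include <stdio.h>
#include <string.h>

#define MAX_THR_RATES 4

static const unsigned int cur_tp[] = { 10, 50, 30, 70, 20 };

static void sort_best_tp_rates(int i, unsigned char *tp_list)
{
	int j = MAX_THR_RATES;

	/* walk up while rate i beats the current holder of slot j-1 */
	while (j > 0 && cur_tp[i] > cur_tp[tp_list[j - 1]])
		j--;
	/* shift the tail down and drop rate i into its slot */
	if (j < MAX_THR_RATES - 1)
		memmove(&tp_list[j + 1], &tp_list[j], MAX_THR_RATES - (j + 1));
	if (j < MAX_THR_RATES)
		tp_list[j] = i;
}

int main(void)
{
	unsigned char tp_list[MAX_THR_RATES] = { 0, 0, 0, 0 };

	for (int i = 0; i < 5; i++)
		sort_best_tp_rates(i, tp_list);
	/* expected order: rates 3 (70), 1 (50), 2 (30), 4 (20) */
	for (int i = 0; i < MAX_THR_RATES; i++)
		printf("#%d: rate %u tp %u\n", i + 1,
		       tp_list[i], cur_tp[tp_list[i]]);
	return 0;
}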
@@ -207,10 +221,10 @@ static int
207minstrel_get_next_sample(struct minstrel_sta_info *mi) 221minstrel_get_next_sample(struct minstrel_sta_info *mi)
208{ 222{
209 unsigned int sample_ndx; 223 unsigned int sample_ndx;
210 sample_ndx = SAMPLE_TBL(mi, mi->sample_idx, mi->sample_column); 224 sample_ndx = SAMPLE_TBL(mi, mi->sample_row, mi->sample_column);
211 mi->sample_idx++; 225 mi->sample_row++;
212 if ((int) mi->sample_idx > (mi->n_rates - 2)) { 226 if ((int) mi->sample_row >= mi->n_rates) {
213 mi->sample_idx = 0; 227 mi->sample_row = 0;
214 mi->sample_column++; 228 mi->sample_column++;
215 if (mi->sample_column >= SAMPLE_COLUMNS) 229 if (mi->sample_column >= SAMPLE_COLUMNS)
216 mi->sample_column = 0; 230 mi->sample_column = 0;
@@ -228,31 +242,37 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
228 struct minstrel_priv *mp = priv; 242 struct minstrel_priv *mp = priv;
229 struct ieee80211_tx_rate *ar = info->control.rates; 243 struct ieee80211_tx_rate *ar = info->control.rates;
230 unsigned int ndx, sample_ndx = 0; 244 unsigned int ndx, sample_ndx = 0;
231 bool mrr; 245 bool mrr_capable;
232 bool sample_slower = false; 246 bool indirect_rate_sampling = false;
233 bool sample = false; 247 bool rate_sampling = false;
234 int i, delta; 248 int i, delta;
235 int mrr_ndx[3]; 249 int mrr_ndx[3];
236 int sample_rate; 250 int sampling_ratio;
237 251
252 /* management/no-ack frames do not use rate control */
238 if (rate_control_send_low(sta, priv_sta, txrc)) 253 if (rate_control_send_low(sta, priv_sta, txrc))
239 return; 254 return;
240 255
241 mrr = mp->has_mrr && !txrc->rts && !txrc->bss_conf->use_cts_prot; 256 /* check multi-rate-retry capabilities & adjust lookaround_rate */
242 257 mrr_capable = mp->has_mrr &&
243 ndx = mi->max_tp_rate; 258 !txrc->rts &&
244 259 !txrc->bss_conf->use_cts_prot;
245 if (mrr) 260 if (mrr_capable)
246 sample_rate = mp->lookaround_rate_mrr; 261 sampling_ratio = mp->lookaround_rate_mrr;
247 else 262 else
248 sample_rate = mp->lookaround_rate; 263 sampling_ratio = mp->lookaround_rate;
264
265 /* init rateindex [ndx] with max throughput rate */
266 ndx = mi->max_tp_rate[0];
249 267
268 /* increase sum packet counter */
250 mi->packet_count++; 269 mi->packet_count++;
251 delta = (mi->packet_count * sample_rate / 100) - 270
271 delta = (mi->packet_count * sampling_ratio / 100) -
252 (mi->sample_count + mi->sample_deferred / 2); 272 (mi->sample_count + mi->sample_deferred / 2);
253 273
254 /* delta > 0: sampling required */ 274 /* delta > 0: sampling required */
255 if ((delta > 0) && (mrr || !mi->prev_sample)) { 275 if ((delta > 0) && (mrr_capable || !mi->prev_sample)) {
256 struct minstrel_rate *msr; 276 struct minstrel_rate *msr;
257 if (mi->packet_count >= 10000) { 277 if (mi->packet_count >= 10000) {
258 mi->sample_deferred = 0; 278 mi->sample_deferred = 0;
@@ -271,21 +291,28 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
271 mi->sample_count += (delta - mi->n_rates * 2); 291 mi->sample_count += (delta - mi->n_rates * 2);
272 } 292 }
273 293
294 /* get next random rate sample */
274 sample_ndx = minstrel_get_next_sample(mi); 295 sample_ndx = minstrel_get_next_sample(mi);
275 msr = &mi->r[sample_ndx]; 296 msr = &mi->r[sample_ndx];
276 sample = true; 297 rate_sampling = true;
277 sample_slower = mrr && (msr->perfect_tx_time > 298
278 mi->r[ndx].perfect_tx_time); 299 /* Decide if direct (1st mrr stage) or indirect (2nd mrr stage)
279 300 * rate sampling method should be used.
280 if (!sample_slower) { 301 * Respect such rates that are not sampled for 20 iterations.
302 */
303 if (mrr_capable &&
304 msr->perfect_tx_time > mi->r[ndx].perfect_tx_time &&
305 msr->sample_skipped < 20)
306 indirect_rate_sampling = true;
307
308 if (!indirect_rate_sampling) {
281 if (msr->sample_limit != 0) { 309 if (msr->sample_limit != 0) {
282 ndx = sample_ndx; 310 ndx = sample_ndx;
283 mi->sample_count++; 311 mi->sample_count++;
284 if (msr->sample_limit > 0) 312 if (msr->sample_limit > 0)
285 msr->sample_limit--; 313 msr->sample_limit--;
286 } else { 314 } else
287 sample = false; 315 rate_sampling = false;
288 }
289 } else { 316 } else {
290 /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark 317 /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark
291 * packets that have the sampling rate deferred to the 318 * packets that have the sampling rate deferred to the
@@ -297,34 +324,39 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
297 mi->sample_deferred++; 324 mi->sample_deferred++;
298 } 325 }
299 } 326 }
300 mi->prev_sample = sample; 327 mi->prev_sample = rate_sampling;
301 328
302 /* If we're not using MRR and the sampling rate already 329 /* If we're not using MRR and the sampling rate already
303 * has a probability of >95%, we shouldn't be attempting 330 * has a probability of >95%, we shouldn't be attempting
304 * to use it, as this only wastes precious airtime */ 331 * to use it, as this only wastes precious airtime */
305 if (!mrr && sample && (mi->r[ndx].probability > 17100)) 332 if (!mrr_capable && rate_sampling &&
306 ndx = mi->max_tp_rate; 333 (mi->r[ndx].probability > MINSTREL_FRAC(95, 100)))
334 ndx = mi->max_tp_rate[0];
307 335
336 /* mrr setup for 1st stage */
308 ar[0].idx = mi->r[ndx].rix; 337 ar[0].idx = mi->r[ndx].rix;
309 ar[0].count = minstrel_get_retry_count(&mi->r[ndx], info); 338 ar[0].count = minstrel_get_retry_count(&mi->r[ndx], info);
310 339
311 if (!mrr) { 340 /* non mrr setup for 2nd stage */
312 if (!sample) 341 if (!mrr_capable) {
342 if (!rate_sampling)
313 ar[0].count = mp->max_retry; 343 ar[0].count = mp->max_retry;
314 ar[1].idx = mi->lowest_rix; 344 ar[1].idx = mi->lowest_rix;
315 ar[1].count = mp->max_retry; 345 ar[1].count = mp->max_retry;
316 return; 346 return;
317 } 347 }
318 348
319 /* MRR setup */ 349 /* mrr setup for 2nd stage */
320 if (sample) { 350 if (rate_sampling) {
321 if (sample_slower) 351 if (indirect_rate_sampling)
322 mrr_ndx[0] = sample_ndx; 352 mrr_ndx[0] = sample_ndx;
323 else 353 else
324 mrr_ndx[0] = mi->max_tp_rate; 354 mrr_ndx[0] = mi->max_tp_rate[0];
325 } else { 355 } else {
326 mrr_ndx[0] = mi->max_tp_rate2; 356 mrr_ndx[0] = mi->max_tp_rate[1];
327 } 357 }
358
359 /* mrr setup for 3rd & 4th stage */
328 mrr_ndx[1] = mi->max_prob_rate; 360 mrr_ndx[1] = mi->max_prob_rate;
329 mrr_ndx[2] = 0; 361 mrr_ndx[2] = 0;
330 for (i = 1; i < 4; i++) { 362 for (i = 1; i < 4; i++) {
@@ -351,26 +383,21 @@ static void
351init_sample_table(struct minstrel_sta_info *mi) 383init_sample_table(struct minstrel_sta_info *mi)
352{ 384{
353 unsigned int i, col, new_idx; 385 unsigned int i, col, new_idx;
354 unsigned int n_srates = mi->n_rates - 1;
355 u8 rnd[8]; 386 u8 rnd[8];
356 387
357 mi->sample_column = 0; 388 mi->sample_column = 0;
358 mi->sample_idx = 0; 389 mi->sample_row = 0;
359 memset(mi->sample_table, 0, SAMPLE_COLUMNS * mi->n_rates); 390 memset(mi->sample_table, 0xff, SAMPLE_COLUMNS * mi->n_rates);
360 391
361 for (col = 0; col < SAMPLE_COLUMNS; col++) { 392 for (col = 0; col < SAMPLE_COLUMNS; col++) {
362 for (i = 0; i < n_srates; i++) { 393 for (i = 0; i < mi->n_rates; i++) {
363 get_random_bytes(rnd, sizeof(rnd)); 394 get_random_bytes(rnd, sizeof(rnd));
364 new_idx = (i + rnd[i & 7]) % n_srates; 395 new_idx = (i + rnd[i & 7]) % mi->n_rates;
365 396
366 while (SAMPLE_TBL(mi, new_idx, col) != 0) 397 while (SAMPLE_TBL(mi, new_idx, col) != 0xff)
367 new_idx = (new_idx + 1) % n_srates; 398 new_idx = (new_idx + 1) % mi->n_rates;
368 399
369 /* Don't sample the slowest rate (i.e. slowest base 400 SAMPLE_TBL(mi, new_idx, col) = i;
370 * rate). We must presume that the slowest rate works
371 * fine, or else other management frames will also be
372 * failing and the link will break */
373 SAMPLE_TBL(mi, new_idx, col) = i + 1;
374 } 401 }
375 } 402 }
376} 403}
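Aside (not part of the patch): with the slowest rate now sampled too, init_sample_table() fills each column with a random permutation of all rate indices, using 0xff as the empty marker since index 0 is valid. A standalone demo with an assumed rate count and rand() standing in for get_random_bytes():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SAMPLE_COLUMNS 10
#define N_RATES 8
/* same layout as the kernel's SAMPLE_TBL(): row-major by rate index */
#define SAMPLE_TBL(tbl, idx, col) (tbl)[(idx) * SAMPLE_COLUMNS + (col)]

int main(void)
{
	unsigned char table[N_RATES * SAMPLE_COLUMNS];

	memset(table, 0xff, sizeof(table));

	for (int col = 0; col < SAMPLE_COLUMNS; col++) {
		for (int i = 0; i < N_RATES; i++) {
			/* one byte of randomness, like rnd[i & 7] */
			int new_idx = (i + (rand() & 0xff)) % N_RATES;

			/* linear probe to the next free slot */
			while (SAMPLE_TBL(table, new_idx, col) != 0xff)
				new_idx = (new_idx + 1) % N_RATES;

			SAMPLE_TBL(table, new_idx, col) = i;
		}
	}

	/* each column is now a permutation of 0..N_RATES-1 */
	for (int i = 0; i < N_RATES; i++)
		printf("%u ", SAMPLE_TBL(table, i, 0));
	printf("\n");
	return 0;
}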
@@ -542,9 +569,6 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
542 mp->lookaround_rate = 5; 569 mp->lookaround_rate = 5;
543 mp->lookaround_rate_mrr = 10; 570 mp->lookaround_rate_mrr = 10;
544 571
545 /* moving average weight for EWMA */
546 mp->ewma_level = 75;
547
548 /* maximum time that the hw is allowed to stay in one MRR segment */ 572 /* maximum time that the hw is allowed to stay in one MRR segment */
549 mp->segment_size = 6000; 573 mp->segment_size = 6000;
550 574
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index 5ecf757817f2..85ebf42cb46d 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -9,6 +9,28 @@
9#ifndef __RC_MINSTREL_H 9#ifndef __RC_MINSTREL_H
10#define __RC_MINSTREL_H 10#define __RC_MINSTREL_H
11 11
12#define EWMA_LEVEL 75 /* ewma weighting factor [%] */
13#define SAMPLE_COLUMNS 10 /* number of columns in sample table */
14
15
16/* scaled fraction values */
17#define MINSTREL_SCALE 16
18#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div)
19#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)
20
21/* number of highest throughput rates to consider */
22#define MAX_THR_RATES 4
23
24/*
25 * Perform EWMA (Exponentially Weighted Moving Average) calculation
26 */
27static inline int
28minstrel_ewma(int old, int new, int weight)
29{
30 return (new * (100 - weight) + old * weight) / 100;
31}
32
33
12struct minstrel_rate { 34struct minstrel_rate {
13 int bitrate; 35 int bitrate;
14 int rix; 36 int rix;
@@ -26,6 +48,7 @@ struct minstrel_rate {
26 u32 attempts; 48 u32 attempts;
27 u32 last_attempts; 49 u32 last_attempts;
28 u32 last_success; 50 u32 last_success;
51 u8 sample_skipped;
29 52
30 /* parts per thousand */ 53 /* parts per thousand */
31 u32 cur_prob; 54 u32 cur_prob;
@@ -45,14 +68,13 @@ struct minstrel_sta_info {
45 68
46 unsigned int lowest_rix; 69 unsigned int lowest_rix;
47 70
48 unsigned int max_tp_rate; 71 u8 max_tp_rate[MAX_THR_RATES];
49 unsigned int max_tp_rate2; 72 u8 max_prob_rate;
50 unsigned int max_prob_rate;
51 unsigned int packet_count; 73 unsigned int packet_count;
52 unsigned int sample_count; 74 unsigned int sample_count;
53 int sample_deferred; 75 int sample_deferred;
54 76
55 unsigned int sample_idx; 77 unsigned int sample_row;
56 unsigned int sample_column; 78 unsigned int sample_column;
57 79
58 int n_rates; 80 int n_rates;
@@ -73,7 +95,6 @@ struct minstrel_priv {
73 unsigned int cw_min; 95 unsigned int cw_min;
74 unsigned int cw_max; 96 unsigned int cw_max;
75 unsigned int max_retry; 97 unsigned int max_retry;
76 unsigned int ewma_level;
77 unsigned int segment_size; 98 unsigned int segment_size;
78 unsigned int update_interval; 99 unsigned int update_interval;
79 unsigned int lookaround_rate; 100 unsigned int lookaround_rate;
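Aside (not part of the patch): a worked example of the MINSTREL_FRAC fixed-point scale and the shared EWMA helper defined above, with illustrative probabilities. 100% is 1 << MINSTREL_SCALE, so MINSTREL_FRAC(95, 100) is the 95% threshold used by both minstrel and minstrel_ht:

#include <stdio.h>

#define MINSTREL_SCALE 16
#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div)
#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)
#define EWMA_LEVEL 75 /* weight of the old value, in percent */

static int minstrel_ewma(int old, int new, int weight)
{
	return (new * (100 - weight) + old * weight) / 100;
}

int main(void)
{
	int prob = MINSTREL_FRAC(80, 100); /* running estimate: 80% */
	int cur  = MINSTREL_FRAC(60, 100); /* this interval: 60% */

	prob = minstrel_ewma(prob, cur, EWMA_LEVEL);
	/* 0.25 * 60% + 0.75 * 80% = 75%; fixed-point truncation prints 74 */
	printf("new prob ~= %d%%\n", MINSTREL_TRUNC(prob * 100));
	return 0;
}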
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index d5a56226e675..d1048348d399 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -73,15 +73,17 @@ minstrel_stats_open(struct inode *inode, struct file *file)
73 for (i = 0; i < mi->n_rates; i++) { 73 for (i = 0; i < mi->n_rates; i++) {
74 struct minstrel_rate *mr = &mi->r[i]; 74 struct minstrel_rate *mr = &mi->r[i];
75 75
76 *(p++) = (i == mi->max_tp_rate) ? 'T' : ' '; 76 *(p++) = (i == mi->max_tp_rate[0]) ? 'A' : ' ';
77 *(p++) = (i == mi->max_tp_rate2) ? 't' : ' '; 77 *(p++) = (i == mi->max_tp_rate[1]) ? 'B' : ' ';
78 *(p++) = (i == mi->max_tp_rate[2]) ? 'C' : ' ';
79 *(p++) = (i == mi->max_tp_rate[3]) ? 'D' : ' ';
78 *(p++) = (i == mi->max_prob_rate) ? 'P' : ' '; 80 *(p++) = (i == mi->max_prob_rate) ? 'P' : ' ';
79 p += sprintf(p, "%3u%s", mr->bitrate / 2, 81 p += sprintf(p, "%3u%s", mr->bitrate / 2,
80 (mr->bitrate & 1 ? ".5" : " ")); 82 (mr->bitrate & 1 ? ".5" : " "));
81 83
82 tp = mr->cur_tp / ((18000 << 10) / 96); 84 tp = MINSTREL_TRUNC(mr->cur_tp / 10);
83 prob = mr->cur_prob / 18; 85 prob = MINSTREL_TRUNC(mr->cur_prob * 1000);
84 eprob = mr->probability / 18; 86 eprob = MINSTREL_TRUNC(mr->probability * 1000);
85 87
86 p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " 88 p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u "
87 "%3u(%3u) %8llu %8llu\n", 89 "%3u(%3u) %8llu %8llu\n",
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 3af141c69712..d2b264d1311d 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -17,8 +17,6 @@
 #include "rc80211_minstrel_ht.h"
 
 #define AVG_PKT_SIZE	1200
-#define SAMPLE_COLUMNS	10
-#define EWMA_LEVEL	75
 
 /* Number of bits for an average sized packet */
 #define MCS_NBITS (AVG_PKT_SIZE << 3)
@@ -26,11 +24,11 @@
 /* Number of symbols for a packet with (bps) bits per symbol */
 #define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps))
 
-/* Transmission time for a packet containing (syms) symbols */
+/* Transmission time (nanoseconds) for a packet containing (syms) symbols */
 #define MCS_SYMBOL_TIME(sgi, syms)					\
 	(sgi ?								\
-	  ((syms) * 18 + 4) / 5 :	/* syms * 3.6 us */		\
-	  (syms) << 2			/* syms * 4 us */		\
+	  ((syms) * 18000 + 4000) / 5 :	/* syms * 3.6 us */		\
+	  ((syms) * 1000) << 2		/* syms * 4 us */		\
 	)
 
 /* Transmit duration for the raw data part of an average sized packet */
@@ -64,9 +62,9 @@
 }
 
 #define CCK_DURATION(_bitrate, _short, _len)		\
-	(10 /* SIFS */ +				\
+	(1000 * (10 /* SIFS */ +			\
	 (_short ? 72 + 24 : 144 + 48 ) +		\
-	 (8 * (_len + 4) * 10) / (_bitrate))
+	 (8 * (_len + 4) * 10) / (_bitrate)))
 
 #define CCK_ACK_DURATION(_bitrate, _short)			\
 	(CCK_DURATION((_bitrate > 10 ? 20 : 10), false, 60) +	\
@@ -129,15 +127,6 @@ const struct mcs_group minstrel_mcs_groups[] = {
 static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES];
 
 /*
- * Perform EWMA (Exponentially Weighted Moving Average) calculation
- */
-static int
-minstrel_ewma(int old, int new, int weight)
-{
-	return (new * (100 - weight) + old * weight) / 100;
-}
-
-/*
  * Look up an MCS group index based on mac80211 rate information
  */
 static int
@@ -211,20 +200,32 @@ static void
 minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate)
 {
 	struct minstrel_rate_stats *mr;
-	unsigned int usecs = 0;
+	unsigned int nsecs = 0;
+	unsigned int tp;
+	unsigned int prob;
 
 	mr = &mi->groups[group].rates[rate];
+	prob = mr->probability;
 
-	if (mr->probability < MINSTREL_FRAC(1, 10)) {
+	if (prob < MINSTREL_FRAC(1, 10)) {
 		mr->cur_tp = 0;
 		return;
 	}
 
+	/*
+	 * For the throughput calculation, limit the probability value to 90% to
+	 * account for collision related packet error rate fluctuation
+	 */
+	if (prob > MINSTREL_FRAC(9, 10))
+		prob = MINSTREL_FRAC(9, 10);
+
 	if (group != MINSTREL_CCK_GROUP)
-		usecs = mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len);
+		nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len);
+
+	nsecs += minstrel_mcs_groups[group].duration[rate];
+	tp = 1000000 * ((mr->probability * 1000) / nsecs);
 
-	usecs += minstrel_mcs_groups[group].duration[rate];
-	mr->cur_tp = MINSTREL_TRUNC((1000000 / usecs) * mr->probability);
+	mr->cur_tp = MINSTREL_TRUNC(tp);
 }
 
 /*
@@ -308,8 +309,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
 		}
 	}
 
-	/* try to sample up to half of the available rates during each interval */
-	mi->sample_count *= 4;
+	/* try to sample all available rates during each interval */
+	mi->sample_count *= 8;
 
 	cur_prob = 0;
 	cur_prob_tp = 0;
@@ -320,20 +321,13 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
 		if (!mg->supported)
 			continue;
 
-		mr = minstrel_get_ratestats(mi, mg->max_prob_rate);
-		if (cur_prob_tp < mr->cur_tp &&
-		    minstrel_mcs_groups[group].streams == 1) {
-			mi->max_prob_rate = mg->max_prob_rate;
-			cur_prob = mr->cur_prob;
-			cur_prob_tp = mr->cur_tp;
-		}
-
 		mr = minstrel_get_ratestats(mi, mg->max_tp_rate);
 		if (cur_tp < mr->cur_tp) {
 			mi->max_tp_rate2 = mi->max_tp_rate;
 			cur_tp2 = cur_tp;
 			mi->max_tp_rate = mg->max_tp_rate;
 			cur_tp = mr->cur_tp;
+			mi->max_prob_streams = minstrel_mcs_groups[group].streams - 1;
 		}
 
 		mr = minstrel_get_ratestats(mi, mg->max_tp_rate2);
@@ -343,6 +337,23 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
 		}
 	}
 
+	if (mi->max_prob_streams < 1)
+		mi->max_prob_streams = 1;
+
+	for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
+		mg = &mi->groups[group];
+		if (!mg->supported)
+			continue;
+		mr = minstrel_get_ratestats(mi, mg->max_prob_rate);
+		if (cur_prob_tp < mr->cur_tp &&
+		    minstrel_mcs_groups[group].streams <= mi->max_prob_streams) {
+			mi->max_prob_rate = mg->max_prob_rate;
+			cur_prob = mr->cur_prob;
+			cur_prob_tp = mr->cur_tp;
+		}
+	}
+
+
 	mi->stats_update = jiffies;
 }
 
@@ -467,7 +478,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
 
 	if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) {
 		mi->sample_wait = 16 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len);
-		mi->sample_tries = 2;
+		mi->sample_tries = 1;
 		mi->sample_count--;
 	}
 
@@ -536,7 +547,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
 	mr->retry_updated = true;
 
 	group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
-	tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len;
+	tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len / 1000;
 
 	/* Contention time for first 2 tries */
 	ctime = (t_slot * cw) >> 1;
@@ -616,6 +627,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
 {
 	struct minstrel_rate_stats *mr;
 	struct minstrel_mcs_group_data *mg;
+	unsigned int sample_dur, sample_group;
 	int sample_idx = 0;
 
 	if (mi->sample_wait > 0) {
@@ -626,39 +638,46 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
 	if (!mi->sample_tries)
 		return -1;
 
-	mi->sample_tries--;
 	mg = &mi->groups[mi->sample_group];
 	sample_idx = sample_table[mg->column][mg->index];
 	mr = &mg->rates[sample_idx];
-	sample_idx += mi->sample_group * MCS_GROUP_RATES;
+	sample_group = mi->sample_group;
+	sample_idx += sample_group * MCS_GROUP_RATES;
 	minstrel_next_sample_idx(mi);
 
 	/*
	 * Sampling might add some overhead (RTS, no aggregation)
	 * to the frame. Hence, don't use sampling for the currently
-	 * used max TP rate.
+	 * used rates.
	 */
-	if (sample_idx == mi->max_tp_rate)
+	if (sample_idx == mi->max_tp_rate ||
+	    sample_idx == mi->max_tp_rate2 ||
+	    sample_idx == mi->max_prob_rate)
 		return -1;
+
 	/*
-	 * When not using MRR, do not sample if the probability is already
-	 * higher than 95% to avoid wasting airtime
+	 * Do not sample if the probability is already higher than 95%
+	 * to avoid wasting airtime.
	 */
-	if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100)))
+	if (mr->probability > MINSTREL_FRAC(95, 100))
 		return -1;
 
 	/*
	 * Make sure that lower rates get sampled only occasionally,
	 * if the link is working perfectly.
	 */
-	if (minstrel_get_duration(sample_idx) >
-	    minstrel_get_duration(mi->max_tp_rate)) {
+	sample_dur = minstrel_get_duration(sample_idx);
+	if (sample_dur >= minstrel_get_duration(mi->max_tp_rate2) &&
+	    (mi->max_prob_streams <
+	     minstrel_mcs_groups[sample_group].streams ||
+	     sample_dur >= minstrel_get_duration(mi->max_prob_rate))) {
 		if (mr->sample_skipped < 20)
 			return -1;
 
 		if (mi->sample_slow++ > 2)
 			return -1;
 	}
+	mi->sample_tries--;
 
 	return sample_idx;
 }
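The throughput estimate now works in nanoseconds and clamps the success probability to 90% before use, so a lucky measurement interval cannot promote a marginal rate. A standalone sketch of the calculation with an assumed per-frame airtime (real durations come from the minstrel_mcs_groups[] tables):

#include <stdio.h>

#define MINSTREL_SCALE 16
#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / (div))
#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)

int main(void)
{
	unsigned int prob = MINSTREL_FRAC(95, 100);	/* measured: 95% */
	unsigned int nsecs = 40000;	/* assumed: 40 us per average frame */
	unsigned int tp;

	/* cap at 90% to damp collision-related error rate fluctuation */
	if (prob > MINSTREL_FRAC(9, 10))
		prob = MINSTREL_FRAC(9, 10);

	/* same shape as minstrel_ht_calc_tp(): scaled "units per second" */
	tp = 1000000 * ((prob * 1000) / nsecs);
	printf("cur_tp: %u\n", MINSTREL_TRUNC(tp));	/* 22491 */
	return 0;
}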
diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h
index 302dbd52180d..9b16e9de9923 100644
--- a/net/mac80211/rc80211_minstrel_ht.h
+++ b/net/mac80211/rc80211_minstrel_ht.h
@@ -16,11 +16,6 @@
 #define MINSTREL_MAX_STREAMS	3
 #define MINSTREL_STREAM_GROUPS	4
 
-/* scaled fraction values */
-#define MINSTREL_SCALE	16
-#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div)
-#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)
-
 #define MCS_GROUP_RATES	8
 
 struct mcs_group {
@@ -85,6 +80,7 @@ struct minstrel_ht_sta {
 
 	/* best probability rate */
 	unsigned int max_prob_rate;
+	unsigned int max_prob_streams;
 
 	/* time of last status update */
 	unsigned long stats_update;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c6844ad080be..2528b5a4d6d4 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -648,24 +648,6 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
 	return RX_CONTINUE;
 }
 
-#define SEQ_MODULO 0x1000
-#define SEQ_MASK   0xfff
-
-static inline int seq_less(u16 sq1, u16 sq2)
-{
-	return ((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1);
-}
-
-static inline u16 seq_inc(u16 sq)
-{
-	return (sq + 1) & SEQ_MASK;
-}
-
-static inline u16 seq_sub(u16 sq1, u16 sq2)
-{
-	return (sq1 - sq2) & SEQ_MASK;
-}
-
 static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
 					    struct tid_ampdu_rx *tid_agg_rx,
 					    int index,
@@ -687,7 +669,7 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
 	__skb_queue_tail(frames, skb);
 
 no_frame:
-	tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num);
+	tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num);
 }
 
 static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata,
@@ -699,8 +681,9 @@ static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata
 
 	lockdep_assert_held(&tid_agg_rx->reorder_lock);
 
-	while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) {
-		index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
+	while (ieee80211_sn_less(tid_agg_rx->head_seq_num, head_seq_num)) {
+		index = ieee80211_sn_sub(tid_agg_rx->head_seq_num,
+					 tid_agg_rx->ssn) %
 							tid_agg_rx->buf_size;
 		ieee80211_release_reorder_frame(sdata, tid_agg_rx, index,
 						frames);
@@ -727,8 +710,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
 	lockdep_assert_held(&tid_agg_rx->reorder_lock);
 
 	/* release the buffer until next missing frame */
-	index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
-						tid_agg_rx->buf_size;
+	index = ieee80211_sn_sub(tid_agg_rx->head_seq_num,
+				 tid_agg_rx->ssn) % tid_agg_rx->buf_size;
 	if (!tid_agg_rx->reorder_buf[index] &&
 	    tid_agg_rx->stored_mpdu_num) {
 		/*
@@ -756,19 +739,22 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
			 * Increment the head seq# also for the skipped slots.
			 */
			tid_agg_rx->head_seq_num =
-				(tid_agg_rx->head_seq_num + skipped) & SEQ_MASK;
+				(tid_agg_rx->head_seq_num +
+				 skipped) & IEEE80211_SN_MASK;
			skipped = 0;
		}
 	} else while (tid_agg_rx->reorder_buf[index]) {
		ieee80211_release_reorder_frame(sdata, tid_agg_rx, index,
						frames);
-		index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
+		index = ieee80211_sn_sub(tid_agg_rx->head_seq_num,
+					 tid_agg_rx->ssn) %
 							tid_agg_rx->buf_size;
 	}
 
 	if (tid_agg_rx->stored_mpdu_num) {
-		j = index = seq_sub(tid_agg_rx->head_seq_num,
-				    tid_agg_rx->ssn) % tid_agg_rx->buf_size;
+		j = index = ieee80211_sn_sub(tid_agg_rx->head_seq_num,
+					     tid_agg_rx->ssn) %
 							tid_agg_rx->buf_size;
 
 		for (; j != (index - 1) % tid_agg_rx->buf_size;
 		     j = (j + 1) % tid_agg_rx->buf_size) {
@@ -809,7 +795,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
 	head_seq_num = tid_agg_rx->head_seq_num;
 
 	/* frame with out of date sequence number */
-	if (seq_less(mpdu_seq_num, head_seq_num)) {
+	if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) {
 		dev_kfree_skb(skb);
 		goto out;
 	}
@@ -818,8 +804,9 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
	 * If the frame sequence number exceeds our buffering window
	 * size, release some previous frames to make room for this one.
	 */
-	if (!seq_less(mpdu_seq_num, head_seq_num + buf_size)) {
-		head_seq_num = seq_inc(seq_sub(mpdu_seq_num, buf_size));
+	if (!ieee80211_sn_less(mpdu_seq_num, head_seq_num + buf_size)) {
+		head_seq_num = ieee80211_sn_inc(
+				ieee80211_sn_sub(mpdu_seq_num, buf_size));
 		/* release stored frames up to new head to stack */
 		ieee80211_release_reorder_frames(sdata, tid_agg_rx,
 						 head_seq_num, frames);
@@ -827,7 +814,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
 
 	/* Now the new frame is always in the range of the reordering buffer */
 
-	index = seq_sub(mpdu_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size;
+	index = ieee80211_sn_sub(mpdu_seq_num,
+				 tid_agg_rx->ssn) % tid_agg_rx->buf_size;
 
 	/* check if we already stored this frame */
 	if (tid_agg_rx->reorder_buf[index]) {
@@ -843,7 +831,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
	 */
 	if (mpdu_seq_num == tid_agg_rx->head_seq_num &&
 	    tid_agg_rx->stored_mpdu_num == 0) {
-		tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num);
+		tid_agg_rx->head_seq_num =
+			ieee80211_sn_inc(tid_agg_rx->head_seq_num);
 		ret = false;
 		goto out;
 	}
@@ -1894,8 +1883,10 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
		 * 'align' will only take the values 0 or 2 here
		 * since all frames are required to be aligned
		 * to 2-byte boundaries when being passed to
-		 * mac80211. That also explains the __skb_push()
-		 * below.
+		 * mac80211; the code here works just as well if
+		 * that isn't true, but mac80211 assumes it can
+		 * access fields as 2-byte aligned (e.g. for
+		 * compare_ether_addr)
		 */
		align = ((unsigned long)(skb->data + sizeof(struct ethhdr))) & 3;
		if (align) {
@@ -2552,7 +2543,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
		case WLAN_SP_MESH_PEERING_CONFIRM:
			if (!ieee80211_vif_is_mesh(&sdata->vif))
				goto invalid;
-			if (sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE)
+			if (sdata->u.mesh.user_mpm)
				/* userspace handles this frame */
				break;
			goto queue;
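The open-coded seq_*() helpers above give way to the shared ieee80211_sn_*() helpers; the semantics are unchanged. A standalone sketch of the 12-bit wrap-around arithmetic they implement (constants redefined locally so the sketch compiles on its own):

#include <stdio.h>

typedef unsigned short u16;

#define SN_MODULO 0x1000	/* 802.11 sequence numbers wrap at 4096 */
#define SN_MASK   (SN_MODULO - 1)

static int sn_less(u16 sn1, u16 sn2)
{
	/* "less than" over half the number space, so ordering survives wrap */
	return ((sn1 - sn2) & SN_MASK) > (SN_MODULO >> 1);
}

static u16 sn_inc(u16 sn) { return (sn + 1) & SN_MASK; }
static u16 sn_sub(u16 sn1, u16 sn2) { return (sn1 - sn2) & SN_MASK; }

int main(void)
{
	printf("inc(4095) = %u\n", sn_inc(4095));	  /* 0 */
	printf("less(4095, 0) = %d\n", sn_less(4095, 0)); /* 1 */
	printf("sub(2, 4094) = %u\n", sn_sub(2, 4094));	  /* 4 */
	return 0;
}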
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 43a45cf00e06..cb34cbbaa20c 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -153,7 +153,6 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
 	u8 *elements;
 	struct ieee80211_channel *channel;
 	size_t baselen;
-	bool beacon;
 	struct ieee802_11_elems elems;
 
 	if (skb->len < 24 ||
@@ -175,11 +174,9 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
 
 		elements = mgmt->u.probe_resp.variable;
 		baselen = offsetof(struct ieee80211_mgmt, u.probe_resp.variable);
-		beacon = false;
 	} else {
 		baselen = offsetof(struct ieee80211_mgmt, u.beacon.variable);
 		elements = mgmt->u.beacon.variable;
-		beacon = true;
 	}
 
 	if (baselen > skb->len)
@@ -335,7 +332,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local)
 	ieee80211_offchannel_stop_vifs(local);
 
 	/* ensure nullfunc is transmitted before leaving operating channel */
-	drv_flush(local, false);
+	ieee80211_flush_queues(local, NULL);
 
 	ieee80211_configure_filter(local);
 
@@ -671,7 +668,7 @@ static void ieee80211_scan_state_resume(struct ieee80211_local *local,
 	ieee80211_offchannel_stop_vifs(local);
 
 	if (local->ops->flush) {
-		drv_flush(local, false);
+		ieee80211_flush_queues(local, NULL);
 		*next_delay = 0;
 	} else
 		*next_delay = HZ / 10;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 238a0cca320e..11216bc13b27 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -342,6 +342,11 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	INIT_WORK(&sta->drv_unblock_wk, sta_unblock);
 	INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
 	mutex_init(&sta->ampdu_mlme.mtx);
+#ifdef CONFIG_MAC80211_MESH
+	if (ieee80211_vif_is_mesh(&sdata->vif) &&
+	    !sdata->u.mesh.user_mpm)
+		init_timer(&sta->plink_timer);
+#endif
 
 	memcpy(sta->sta.addr, addr, ETH_ALEN);
 	sta->local = local;
@@ -551,6 +556,15 @@ static inline void __bss_tim_clear(u8 *tim, u16 id)
 	tim[id / 8] &= ~(1 << (id % 8));
 }
 
+static inline bool __bss_tim_get(u8 *tim, u16 id)
+{
+	/*
+	 * This format has been mandated by the IEEE specifications,
+	 * so this line may not be changed to use the test_bit() format.
+	 */
+	return tim[id / 8] & (1 << (id % 8));
+}
+
 static unsigned long ieee80211_tids_for_ac(int ac)
 {
 	/* If we ever support TIDs > 7, this obviously needs to be adjusted */
@@ -631,6 +645,9 @@ void sta_info_recalc_tim(struct sta_info *sta)
  done:
 	spin_lock_bh(&local->tim_lock);
 
+	if (indicate_tim == __bss_tim_get(ps->tim, id))
+		goto out_unlock;
+
 	if (indicate_tim)
 		__bss_tim_set(ps->tim, id);
 	else
@@ -642,6 +659,7 @@ void sta_info_recalc_tim(struct sta_info *sta)
 		local->tim_in_locked_section = false;
 	}
 
+out_unlock:
 	spin_unlock_bh(&local->tim_lock);
 }
 
@@ -765,8 +783,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
 {
 	struct ieee80211_local *local;
 	struct ieee80211_sub_if_data *sdata;
-	int ret, i;
-	bool have_key = false;
+	int ret;
 
 	might_sleep();
 
@@ -793,19 +810,8 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
 
 	list_del_rcu(&sta->list);
 
-	mutex_lock(&local->key_mtx);
-	for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
-		__ieee80211_key_free(key_mtx_dereference(local, sta->gtk[i]));
-		have_key = true;
-	}
-	if (sta->ptk) {
-		__ieee80211_key_free(key_mtx_dereference(local, sta->ptk));
-		have_key = true;
-	}
-	mutex_unlock(&local->key_mtx);
-
-	if (!have_key)
-		synchronize_net();
+	/* this always calls synchronize_net() */
+	ieee80211_free_sta_keys(local, sta);
 
 	sta->dead = true;
 
@@ -1391,30 +1397,16 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
 }
 EXPORT_SYMBOL(ieee80211_sta_block_awake);
 
-void ieee80211_sta_eosp_irqsafe(struct ieee80211_sta *pubsta)
+void ieee80211_sta_eosp(struct ieee80211_sta *pubsta)
 {
 	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
 	struct ieee80211_local *local = sta->local;
-	struct sk_buff *skb;
-	struct skb_eosp_msg_data *data;
 
 	trace_api_eosp(local, pubsta);
 
-	skb = alloc_skb(0, GFP_ATOMIC);
-	if (!skb) {
-		/* too bad ... but race is better than loss */
-		clear_sta_flag(sta, WLAN_STA_SP);
-		return;
-	}
-
-	data = (void *)skb->cb;
-	memcpy(data->sta, pubsta->addr, ETH_ALEN);
-	memcpy(data->iface, sta->sdata->vif.addr, ETH_ALEN);
-	skb->pkt_type = IEEE80211_EOSP_MSG;
-	skb_queue_tail(&local->skb_queue, skb);
-	tasklet_schedule(&local->tasklet);
+	clear_sta_flag(sta, WLAN_STA_SP);
 }
-EXPORT_SYMBOL(ieee80211_sta_eosp_irqsafe);
+EXPORT_SYMBOL(ieee80211_sta_eosp);
 
 void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
 				u8 tid, bool buffered)
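sta_info_recalc_tim() can now return early when the station's TIM bit already has the desired value; __bss_tim_get() reads the same byte/bit slot that the set/clear helpers write. A standalone sketch of that partial virtual bitmap indexing (the AID is hypothetical):

#include <stdio.h>
#include <string.h>

typedef unsigned char u8;
typedef unsigned short u16;

static void tim_set(u8 *tim, u16 id)   { tim[id / 8] |= 1 << (id % 8); }
static void tim_clear(u8 *tim, u16 id) { tim[id / 8] &= ~(1 << (id % 8)); }
static int  tim_get(u8 *tim, u16 id)   { return tim[id / 8] & (1 << (id % 8)); }

int main(void)
{
	u8 tim[256 / 8];
	u16 aid = 42;	/* hypothetical association ID */

	memset(tim, 0, sizeof(tim));
	tim_set(tim, aid);

	/* the "already set" test that lets the recalculation bail early */
	if (tim_get(tim, aid))
		printf("AID %u already indicated, nothing to do\n", aid);

	tim_clear(tim, aid);
	printf("after clear: %d\n", tim_get(tim, aid) != 0);	/* 0 */
	return 0;
}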
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 4947341a2a82..adc30045f99e 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -281,7 +281,6 @@ struct sta_ampdu_mlme {
  * @plink_state: peer link state
  * @plink_timeout: timeout of peer link
  * @plink_timer: peer link watch timer
- * @plink_timer_was_running: used by suspend/resume to restore timers
  * @t_offset: timing offset relative to this host
  * @t_offset_setpoint: reference timing offset of this sta to be used when
  *	calculating clockdrift
@@ -334,7 +333,8 @@ struct sta_info {
 	unsigned long driver_buffered_tids;
 
 	/* Updated from RX path only, no locking requirements */
-	unsigned long rx_packets, rx_bytes;
+	unsigned long rx_packets;
+	u64 rx_bytes;
 	unsigned long wep_weak_iv_count;
 	unsigned long last_rx;
 	long last_connected;
@@ -354,9 +354,9 @@ struct sta_info {
 	unsigned int fail_avg;
 
 	/* Updated from TX path only, no locking requirements */
-	unsigned long tx_packets;
-	unsigned long tx_bytes;
-	unsigned long tx_fragments;
+	u32 tx_fragments;
+	u64 tx_packets[IEEE80211_NUM_ACS];
+	u64 tx_bytes[IEEE80211_NUM_ACS];
 	struct ieee80211_tx_rate last_tx_rate;
 	int last_rx_rate_idx;
 	u32 last_rx_rate_flag;
@@ -379,7 +379,6 @@ struct sta_info {
 	__le16 reason;
 	u8 plink_retries;
 	bool ignore_plink_timer;
-	bool plink_timer_was_running;
 	enum nl80211_plink_state plink_state;
 	u32 plink_timeout;
 	struct timer_list plink_timer;
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 3d7cd2a0582f..c5899797a8d4 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -431,6 +431,30 @@ TRACE_EVENT(drv_prepare_multicast,
 	)
 );
 
+TRACE_EVENT(drv_set_multicast_list,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata, int mc_count),
+
+	TP_ARGS(local, sdata, mc_count),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		__field(bool, allmulti)
+		__field(int, mc_count)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		__entry->allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI;
+		__entry->mc_count = mc_count;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT " configure mc filter, count=%d, allmulti=%d",
+		LOCAL_PR_ARG, __entry->mc_count, __entry->allmulti
+	)
+);
+
 TRACE_EVENT(drv_configure_filter,
 	TP_PROTO(struct ieee80211_local *local,
 		 unsigned int changed_flags,
@@ -940,23 +964,26 @@ TRACE_EVENT(drv_get_survey,
 );
 
 TRACE_EVENT(drv_flush,
-	TP_PROTO(struct ieee80211_local *local, bool drop),
+	TP_PROTO(struct ieee80211_local *local,
+		 u32 queues, bool drop),
 
-	TP_ARGS(local, drop),
+	TP_ARGS(local, queues, drop),
 
 	TP_STRUCT__entry(
 		LOCAL_ENTRY
 		__field(bool, drop)
+		__field(u32, queues)
 	),
 
 	TP_fast_assign(
 		LOCAL_ASSIGN;
 		__entry->drop = drop;
+		__entry->queues = queues;
 	),
 
 	TP_printk(
-		LOCAL_PR_FMT " drop:%d",
-		LOCAL_PR_ARG, __entry->drop
+		LOCAL_PR_FMT " queues:0x%x drop:%d",
+		LOCAL_PR_ARG, __entry->queues, __entry->drop
 	)
 );
 
@@ -1042,15 +1069,17 @@ TRACE_EVENT(drv_remain_on_channel,
 	TP_PROTO(struct ieee80211_local *local,
 		 struct ieee80211_sub_if_data *sdata,
 		 struct ieee80211_channel *chan,
-		 unsigned int duration),
+		 unsigned int duration,
+		 enum ieee80211_roc_type type),
 
-	TP_ARGS(local, sdata, chan, duration),
+	TP_ARGS(local, sdata, chan, duration, type),
 
 	TP_STRUCT__entry(
 		LOCAL_ENTRY
 		VIF_ENTRY
 		__field(int, center_freq)
 		__field(unsigned int, duration)
+		__field(u32, type)
 	),
 
 	TP_fast_assign(
@@ -1058,12 +1087,13 @@ TRACE_EVENT(drv_remain_on_channel,
 		VIF_ASSIGN;
 		__entry->center_freq = chan->center_freq;
 		__entry->duration = duration;
+		__entry->type = type;
 	),
 
 	TP_printk(
-		LOCAL_PR_FMT VIF_PR_FMT " freq:%dMHz duration:%dms",
+		LOCAL_PR_FMT VIF_PR_FMT " freq:%dMHz duration:%dms type=%d",
 		LOCAL_PR_ARG, VIF_PR_ARG,
-		__entry->center_freq, __entry->duration
+		__entry->center_freq, __entry->duration, __entry->type
 	)
 );
 
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8914d2d2881a..9e67cc97b87b 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -233,6 +233,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx)
 
 	if (local->hw.conf.flags & IEEE80211_CONF_PS) {
 		ieee80211_stop_queues_by_reason(&local->hw,
+						IEEE80211_MAX_QUEUE_MAP,
 						IEEE80211_QUEUE_STOP_REASON_PS);
 		ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
 		ieee80211_queue_work(&local->hw,
@@ -991,15 +992,18 @@ static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
 {
 	struct sk_buff *skb;
+	int ac = -1;
 
 	if (!tx->sta)
 		return TX_CONTINUE;
 
-	tx->sta->tx_packets++;
 	skb_queue_walk(&tx->skbs, skb) {
+		ac = skb_get_queue_mapping(skb);
 		tx->sta->tx_fragments++;
-		tx->sta->tx_bytes += skb->len;
+		tx->sta->tx_bytes[ac] += skb->len;
 	}
+	if (ac >= 0)
+		tx->sta->tx_packets[ac]++;
 
 	return TX_CONTINUE;
 }
@@ -2085,7 +2089,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 		encaps_data = bridge_tunnel_header;
 		encaps_len = sizeof(bridge_tunnel_header);
 		skip_header_bytes -= 2;
-	} else if (ethertype >= 0x600) {
+	} else if (ethertype >= ETH_P_802_3_MIN) {
 		encaps_data = rfc1042_header;
 		encaps_len = sizeof(rfc1042_header);
 		skip_header_bytes -= 2;
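ieee80211_tx_h_stats() now buckets bytes and packets by the skb's queue mapping, one slot per access category; totals are recovered by summing the slots. A minimal sketch of that bookkeeping (fragment sizes and the queue mapping are invented):

#include <stdio.h>

#define NUM_ACS 4	/* stand-in for IEEE80211_NUM_ACS */

int main(void)
{
	unsigned long long tx_bytes[NUM_ACS] = { 0 };
	unsigned long long tx_packets[NUM_ACS] = { 0 };
	int frag_len[3] = { 1500, 1500, 60 };	/* assumed fragments */
	int ac = 2, i;				/* assumed: best effort */
	unsigned long long total = 0;

	for (i = 0; i < 3; i++)
		tx_bytes[ac] += frag_len[i];
	tx_packets[ac]++;	/* one MSDU counted once, like the patch */

	for (i = 0; i < NUM_ACS; i++)
		total += tx_bytes[i];
	printf("total: %llu bytes, %llu packets on AC %d\n",
	       total, tx_packets[ac], ac);
	return 0;
}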
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 0f38f43ac62e..a7368870c8ee 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -453,7 +453,8 @@ void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local,
 }
 
 void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
-				     enum queue_stop_reason reason)
+				     unsigned long queues,
+				     enum queue_stop_reason reason)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	unsigned long flags;
@@ -461,7 +462,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
 
 	spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
 
-	for (i = 0; i < hw->queues; i++)
+	for_each_set_bit(i, &queues, hw->queues)
 		__ieee80211_stop_queue(hw, i, reason);
 
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
@@ -469,7 +470,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
 
 void ieee80211_stop_queues(struct ieee80211_hw *hw)
 {
-	ieee80211_stop_queues_by_reason(hw,
+	ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_DRIVER);
 }
 EXPORT_SYMBOL(ieee80211_stop_queues);
@@ -491,6 +492,7 @@ int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue)
 EXPORT_SYMBOL(ieee80211_queue_stopped);
 
 void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
+				     unsigned long queues,
 				     enum queue_stop_reason reason)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
@@ -499,7 +501,7 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
 
 	spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
 
-	for (i = 0; i < hw->queues; i++)
+	for_each_set_bit(i, &queues, hw->queues)
 		__ieee80211_wake_queue(hw, i, reason);
 
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
@@ -507,10 +509,42 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
 
 void ieee80211_wake_queues(struct ieee80211_hw *hw)
 {
-	ieee80211_wake_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_DRIVER);
+	ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_DRIVER);
 }
 EXPORT_SYMBOL(ieee80211_wake_queues);
 
+void ieee80211_flush_queues(struct ieee80211_local *local,
+			    struct ieee80211_sub_if_data *sdata)
+{
+	u32 queues;
+
+	if (!local->ops->flush)
+		return;
+
+	if (sdata && local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) {
+		int ac;
+
+		queues = 0;
+
+		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+			queues |= BIT(sdata->vif.hw_queue[ac]);
+		if (sdata->vif.cab_queue != IEEE80211_INVAL_HW_QUEUE)
+			queues |= BIT(sdata->vif.cab_queue);
+	} else {
+		/* all queues */
+		queues = BIT(local->hw.queues) - 1;
+	}
+
+	ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_FLUSH);
+
+	drv_flush(local, queues, false);
+
+	ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_FLUSH);
+}
+
 void ieee80211_iterate_active_interfaces(
 	struct ieee80211_hw *hw, u32 iter_flags,
 	void (*iterator)(void *data, u8 *mac,
@@ -1357,6 +1391,25 @@ void ieee80211_stop_device(struct ieee80211_local *local)
 	drv_stop(local);
 }
 
+static void ieee80211_assign_chanctx(struct ieee80211_local *local,
+				     struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_chanctx_conf *conf;
+	struct ieee80211_chanctx *ctx;
+
+	if (!local->use_chanctx)
+		return;
+
+	mutex_lock(&local->chanctx_mtx);
+	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+					 lockdep_is_held(&local->chanctx_mtx));
+	if (conf) {
+		ctx = container_of(conf, struct ieee80211_chanctx, conf);
+		drv_assign_vif_chanctx(local, sdata, ctx);
+	}
+	mutex_unlock(&local->chanctx_mtx);
+}
+
 int ieee80211_reconfig(struct ieee80211_local *local)
 {
 	struct ieee80211_hw *hw = &local->hw;
@@ -1445,36 +1498,14 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 	}
 
 	list_for_each_entry(sdata, &local->interfaces, list) {
-		struct ieee80211_chanctx_conf *ctx_conf;
-
 		if (!ieee80211_sdata_running(sdata))
 			continue;
-
-		mutex_lock(&local->chanctx_mtx);
-		ctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
-				lockdep_is_held(&local->chanctx_mtx));
-		if (ctx_conf) {
-			ctx = container_of(ctx_conf, struct ieee80211_chanctx,
-					   conf);
-			drv_assign_vif_chanctx(local, sdata, ctx);
-		}
-		mutex_unlock(&local->chanctx_mtx);
+		ieee80211_assign_chanctx(local, sdata);
 	}
 
 	sdata = rtnl_dereference(local->monitor_sdata);
-	if (sdata && local->use_chanctx && ieee80211_sdata_running(sdata)) {
-		struct ieee80211_chanctx_conf *ctx_conf;
-
-		mutex_lock(&local->chanctx_mtx);
-		ctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
-				lockdep_is_held(&local->chanctx_mtx));
-		if (ctx_conf) {
-			ctx = container_of(ctx_conf, struct ieee80211_chanctx,
-					   conf);
-			drv_assign_vif_chanctx(local, sdata, ctx);
-		}
-		mutex_unlock(&local->chanctx_mtx);
-	}
+	if (sdata && ieee80211_sdata_running(sdata))
+		ieee80211_assign_chanctx(local, sdata);
 
 	/* add STAs back */
 	mutex_lock(&local->sta_mtx);
@@ -1534,11 +1565,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 					BSS_CHANGED_IDLE |
 					BSS_CHANGED_TXPOWER;
 
-#ifdef CONFIG_PM
-		if (local->resuming && !reconfig_due_to_wowlan)
-			sdata->vif.bss_conf = sdata->suspend_bss_conf;
-#endif
-
 		switch (sdata->vif.type) {
 		case NL80211_IFTYPE_STATION:
 			changed |= BSS_CHANGED_ASSOC |
@@ -1659,8 +1685,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 		mutex_unlock(&local->sta_mtx);
 	}
 
-	ieee80211_wake_queues_by_reason(hw,
-			IEEE80211_QUEUE_STOP_REASON_SUSPEND);
+	ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_SUSPEND);
 
 	/*
	 * If this is for hw restart things are still running.
@@ -1678,28 +1704,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 	mb();
 	local->resuming = false;
 
-	list_for_each_entry(sdata, &local->interfaces, list) {
-		switch(sdata->vif.type) {
-		case NL80211_IFTYPE_STATION:
-			ieee80211_sta_restart(sdata);
-			break;
-		case NL80211_IFTYPE_ADHOC:
-			ieee80211_ibss_restart(sdata);
-			break;
-		case NL80211_IFTYPE_MESH_POINT:
-			ieee80211_mesh_restart(sdata);
-			break;
-		default:
-			break;
-		}
-	}
-
 	mod_timer(&local->sta_cleanup, jiffies + 1);
-
-	mutex_lock(&local->sta_mtx);
-	list_for_each_entry(sta, &local->sta_list, list)
-		mesh_plink_restart(sta);
-	mutex_unlock(&local->sta_mtx);
 #else
 	WARN_ON(1);
 #endif
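ieee80211_flush_queues() narrows the flush to the vif's own hardware queues when the driver uses per-vif queue control and otherwise covers every queue. A standalone sketch of the bitmap construction (queue numbers and the invalid-queue marker are stand-ins, not the mac80211 definitions):

#include <stdio.h>

#define BIT(n)		(1UL << (n))
#define NUM_ACS		4
#define INVAL_QUEUE	0xff	/* stand-in for IEEE80211_INVAL_HW_QUEUE */

int main(void)
{
	int hw_queues = 8;			/* assumed queue count */
	int hw_queue[NUM_ACS] = { 0, 1, 2, 3 };	/* assumed per-AC mapping */
	int cab_queue = 4;			/* assumed multicast queue */
	int per_vif = 1, ac;
	unsigned long queues;

	if (per_vif) {
		queues = 0;
		for (ac = 0; ac < NUM_ACS; ac++)
			queues |= BIT(hw_queue[ac]);
		if (cab_queue != INVAL_QUEUE)
			queues |= BIT(cab_queue);
	} else {
		queues = BIT(hw_queues) - 1;	/* all queues */
	}

	printf("flush bitmap: 0x%lx\n", queues);	/* 0x1f */
	return 0;
}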
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index a2c2258bc84e..171344d4eb7c 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -13,6 +13,104 @@
 #include "rate.h"
 
 
+static void __check_vhtcap_disable(struct ieee80211_sub_if_data *sdata,
+				   struct ieee80211_sta_vht_cap *vht_cap,
+				   u32 flag)
+{
+	__le32 le_flag = cpu_to_le32(flag);
+
+	if (sdata->u.mgd.vht_capa_mask.vht_cap_info & le_flag &&
+	    !(sdata->u.mgd.vht_capa.vht_cap_info & le_flag))
+		vht_cap->cap &= ~flag;
+}
+
+void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata,
+				      struct ieee80211_sta_vht_cap *vht_cap)
+{
+	int i;
+	u16 rxmcs_mask, rxmcs_cap, rxmcs_n, txmcs_mask, txmcs_cap, txmcs_n;
+
+	if (!vht_cap->vht_supported)
+		return;
+
+	if (sdata->vif.type != NL80211_IFTYPE_STATION)
+		return;
+
+	__check_vhtcap_disable(sdata, vht_cap,
+			       IEEE80211_VHT_CAP_RXLDPC);
+	__check_vhtcap_disable(sdata, vht_cap,
+			       IEEE80211_VHT_CAP_SHORT_GI_80);
+	__check_vhtcap_disable(sdata, vht_cap,
+			       IEEE80211_VHT_CAP_SHORT_GI_160);
+	__check_vhtcap_disable(sdata, vht_cap,
+			       IEEE80211_VHT_CAP_TXSTBC);
+	__check_vhtcap_disable(sdata, vht_cap,
+			       IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE);
+	__check_vhtcap_disable(sdata, vht_cap,
+			       IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE);
+	__check_vhtcap_disable(sdata, vht_cap,
+			       IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN);
+	__check_vhtcap_disable(sdata, vht_cap,
+			       IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN);
+
+	/* Allow user to decrease AMPDU length exponent */
+	if (sdata->u.mgd.vht_capa_mask.vht_cap_info &
+	    cpu_to_le32(IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK)) {
+		u32 cap, n;
+
+		n = le32_to_cpu(sdata->u.mgd.vht_capa.vht_cap_info) &
+			IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
+		n >>= IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT;
+		cap = vht_cap->cap & IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
+		cap >>= IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT;
+
+		if (n < cap) {
+			vht_cap->cap &=
+				~IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
+			vht_cap->cap |=
+				n << IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT;
+		}
+	}
+
+	/* Allow the user to decrease MCSes */
+	rxmcs_mask =
+		le16_to_cpu(sdata->u.mgd.vht_capa_mask.supp_mcs.rx_mcs_map);
+	rxmcs_n = le16_to_cpu(sdata->u.mgd.vht_capa.supp_mcs.rx_mcs_map);
+	rxmcs_n &= rxmcs_mask;
+	rxmcs_cap = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map);
+
+	txmcs_mask =
+		le16_to_cpu(sdata->u.mgd.vht_capa_mask.supp_mcs.tx_mcs_map);
+	txmcs_n = le16_to_cpu(sdata->u.mgd.vht_capa.supp_mcs.tx_mcs_map);
+	txmcs_n &= txmcs_mask;
+	txmcs_cap = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map);
+	for (i = 0; i < 8; i++) {
+		u8 m, n, c;
+
+		m = (rxmcs_mask >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+		n = (rxmcs_n >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+		c = (rxmcs_cap >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+
+		if (m && ((c != IEEE80211_VHT_MCS_NOT_SUPPORTED && n < c) ||
+			  n == IEEE80211_VHT_MCS_NOT_SUPPORTED)) {
+			rxmcs_cap &= ~(3 << 2*i);
+			rxmcs_cap |= (rxmcs_n & (3 << 2*i));
+		}
+
+		m = (txmcs_mask >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+		n = (txmcs_n >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+		c = (txmcs_cap >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+
+		if (m && ((c != IEEE80211_VHT_MCS_NOT_SUPPORTED && n < c) ||
+			  n == IEEE80211_VHT_MCS_NOT_SUPPORTED)) {
+			txmcs_cap &= ~(3 << 2*i);
+			txmcs_cap |= (txmcs_n & (3 << 2*i));
+		}
+	}
+	vht_cap->vht_mcs.rx_mcs_map = cpu_to_le16(rxmcs_cap);
+	vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(txmcs_cap);
+}
+
 void
 ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 				    struct ieee80211_supported_band *sband,
@@ -20,6 +118,8 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 				    struct sta_info *sta)
 {
 	struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
+	struct ieee80211_sta_vht_cap own_cap;
+	u32 cap_info, i;
 
 	memset(vht_cap, 0, sizeof(*vht_cap));
 
@@ -35,12 +135,122 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 
 	vht_cap->vht_supported = true;
 
-	vht_cap->cap = le32_to_cpu(vht_cap_ie->vht_cap_info);
+	own_cap = sband->vht_cap;
+	/*
+	 * If user has specified capability overrides, take care
+	 * of that if the station we're setting up is the AP that
+	 * we advertised a restricted capability set to. Override
+	 * our own capabilities and then use those below.
+	 */
+	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    !test_sta_flag(sta, WLAN_STA_TDLS_PEER))
+		ieee80211_apply_vhtcap_overrides(sdata, &own_cap);
+
+	/* take some capabilities as-is */
+	cap_info = le32_to_cpu(vht_cap_ie->vht_cap_info);
+	vht_cap->cap = cap_info;
+	vht_cap->cap &= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 |
+			IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 |
+			IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 |
+			IEEE80211_VHT_CAP_RXLDPC |
+			IEEE80211_VHT_CAP_VHT_TXOP_PS |
+			IEEE80211_VHT_CAP_HTC_VHT |
+			IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK |
+			IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_UNSOL_MFB |
+			IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB |
+			IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN |
+			IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN;
+
+	/* and some based on our own capabilities */
+	switch (own_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
+	case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ:
+		vht_cap->cap |= cap_info &
+				IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ;
+		break;
+	case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ:
+		vht_cap->cap |= cap_info &
+				IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
+		break;
+	default:
+		/* nothing */
+		break;
+	}
+
+	/* symmetric capabilities */
+	vht_cap->cap |= cap_info & own_cap.cap &
+			(IEEE80211_VHT_CAP_SHORT_GI_80 |
+			 IEEE80211_VHT_CAP_SHORT_GI_160);
+
+	/* remaining ones */
+	if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) {
+		vht_cap->cap |= cap_info &
+				(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE |
+				 IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX |
+				 IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX);
+	}
+
+	if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE)
+		vht_cap->cap |= cap_info &
+				IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE;
+
+	if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE)
+		vht_cap->cap |= cap_info &
+				IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
+
+	if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE)
+		vht_cap->cap |= cap_info &
+				IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE;
+
+	if (own_cap.cap & IEEE80211_VHT_CAP_TXSTBC)
+		vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_RXSTBC_MASK;
+
+	if (own_cap.cap & IEEE80211_VHT_CAP_RXSTBC_MASK)
+		vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_TXSTBC;
 
 	/* Copy peer MCS info, the driver might need them. */
 	memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs,
 	       sizeof(struct ieee80211_vht_mcs_info));
 
+	/* but also restrict MCSes */
+	for (i = 0; i < 8; i++) {
+		u16 own_rx, own_tx, peer_rx, peer_tx;
+
+		own_rx = le16_to_cpu(own_cap.vht_mcs.rx_mcs_map);
+		own_rx = (own_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+
+		own_tx = le16_to_cpu(own_cap.vht_mcs.tx_mcs_map);
+		own_tx = (own_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+
+		peer_rx = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map);
+		peer_rx = (peer_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+
+		peer_tx = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map);
+		peer_tx = (peer_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+
+		if (peer_tx != IEEE80211_VHT_MCS_NOT_SUPPORTED) {
+			if (own_rx == IEEE80211_VHT_MCS_NOT_SUPPORTED)
+				peer_tx = IEEE80211_VHT_MCS_NOT_SUPPORTED;
+			else if (own_rx < peer_tx)
+				peer_tx = own_rx;
+		}
+
+		if (peer_rx != IEEE80211_VHT_MCS_NOT_SUPPORTED) {
+			if (own_tx == IEEE80211_VHT_MCS_NOT_SUPPORTED)
+				peer_rx = IEEE80211_VHT_MCS_NOT_SUPPORTED;
+			else if (own_tx < peer_rx)
+				peer_rx = own_tx;
+		}
+
+		vht_cap->vht_mcs.rx_mcs_map &=
+			~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2);
+		vht_cap->vht_mcs.rx_mcs_map |= cpu_to_le16(peer_rx << i * 2);
+
+		vht_cap->vht_mcs.tx_mcs_map &=
+			~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2);
+		vht_cap->vht_mcs.tx_mcs_map |= cpu_to_le16(peer_tx << i * 2);
+	}
+
+	/* finally set up the bandwidth */
 	switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
 	case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ:
 	case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ:
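The VHT setup above intersects the peer's MCS maps with our own: each 16-bit map packs a 2-bit ceiling per spatial stream (0 = MCS 0-7, 1 = MCS 0-8, 2 = MCS 0-9, 3 = not supported), and the peer's TX ceiling is clamped to our RX ceiling. A standalone sketch of one direction of that clamping (map values invented):

#include <stdio.h>

#define MCS_NOT_SUPPORTED 3	/* stand-in for IEEE80211_VHT_MCS_NOT_SUPPORTED */

int main(void)
{
	unsigned short own_rx = 0xfffa;	 /* assumed: 2 streams, MCS 0-9 each */
	unsigned short peer_tx = 0xfff5; /* assumed: 2 streams, MCS 0-8 each */
	unsigned short result = 0;
	int i;

	for (i = 0; i < 8; i++) {
		unsigned char o = (own_rx >> i * 2) & MCS_NOT_SUPPORTED;
		unsigned char p = (peer_tx >> i * 2) & MCS_NOT_SUPPORTED;

		if (p != MCS_NOT_SUPPORTED) {
			if (o == MCS_NOT_SUPPORTED)
				p = MCS_NOT_SUPPORTED;
			else if (o < p)
				p = o;	/* clamp peer to our own limit */
		}
		result |= p << i * 2;
	}
	printf("negotiated tx map: 0x%04x\n", result);	/* 0xfff5 */
	return 0;
}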
diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h
index a4dcaf1dd4b6..5c9e021994ba 100644
--- a/net/mac802154/mac802154.h
+++ b/net/mac802154/mac802154.h
@@ -88,8 +88,6 @@ struct mac802154_sub_if_data {
 
 #define mac802154_to_priv(_hw)	container_of(_hw, struct mac802154_priv, hw)
 
-#define MAC802154_MAX_XMIT_ATTEMPTS	3
-
 #define MAC802154_CHAN_NONE		(~(u8)0) /* No channel is assigned */
 
 extern struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced;
@@ -114,5 +112,6 @@ void mac802154_dev_set_ieee_addr(struct net_device *dev);
 u16 mac802154_dev_get_pan_id(const struct net_device *dev);
 void mac802154_dev_set_pan_id(struct net_device *dev, u16 val);
 void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan);
+u8 mac802154_dev_get_dsn(const struct net_device *dev);
 
 #endif /* MAC802154_H */
diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c
index d8d277006089..a99910d4d52f 100644
--- a/net/mac802154/mac_cmd.c
+++ b/net/mac802154/mac_cmd.c
@@ -73,4 +73,5 @@ struct ieee802154_mlme_ops mac802154_mlme_wpan = {
 	.start_req = mac802154_mlme_start_req,
 	.get_pan_id = mac802154_dev_get_pan_id,
 	.get_short_addr = mac802154_dev_get_short_addr,
+	.get_dsn = mac802154_dev_get_dsn,
 };
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index f47781ab0ccc..8ded97cf1c33 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -159,6 +159,15 @@ void mac802154_dev_set_pan_id(struct net_device *dev, u16 val)
 	}
 }
 
+u8 mac802154_dev_get_dsn(const struct net_device *dev)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	return priv->dsn++;
+}
+
 static void phy_chan_notify(struct work_struct *work)
 {
 	struct phy_chan_notify_work *nw = container_of(work,
@@ -167,9 +176,15 @@ static void phy_chan_notify(struct work_struct *work)
 	struct mac802154_sub_if_data *priv = netdev_priv(nw->dev);
 	int res;
 
+	mutex_lock(&priv->hw->phy->pib_lock);
 	res = hw->ops->set_channel(&hw->hw, priv->page, priv->chan);
 	if (res)
 		pr_debug("set_channel failed\n");
+	else {
+		priv->hw->phy->current_channel = priv->chan;
+		priv->hw->phy->current_page = priv->page;
+	}
+	mutex_unlock(&priv->hw->phy->pib_lock);
 
 	kfree(nw);
 }
@@ -186,8 +201,11 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)
 	priv->chan = chan;
 	spin_unlock_bh(&priv->mib_lock);
 
+	mutex_lock(&priv->hw->phy->pib_lock);
 	if (priv->hw->phy->current_channel != priv->chan ||
 	    priv->hw->phy->current_page != priv->page) {
+		mutex_unlock(&priv->hw->phy->pib_lock);
+
 		work = kzalloc(sizeof(*work), GFP_ATOMIC);
 		if (!work)
 			return;
@@ -195,5 +213,6 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)
 		INIT_WORK(&work->work, phy_chan_notify);
 		work->dev = dev;
 		queue_work(priv->hw->dev_workqueue, &work->work);
-	}
+	} else
+		mutex_unlock(&priv->hw->phy->pib_lock);
 }
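mac802154_dev_get_dsn() hands out the 802.15.4 data sequence number with a post-increment on a u8, so consecutive frames get consecutive DSNs and the counter wraps at 256. A trivial standalone sketch of that behaviour:

#include <stdio.h>

typedef unsigned char u8;

static u8 dsn = 254;	/* assumed starting value */

static u8 dev_get_dsn(void)
{
	return dsn++;	/* post-increment: caller gets the pre-bump value */
}

int main(void)
{
	u8 a = dev_get_dsn();
	u8 b = dev_get_dsn();
	u8 c = dev_get_dsn();

	printf("%u %u %u\n", a, b, c);	/* prints: 254 255 0 */
	return 0;
}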
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 4e09d070995a..6d1647399d4f 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -25,6 +25,7 @@
25#include <linux/if_arp.h> 25#include <linux/if_arp.h>
26#include <linux/crc-ccitt.h> 26#include <linux/crc-ccitt.h>
27 27
28#include <net/ieee802154_netdev.h>
28#include <net/mac802154.h> 29#include <net/mac802154.h>
29#include <net/wpan-phy.h> 30#include <net/wpan-phy.h>
30 31
@@ -39,12 +40,12 @@ struct xmit_work {
39 struct mac802154_priv *priv; 40 struct mac802154_priv *priv;
40 u8 chan; 41 u8 chan;
41 u8 page; 42 u8 page;
42 u8 xmit_attempts;
43}; 43};
44 44
45static void mac802154_xmit_worker(struct work_struct *work) 45static void mac802154_xmit_worker(struct work_struct *work)
46{ 46{
47 struct xmit_work *xw = container_of(work, struct xmit_work, work); 47 struct xmit_work *xw = container_of(work, struct xmit_work, work);
48 struct mac802154_sub_if_data *sdata;
48 int res; 49 int res;
49 50
50 mutex_lock(&xw->priv->phy->pib_lock); 51 mutex_lock(&xw->priv->phy->pib_lock);
@@ -57,21 +58,23 @@ static void mac802154_xmit_worker(struct work_struct *work)
57 pr_debug("set_channel failed\n"); 58 pr_debug("set_channel failed\n");
58 goto out; 59 goto out;
59 } 60 }
61
62 xw->priv->phy->current_channel = xw->chan;
63 xw->priv->phy->current_page = xw->page;
60 } 64 }
61 65
62 res = xw->priv->ops->xmit(&xw->priv->hw, xw->skb); 66 res = xw->priv->ops->xmit(&xw->priv->hw, xw->skb);
67 if (res)
68 pr_debug("transmission failed\n");
63 69
64out: 70out:
65 mutex_unlock(&xw->priv->phy->pib_lock); 71 mutex_unlock(&xw->priv->phy->pib_lock);
66 72
67 if (res) { 73 /* Restart the netif queue on each sub_if_data object. */
68 if (xw->xmit_attempts++ < MAC802154_MAX_XMIT_ATTEMPTS) { 74 rcu_read_lock();
69 queue_work(xw->priv->dev_workqueue, &xw->work); 75 list_for_each_entry_rcu(sdata, &xw->priv->slaves, list)
70 return; 76 netif_wake_queue(sdata->dev);
71 } else 77 rcu_read_unlock();
72 pr_debug("transmission failed for %d times",
73 MAC802154_MAX_XMIT_ATTEMPTS);
74 }
75 78
76 dev_kfree_skb(xw->skb); 79 dev_kfree_skb(xw->skb);
77 80
@@ -82,6 +85,7 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
82 u8 page, u8 chan) 85 u8 page, u8 chan)
83{ 86{
84 struct xmit_work *work; 87 struct xmit_work *work;
88 struct mac802154_sub_if_data *sdata;
85 89
86 if (!(priv->phy->channels_supported[page] & (1 << chan))) { 90 if (!(priv->phy->channels_supported[page] & (1 << chan))) {
87 WARN_ON(1); 91 WARN_ON(1);
@@ -109,12 +113,17 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
109 return NETDEV_TX_BUSY; 113 return NETDEV_TX_BUSY;
110 } 114 }
111 115
116 /* Stop the netif queue on each sub_if_data object. */
117 rcu_read_lock();
118 list_for_each_entry_rcu(sdata, &priv->slaves, list)
119 netif_stop_queue(sdata->dev);
120 rcu_read_unlock();
121
112 INIT_WORK(&work->work, mac802154_xmit_worker); 122 INIT_WORK(&work->work, mac802154_xmit_worker);
113 work->skb = skb; 123 work->skb = skb;
114 work->priv = priv; 124 work->priv = priv;
115 work->page = page; 125 work->page = page;
116 work->chan = chan; 126 work->chan = chan;
117 work->xmit_attempts = 0;
118 127
119 queue_work(priv->dev_workqueue, &work->work); 128 queue_work(priv->dev_workqueue, &work->work);
120 129
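
Reviewer note: the tx.c rewrite trades the driver-level retry loop (xmit_attempts, removed above) for flow control. All slave interfaces are stopped before the frame is handed to the worker and woken once ops->xmit() has been attempted, so at most one frame is in flight and everything else queues in the qdisc (hence the tx_queue_len bump in wpan.c below). The resulting one-in-flight pattern, condensed from the two hunks:

/* submit side (mac802154_tx): gate the stack before queueing the work */
rcu_read_lock();
list_for_each_entry_rcu(sdata, &priv->slaves, list)
	netif_stop_queue(sdata->dev);	/* no new skbs until this one is done */
rcu_read_unlock();
queue_work(priv->dev_workqueue, &work->work);

/* worker side (mac802154_xmit_worker), after the transmit attempt: */
rcu_read_lock();
list_for_each_entry_rcu(sdata, &xw->priv->slaves, list)
	netif_wake_queue(sdata->dev);	/* let the qdisc feed the next frame */
rcu_read_unlock();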
diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
index d20c6d3c247d..2ca2f4dceab7 100644
--- a/net/mac802154/wpan.c
+++ b/net/mac802154/wpan.c
@@ -145,6 +145,8 @@ static int mac802154_header_create(struct sk_buff *skb,
145 145
146 head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */ 146 head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */
147 fc = mac_cb_type(skb); 147 fc = mac_cb_type(skb);
148 if (mac_cb_is_ackreq(skb))
149 fc |= IEEE802154_FC_ACK_REQ;
148 150
149 if (!saddr) { 151 if (!saddr) {
150 spin_lock_bh(&priv->mib_lock); 152 spin_lock_bh(&priv->mib_lock);
@@ -358,7 +360,7 @@ void mac802154_wpan_setup(struct net_device *dev)
358 dev->header_ops = &mac802154_header_ops; 360 dev->header_ops = &mac802154_header_ops;
359 dev->needed_tailroom = 2; /* FCS */ 361 dev->needed_tailroom = 2; /* FCS */
360 dev->mtu = IEEE802154_MTU; 362 dev->mtu = IEEE802154_MTU;
361 dev->tx_queue_len = 10; 363 dev->tx_queue_len = 300;
362 dev->type = ARPHRD_IEEE802154; 364 dev->type = ARPHRD_IEEE802154;
363 dev->flags = IFF_NOARP | IFF_BROADCAST; 365 dev->flags = IFF_NOARP | IFF_BROADCAST;
364 dev->watchdog_timeo = 0; 366 dev->watchdog_timeo = 0;
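
Reviewer note: the wpan.c header change wires the per-socket ack request through to the MAC header. mac_cb_is_ackreq() reflects what the sender asked for, and IEEE802154_FC_ACK_REQ is the AR bit (bit 5) of the 802.15.4 frame control field. A rough sketch of where that bit lands, assuming the FC assembly done elsewhere in mac802154_header_create() (the head[] offsets here are illustrative, not quoted from the patch):

u16 fc = mac_cb_type(skb);		/* frame type in bits 0-2 */

if (mac_cb_is_ackreq(skb))
	fc |= IEEE802154_FC_ACK_REQ;	/* AR bit: receiver must ACK */

/* later, once addressing fields are known, FC is written LSB first */
head[0] = fc & 0xff;
head[1] = fc >> 8;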
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a9c488b6c50d..07c865a31a3d 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -5,6 +5,7 @@
5 * way. 5 * way.
6 * 6 *
7 * Rusty Russell (C)2000 -- This code is GPL. 7 * Rusty Russell (C)2000 -- This code is GPL.
8 * Patrick McHardy (c) 2006-2012
8 */ 9 */
9#include <linux/kernel.h> 10#include <linux/kernel.h>
10#include <linux/netfilter.h> 11#include <linux/netfilter.h>
@@ -276,10 +277,30 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
276EXPORT_SYMBOL(nf_nat_decode_session_hook); 277EXPORT_SYMBOL(nf_nat_decode_session_hook);
277#endif 278#endif
278 279
280static int __net_init netfilter_net_init(struct net *net)
281{
279#ifdef CONFIG_PROC_FS 282#ifdef CONFIG_PROC_FS
280struct proc_dir_entry *proc_net_netfilter; 283 net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
281EXPORT_SYMBOL(proc_net_netfilter); 284 net->proc_net);
285 if (!net->nf.proc_netfilter) {
286 if (!net_eq(net, &init_net))
287 pr_err("cannot create netfilter proc entry");
288
289 return -ENOMEM;
290 }
282#endif 291#endif
292 return 0;
293}
294
295static void __net_exit netfilter_net_exit(struct net *net)
296{
297 remove_proc_entry("netfilter", net->proc_net);
298}
299
300static struct pernet_operations netfilter_net_ops = {
301 .init = netfilter_net_init,
302 .exit = netfilter_net_exit,
303};
283 304
284void __init netfilter_init(void) 305void __init netfilter_init(void)
285{ 306{
@@ -289,11 +310,8 @@ void __init netfilter_init(void)
289 INIT_LIST_HEAD(&nf_hooks[i][h]); 310 INIT_LIST_HEAD(&nf_hooks[i][h]);
290 } 311 }
291 312
292#ifdef CONFIG_PROC_FS 313 if (register_pernet_subsys(&netfilter_net_ops) < 0)
293 proc_net_netfilter = proc_mkdir("netfilter", init_net.proc_net);
294 if (!proc_net_netfilter)
295 panic("cannot create netfilter proc entry"); 314 panic("cannot create netfilter proc entry");
296#endif
297 315
298 if (netfilter_log_init() < 0) 316 if (netfilter_log_init() < 0)
299 panic("cannot initialize nf_log"); 317 panic("cannot initialize nf_log");
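
Reviewer note: the core.c conversion is the standard pernet pattern. Instead of one global /proc/net/netfilter created at boot, register_pernet_subsys() arranges for the .init hook to run for init_net and for every namespace created afterwards, with .exit tearing the entry down on namespace destruction. Reduced to its skeleton (names mirror the patch):

#include <linux/proc_fs.h>
#include <net/net_namespace.h>

static int __net_init nf_example_net_init(struct net *net)
{
	/* runs once per network namespace, including init_net */
	if (!proc_net_mkdir(net, "netfilter", net->proc_net))
		return -ENOMEM;
	return 0;
}

static void __net_exit nf_example_net_exit(struct net *net)
{
	remove_proc_entry("netfilter", net->proc_net);
}

static struct pernet_operations nf_example_net_ops = {
	.init = nf_example_net_init,
	.exit = nf_example_net_exit,
};

Registration failure for the initial namespace is still fatal, which is why netfilter_init() keeps the panic() while dropping the CONFIG_PROC_FS guard around it.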
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 1ba9dbc0e107..86f5e26f39d3 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -15,7 +15,6 @@
15#include <linux/ip.h> 15#include <linux/ip.h>
16#include <linux/skbuff.h> 16#include <linux/skbuff.h>
17#include <linux/spinlock.h> 17#include <linux/spinlock.h>
18#include <linux/netlink.h>
19#include <linux/rculist.h> 18#include <linux/rculist.h>
20#include <net/netlink.h> 19#include <net/netlink.h>
21 20
@@ -1085,7 +1084,7 @@ static int
1085dump_init(struct netlink_callback *cb) 1084dump_init(struct netlink_callback *cb)
1086{ 1085{
1087 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); 1086 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
1088 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); 1087 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
1089 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; 1088 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1090 struct nlattr *attr = (void *)nlh + min_len; 1089 struct nlattr *attr = (void *)nlh + min_len;
1091 u32 dump_type; 1090 u32 dump_type;
@@ -1301,7 +1300,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
1301 struct sk_buff *skb2; 1300 struct sk_buff *skb2;
1302 struct nlmsgerr *errmsg; 1301 struct nlmsgerr *errmsg;
1303 size_t payload = sizeof(*errmsg) + nlmsg_len(nlh); 1302 size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
1304 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); 1303 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
1305 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; 1304 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1306 struct nlattr *cmdattr; 1305 struct nlattr *cmdattr;
1307 u32 *errline; 1306 u32 *errline;
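
Reviewer note: both ipset hunks are mechanical. nlmsg_total_size() from <net/netlink.h> is the modern spelling of the legacy NLMSG_SPACE() macro, and with <net/netlink.h> already included the direct <linux/netlink.h> include becomes redundant. The two helpers compute the same aligned length, so the attribute pointers still land just past the netlink header plus the nfgenmsg payload:

#include <net/netlink.h>
#include <linux/netfilter/nfnetlink.h>

/* nlmsg_msg_size(payload)   == NLMSG_HDRLEN + payload         (unpadded)
 * nlmsg_total_size(payload) == NLMSG_ALIGN(nlmsg_msg_size())  (padded),
 * and the padded form is exactly what NLMSG_SPACE() used to yield.
 */
static struct nlattr *first_attr(struct nlmsghdr *nlh)
{
	int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));

	return (void *)nlh + min_len;	/* skip header + nfgenmsg */
}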
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 0b779d7df881..dfd7b65b3d2a 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -58,6 +58,18 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
58 module_put(app->module); 58 module_put(app->module);
59} 59}
60 60
61static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
62{
63 kfree(inc->timeout_table);
64 kfree(inc);
65}
66
67static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
68{
69 struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head);
70
71 ip_vs_app_inc_destroy(inc);
72}
61 73
62/* 74/*
63 * Allocate/initialize app incarnation and register it in proto apps. 75 * Allocate/initialize app incarnation and register it in proto apps.
@@ -106,8 +118,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
106 return 0; 118 return 0;
107 119
108 out: 120 out:
109 kfree(inc->timeout_table); 121 ip_vs_app_inc_destroy(inc);
110 kfree(inc);
111 return ret; 122 return ret;
112} 123}
113 124
@@ -131,8 +142,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
131 142
132 list_del(&inc->a_list); 143 list_del(&inc->a_list);
133 144
134 kfree(inc->timeout_table); 145 call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
135 kfree(inc);
136} 146}
137 147
138 148
@@ -144,9 +154,9 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc)
144{ 154{
145 int result; 155 int result;
146 156
147 atomic_inc(&inc->usecnt); 157 result = ip_vs_app_get(inc->app);
148 if (unlikely((result = ip_vs_app_get(inc->app)) != 1)) 158 if (result)
149 atomic_dec(&inc->usecnt); 159 atomic_inc(&inc->usecnt);
150 return result; 160 return result;
151} 161}
152 162
@@ -156,8 +166,8 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc)
156 */ 166 */
157void ip_vs_app_inc_put(struct ip_vs_app *inc) 167void ip_vs_app_inc_put(struct ip_vs_app *inc)
158{ 168{
159 ip_vs_app_put(inc->app);
160 atomic_dec(&inc->usecnt); 169 atomic_dec(&inc->usecnt);
170 ip_vs_app_put(inc->app);
161} 171}
162 172
163 173
@@ -218,6 +228,7 @@ out_unlock:
218/* 228/*
219 * ip_vs_app unregistration routine 229 * ip_vs_app unregistration routine
220 * We are sure there are no app incarnations attached to services 230 * We are sure there are no app incarnations attached to services
231 * Caller should use synchronize_rcu() or rcu_barrier()
221 */ 232 */
222void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) 233void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
223{ 234{
@@ -341,14 +352,14 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
341 unsigned int flag, __u32 seq, int diff) 352 unsigned int flag, __u32 seq, int diff)
342{ 353{
343 /* spinlock is to keep updating cp->flags atomic */ 354 /* spinlock is to keep updating cp->flags atomic */
344 spin_lock(&cp->lock); 355 spin_lock_bh(&cp->lock);
345 if (!(cp->flags & flag) || after(seq, vseq->init_seq)) { 356 if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
346 vseq->previous_delta = vseq->delta; 357 vseq->previous_delta = vseq->delta;
347 vseq->delta += diff; 358 vseq->delta += diff;
348 vseq->init_seq = seq; 359 vseq->init_seq = seq;
349 cp->flags |= flag; 360 cp->flags |= flag;
350 } 361 }
351 spin_unlock(&cp->lock); 362 spin_unlock_bh(&cp->lock);
352} 363}
353 364
354static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, 365static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
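
Reviewer note: the ip_vs_app.c changes prepare the app incarnation list for lockless readers. An incarnation unlinked by ip_vs_app_inc_release() may still be visible to a concurrent traversal, so the kfree() pair is factored into ip_vs_app_inc_destroy() and deferred through call_rcu(). The deferred-free idiom in isolation, as a generic sketch (struct item is a stand-in):

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct item {
	struct list_head list;
	struct rcu_head rcu_head;	/* storage for the deferred callback */
	void *timeout_table;
};

static void item_rcu_free(struct rcu_head *head)
{
	struct item *it = container_of(head, struct item, rcu_head);

	kfree(it->timeout_table);
	kfree(it);
}

static void item_release(struct item *it)
{
	list_del(&it->list);			/* readers may still hold it */
	call_rcu(&it->rcu_head, item_rcu_free);	/* free after a grace period */
}

The reordering in ip_vs_app_inc_get()/_put() is the usual refcount discipline: take the module reference before advertising a user, and drop the user count before releasing the module.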
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 704e514e02ab..de6475894a39 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -79,51 +79,21 @@ static unsigned int ip_vs_conn_rnd __read_mostly;
79 79
80struct ip_vs_aligned_lock 80struct ip_vs_aligned_lock
81{ 81{
82 rwlock_t l; 82 spinlock_t l;
83} __attribute__((__aligned__(SMP_CACHE_BYTES))); 83} __attribute__((__aligned__(SMP_CACHE_BYTES)));
84 84
85/* lock array for conn table */ 85/* lock array for conn table */
86static struct ip_vs_aligned_lock 86static struct ip_vs_aligned_lock
87__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned; 87__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
88 88
89static inline void ct_read_lock(unsigned int key)
90{
91 read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
92}
93
94static inline void ct_read_unlock(unsigned int key)
95{
96 read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
97}
98
99static inline void ct_write_lock(unsigned int key)
100{
101 write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
102}
103
104static inline void ct_write_unlock(unsigned int key)
105{
106 write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
107}
108
109static inline void ct_read_lock_bh(unsigned int key)
110{
111 read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
112}
113
114static inline void ct_read_unlock_bh(unsigned int key)
115{
116 read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
117}
118
119static inline void ct_write_lock_bh(unsigned int key) 89static inline void ct_write_lock_bh(unsigned int key)
120{ 90{
121 write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); 91 spin_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
122} 92}
123 93
124static inline void ct_write_unlock_bh(unsigned int key) 94static inline void ct_write_unlock_bh(unsigned int key)
125{ 95{
126 write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); 96 spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
127} 97}
128 98
129 99
@@ -197,13 +167,13 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
197 /* Hash by protocol, client address and port */ 167 /* Hash by protocol, client address and port */
198 hash = ip_vs_conn_hashkey_conn(cp); 168 hash = ip_vs_conn_hashkey_conn(cp);
199 169
200 ct_write_lock(hash); 170 ct_write_lock_bh(hash);
201 spin_lock(&cp->lock); 171 spin_lock(&cp->lock);
202 172
203 if (!(cp->flags & IP_VS_CONN_F_HASHED)) { 173 if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
204 hlist_add_head(&cp->c_list, &ip_vs_conn_tab[hash]);
205 cp->flags |= IP_VS_CONN_F_HASHED; 174 cp->flags |= IP_VS_CONN_F_HASHED;
206 atomic_inc(&cp->refcnt); 175 atomic_inc(&cp->refcnt);
176 hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
207 ret = 1; 177 ret = 1;
208 } else { 178 } else {
209 pr_err("%s(): request for already hashed, called from %pF\n", 179 pr_err("%s(): request for already hashed, called from %pF\n",
@@ -212,7 +182,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
212 } 182 }
213 183
214 spin_unlock(&cp->lock); 184 spin_unlock(&cp->lock);
215 ct_write_unlock(hash); 185 ct_write_unlock_bh(hash);
216 186
217 return ret; 187 return ret;
218} 188}
@@ -220,7 +190,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
220 190
221/* 191/*
222 * UNhashes ip_vs_conn from ip_vs_conn_tab. 192 * UNhashes ip_vs_conn from ip_vs_conn_tab.
223 * returns bool success. 193 * returns bool success. Caller should hold conn reference.
224 */ 194 */
225static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) 195static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
226{ 196{
@@ -230,11 +200,11 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
230 /* unhash it and decrease its reference counter */ 200 /* unhash it and decrease its reference counter */
231 hash = ip_vs_conn_hashkey_conn(cp); 201 hash = ip_vs_conn_hashkey_conn(cp);
232 202
233 ct_write_lock(hash); 203 ct_write_lock_bh(hash);
234 spin_lock(&cp->lock); 204 spin_lock(&cp->lock);
235 205
236 if (cp->flags & IP_VS_CONN_F_HASHED) { 206 if (cp->flags & IP_VS_CONN_F_HASHED) {
237 hlist_del(&cp->c_list); 207 hlist_del_rcu(&cp->c_list);
238 cp->flags &= ~IP_VS_CONN_F_HASHED; 208 cp->flags &= ~IP_VS_CONN_F_HASHED;
239 atomic_dec(&cp->refcnt); 209 atomic_dec(&cp->refcnt);
240 ret = 1; 210 ret = 1;
@@ -242,7 +212,37 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
242 ret = 0; 212 ret = 0;
243 213
244 spin_unlock(&cp->lock); 214 spin_unlock(&cp->lock);
245 ct_write_unlock(hash); 215 ct_write_unlock_bh(hash);
216
217 return ret;
218}
219
220/* Try to unlink ip_vs_conn from ip_vs_conn_tab.
221 * returns bool success.
222 */
223static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
224{
225 unsigned int hash;
226 bool ret;
227
228 hash = ip_vs_conn_hashkey_conn(cp);
229
230 ct_write_lock_bh(hash);
231 spin_lock(&cp->lock);
232
233 if (cp->flags & IP_VS_CONN_F_HASHED) {
234 ret = false;
235 /* Decrease refcnt and unlink conn only if we are last user */
236 if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) {
237 hlist_del_rcu(&cp->c_list);
238 cp->flags &= ~IP_VS_CONN_F_HASHED;
239 ret = true;
240 }
241 } else
242 ret = atomic_read(&cp->refcnt) ? false : true;
243
244 spin_unlock(&cp->lock);
245 ct_write_unlock_bh(hash);
246 246
247 return ret; 247 return ret;
248} 248}
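
Reviewer note: ip_vs_conn_unlink() is the linchpin of the new scheme. With lookups no longer excluded by a write lock, the expire path may only free a connection when it can prove it holds the last reference; atomic_cmpxchg(&cp->refcnt, 1, 0) does exactly that in one step, and if a lookup raced in and bumped the count, the cmpxchg fails and the entry stays hashed. The two halves of the idiom, stripped of the IPVS details:

#include <linux/atomic.h>
#include <linux/types.h>

/* expire side: succeed only if we held the sole reference */
static bool put_and_unlink_if_last(atomic_t *refcnt)
{
	return atomic_cmpxchg(refcnt, 1, 0) == 1;
}

/* lookup side: never resurrect an object already going away */
static bool get_if_live(atomic_t *refcnt)
{
	return atomic_inc_not_zero(refcnt);
}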
@@ -262,24 +262,25 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
262 262
263 hash = ip_vs_conn_hashkey_param(p, false); 263 hash = ip_vs_conn_hashkey_param(p, false);
264 264
265 ct_read_lock(hash); 265 rcu_read_lock();
266 266
267 hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 267 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
268 if (cp->af == p->af && 268 if (p->cport == cp->cport && p->vport == cp->vport &&
269 p->cport == cp->cport && p->vport == cp->vport && 269 cp->af == p->af &&
270 ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && 270 ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
271 ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && 271 ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
272 ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && 272 ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
273 p->protocol == cp->protocol && 273 p->protocol == cp->protocol &&
274 ip_vs_conn_net_eq(cp, p->net)) { 274 ip_vs_conn_net_eq(cp, p->net)) {
275 if (!__ip_vs_conn_get(cp))
276 continue;
275 /* HIT */ 277 /* HIT */
276 atomic_inc(&cp->refcnt); 278 rcu_read_unlock();
277 ct_read_unlock(hash);
278 return cp; 279 return cp;
279 } 280 }
280 } 281 }
281 282
282 ct_read_unlock(hash); 283 rcu_read_unlock();
283 284
284 return NULL; 285 return NULL;
285} 286}
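
Reviewer note: all three lookup routines (__ip_vs_conn_in_get, ip_vs_ct_in_get, ip_vs_conn_out_get) now follow the same shape. They traverse the RCU-protected chain and, on a match, try to pin the entry with __ip_vs_conn_get(), presumably an atomic_inc_not_zero() wrapper, so an entry whose refcount already reached zero is skipped instead of revived. Trimmed to the skeleton, with match() standing in for the per-routine field comparison:

rcu_read_lock();
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
	if (!match(cp, p))
		continue;
	if (!__ip_vs_conn_get(cp))
		continue;		/* lost the race with the final put */
	rcu_read_unlock();
	return cp;			/* pinned by our new reference */
}
rcu_read_unlock();
return NULL;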
@@ -346,14 +347,16 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
346 347
347 hash = ip_vs_conn_hashkey_param(p, false); 348 hash = ip_vs_conn_hashkey_param(p, false);
348 349
349 ct_read_lock(hash); 350 rcu_read_lock();
350 351
351 hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 352 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
352 if (!ip_vs_conn_net_eq(cp, p->net)) 353 if (unlikely(p->pe_data && p->pe->ct_match)) {
353 continue; 354 if (!ip_vs_conn_net_eq(cp, p->net))
354 if (p->pe_data && p->pe->ct_match) { 355 continue;
355 if (p->pe == cp->pe && p->pe->ct_match(p, cp)) 356 if (p->pe == cp->pe && p->pe->ct_match(p, cp)) {
356 goto out; 357 if (__ip_vs_conn_get(cp))
358 goto out;
359 }
357 continue; 360 continue;
358 } 361 }
359 362
@@ -363,17 +366,18 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
363 * p->vaddr is a fwmark */ 366 * p->vaddr is a fwmark */
364 ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC : 367 ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC :
365 p->af, p->vaddr, &cp->vaddr) && 368 p->af, p->vaddr, &cp->vaddr) &&
366 p->cport == cp->cport && p->vport == cp->vport && 369 p->vport == cp->vport && p->cport == cp->cport &&
367 cp->flags & IP_VS_CONN_F_TEMPLATE && 370 cp->flags & IP_VS_CONN_F_TEMPLATE &&
368 p->protocol == cp->protocol) 371 p->protocol == cp->protocol &&
369 goto out; 372 ip_vs_conn_net_eq(cp, p->net)) {
373 if (__ip_vs_conn_get(cp))
374 goto out;
375 }
370 } 376 }
371 cp = NULL; 377 cp = NULL;
372 378
373 out: 379 out:
374 if (cp) 380 rcu_read_unlock();
375 atomic_inc(&cp->refcnt);
376 ct_read_unlock(hash);
377 381
378 IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n", 382 IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
379 ip_vs_proto_name(p->protocol), 383 ip_vs_proto_name(p->protocol),
@@ -398,23 +402,24 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
398 */ 402 */
399 hash = ip_vs_conn_hashkey_param(p, true); 403 hash = ip_vs_conn_hashkey_param(p, true);
400 404
401 ct_read_lock(hash); 405 rcu_read_lock();
402 406
403 hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 407 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
404 if (cp->af == p->af && 408 if (p->vport == cp->cport && p->cport == cp->dport &&
405 p->vport == cp->cport && p->cport == cp->dport && 409 cp->af == p->af &&
406 ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && 410 ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
407 ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && 411 ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
408 p->protocol == cp->protocol && 412 p->protocol == cp->protocol &&
409 ip_vs_conn_net_eq(cp, p->net)) { 413 ip_vs_conn_net_eq(cp, p->net)) {
414 if (!__ip_vs_conn_get(cp))
415 continue;
410 /* HIT */ 416 /* HIT */
411 atomic_inc(&cp->refcnt);
412 ret = cp; 417 ret = cp;
413 break; 418 break;
414 } 419 }
415 } 420 }
416 421
417 ct_read_unlock(hash); 422 rcu_read_unlock();
418 423
419 IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", 424 IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
420 ip_vs_proto_name(p->protocol), 425 ip_vs_proto_name(p->protocol),
@@ -457,13 +462,13 @@ void ip_vs_conn_put(struct ip_vs_conn *cp)
457void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) 462void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
458{ 463{
459 if (ip_vs_conn_unhash(cp)) { 464 if (ip_vs_conn_unhash(cp)) {
460 spin_lock(&cp->lock); 465 spin_lock_bh(&cp->lock);
461 if (cp->flags & IP_VS_CONN_F_NO_CPORT) { 466 if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
462 atomic_dec(&ip_vs_conn_no_cport_cnt); 467 atomic_dec(&ip_vs_conn_no_cport_cnt);
463 cp->flags &= ~IP_VS_CONN_F_NO_CPORT; 468 cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
464 cp->cport = cport; 469 cp->cport = cport;
465 } 470 }
466 spin_unlock(&cp->lock); 471 spin_unlock_bh(&cp->lock);
467 472
468 /* hash on new dport */ 473 /* hash on new dport */
469 ip_vs_conn_hash(cp); 474 ip_vs_conn_hash(cp);
@@ -549,7 +554,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
549 return; 554 return;
550 555
551 /* Increase the refcnt counter of the dest */ 556 /* Increase the refcnt counter of the dest */
552 atomic_inc(&dest->refcnt); 557 ip_vs_dest_hold(dest);
553 558
554 conn_flags = atomic_read(&dest->conn_flags); 559 conn_flags = atomic_read(&dest->conn_flags);
555 if (cp->protocol != IPPROTO_UDP) 560 if (cp->protocol != IPPROTO_UDP)
@@ -606,20 +611,22 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
606 * Check if there is a destination for the connection, if so 611 * Check if there is a destination for the connection, if so
607 * bind the connection to the destination. 612 * bind the connection to the destination.
608 */ 613 */
609struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) 614void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
610{ 615{
611 struct ip_vs_dest *dest; 616 struct ip_vs_dest *dest;
612 617
618 rcu_read_lock();
613 dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, 619 dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
614 cp->dport, &cp->vaddr, cp->vport, 620 cp->dport, &cp->vaddr, cp->vport,
615 cp->protocol, cp->fwmark, cp->flags); 621 cp->protocol, cp->fwmark, cp->flags);
616 if (dest) { 622 if (dest) {
617 struct ip_vs_proto_data *pd; 623 struct ip_vs_proto_data *pd;
618 624
619 spin_lock(&cp->lock); 625 spin_lock_bh(&cp->lock);
620 if (cp->dest) { 626 if (cp->dest) {
621 spin_unlock(&cp->lock); 627 spin_unlock_bh(&cp->lock);
622 return dest; 628 rcu_read_unlock();
629 return;
623 } 630 }
624 631
625 /* Applications work depending on the forwarding method 632 /* Applications work depending on the forwarding method
@@ -628,7 +635,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
628 ip_vs_unbind_app(cp); 635 ip_vs_unbind_app(cp);
629 636
630 ip_vs_bind_dest(cp, dest); 637 ip_vs_bind_dest(cp, dest);
631 spin_unlock(&cp->lock); 638 spin_unlock_bh(&cp->lock);
632 639
633 /* Update its packet transmitter */ 640 /* Update its packet transmitter */
634 cp->packet_xmit = NULL; 641 cp->packet_xmit = NULL;
@@ -643,7 +650,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
643 if (pd && atomic_read(&pd->appcnt)) 650 if (pd && atomic_read(&pd->appcnt))
644 ip_vs_bind_app(cp, pd->pp); 651 ip_vs_bind_app(cp, pd->pp);
645 } 652 }
646 return dest; 653 rcu_read_unlock();
647} 654}
648 655
649 656
@@ -695,12 +702,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
695 dest->flags &= ~IP_VS_DEST_F_OVERLOAD; 702 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
696 } 703 }
697 704
698 /* 705 ip_vs_dest_put(dest);
699 * Simply decrease the refcnt of the dest, because the
700 * dest will be either in service's destination list
701 * or in the trash.
702 */
703 atomic_dec(&dest->refcnt);
704} 706}
705 707
706static int expire_quiescent_template(struct netns_ipvs *ipvs, 708static int expire_quiescent_template(struct netns_ipvs *ipvs,
@@ -757,41 +759,36 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
757 * Simply decrease the refcnt of the template, 759 * Simply decrease the refcnt of the template,
758 * don't restart its timer. 760 * don't restart its timer.
759 */ 761 */
760 atomic_dec(&ct->refcnt); 762 __ip_vs_conn_put(ct);
761 return 0; 763 return 0;
762 } 764 }
763 return 1; 765 return 1;
764} 766}
765 767
768static void ip_vs_conn_rcu_free(struct rcu_head *head)
769{
770 struct ip_vs_conn *cp = container_of(head, struct ip_vs_conn,
771 rcu_head);
772
773 ip_vs_pe_put(cp->pe);
774 kfree(cp->pe_data);
775 kmem_cache_free(ip_vs_conn_cachep, cp);
776}
777
766static void ip_vs_conn_expire(unsigned long data) 778static void ip_vs_conn_expire(unsigned long data)
767{ 779{
768 struct ip_vs_conn *cp = (struct ip_vs_conn *)data; 780 struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
769 struct net *net = ip_vs_conn_net(cp); 781 struct net *net = ip_vs_conn_net(cp);
770 struct netns_ipvs *ipvs = net_ipvs(net); 782 struct netns_ipvs *ipvs = net_ipvs(net);
771 783
772 cp->timeout = 60*HZ;
773
774 /*
775 * hey, I'm using it
776 */
777 atomic_inc(&cp->refcnt);
778
779 /* 784 /*
780 * do I control anybody? 785 * do I control anybody?
781 */ 786 */
782 if (atomic_read(&cp->n_control)) 787 if (atomic_read(&cp->n_control))
783 goto expire_later; 788 goto expire_later;
784 789
785 /* 790 /* Unlink conn if not referenced anymore */
786 * unhash it if it is hashed in the conn table 791 if (likely(ip_vs_conn_unlink(cp))) {
787 */
788 if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET))
789 goto expire_later;
790
791 /*
792 * refcnt==1 implies I'm the only one referrer
793 */
794 if (likely(atomic_read(&cp->refcnt) == 1)) {
795 /* delete the timer if it is activated by other users */ 792 /* delete the timer if it is activated by other users */
796 del_timer(&cp->timer); 793 del_timer(&cp->timer);
797 794
@@ -810,38 +807,41 @@ static void ip_vs_conn_expire(unsigned long data)
810 ip_vs_conn_drop_conntrack(cp); 807 ip_vs_conn_drop_conntrack(cp);
811 } 808 }
812 809
813 ip_vs_pe_put(cp->pe);
814 kfree(cp->pe_data);
815 if (unlikely(cp->app != NULL)) 810 if (unlikely(cp->app != NULL))
816 ip_vs_unbind_app(cp); 811 ip_vs_unbind_app(cp);
817 ip_vs_unbind_dest(cp); 812 ip_vs_unbind_dest(cp);
818 if (cp->flags & IP_VS_CONN_F_NO_CPORT) 813 if (cp->flags & IP_VS_CONN_F_NO_CPORT)
819 atomic_dec(&ip_vs_conn_no_cport_cnt); 814 atomic_dec(&ip_vs_conn_no_cport_cnt);
815 call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free);
820 atomic_dec(&ipvs->conn_count); 816 atomic_dec(&ipvs->conn_count);
821
822 kmem_cache_free(ip_vs_conn_cachep, cp);
823 return; 817 return;
824 } 818 }
825 819
826 /* hash it back to the table */
827 ip_vs_conn_hash(cp);
828
829 expire_later: 820 expire_later:
830 IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n", 821 IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n",
831 atomic_read(&cp->refcnt)-1, 822 atomic_read(&cp->refcnt),
832 atomic_read(&cp->n_control)); 823 atomic_read(&cp->n_control));
833 824
825 atomic_inc(&cp->refcnt);
826 cp->timeout = 60*HZ;
827
834 if (ipvs->sync_state & IP_VS_STATE_MASTER) 828 if (ipvs->sync_state & IP_VS_STATE_MASTER)
835 ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs)); 829 ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));
836 830
837 ip_vs_conn_put(cp); 831 ip_vs_conn_put(cp);
838} 832}
839 833
840 834/* Modify timer, so that it expires as soon as possible.
835 * Can be called without reference only if under RCU lock.
836 */
841void ip_vs_conn_expire_now(struct ip_vs_conn *cp) 837void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
842{ 838{
843 if (del_timer(&cp->timer)) 839 /* Using mod_timer_pending will ensure the timer is not
844 mod_timer(&cp->timer, jiffies); 840 * modified after the final del_timer in ip_vs_conn_expire.
841 */
842 if (timer_pending(&cp->timer) &&
843 time_after(cp->timer.expires, jiffies))
844 mod_timer_pending(&cp->timer, jiffies);
845} 845}
846 846
847 847
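
Reviewer note: ip_vs_conn_expire_now() earns the new comment it carries. RCU-side callers (see ip_vs_random_dropentry() below) may invoke it without holding a reference, so it must never re-arm a timer that ip_vs_conn_expire() has already deleted for good; mod_timer_pending() only modifies a timer that is still queued, which is precisely the guarantee needed:

/* old: could re-add a timer after its final del_timer(), touching
 * a connection that is about to be freed
 */
if (del_timer(&cp->timer))
	mod_timer(&cp->timer, jiffies);

/* new: shorten the deadline only while the timer is still pending */
if (timer_pending(&cp->timer) &&
    time_after(cp->timer.expires, jiffies))
	mod_timer_pending(&cp->timer, jiffies);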
@@ -858,7 +858,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
858 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net, 858 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
859 p->protocol); 859 p->protocol);
860 860
861 cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); 861 cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
862 if (cp == NULL) { 862 if (cp == NULL) {
863 IP_VS_ERR_RL("%s(): no memory\n", __func__); 863 IP_VS_ERR_RL("%s(): no memory\n", __func__);
864 return NULL; 864 return NULL;
@@ -869,13 +869,13 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
869 ip_vs_conn_net_set(cp, p->net); 869 ip_vs_conn_net_set(cp, p->net);
870 cp->af = p->af; 870 cp->af = p->af;
871 cp->protocol = p->protocol; 871 cp->protocol = p->protocol;
872 ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); 872 ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
873 cp->cport = p->cport; 873 cp->cport = p->cport;
874 ip_vs_addr_copy(p->af, &cp->vaddr, p->vaddr); 874 ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr);
875 cp->vport = p->vport; 875 cp->vport = p->vport;
876 /* proto should only be IPPROTO_IP if d_addr is a fwmark */ 876 /* proto should only be IPPROTO_IP if d_addr is a fwmark */
877 ip_vs_addr_copy(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, 877 ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
878 &cp->daddr, daddr); 878 &cp->daddr, daddr);
879 cp->dport = dport; 879 cp->dport = dport;
880 cp->flags = flags; 880 cp->flags = flags;
881 cp->fwmark = fwmark; 881 cp->fwmark = fwmark;
@@ -884,6 +884,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
884 cp->pe = p->pe; 884 cp->pe = p->pe;
885 cp->pe_data = p->pe_data; 885 cp->pe_data = p->pe_data;
886 cp->pe_data_len = p->pe_data_len; 886 cp->pe_data_len = p->pe_data_len;
887 } else {
888 cp->pe = NULL;
889 cp->pe_data = NULL;
890 cp->pe_data_len = 0;
887 } 891 }
888 spin_lock_init(&cp->lock); 892 spin_lock_init(&cp->lock);
889 893
@@ -894,18 +898,28 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
894 */ 898 */
895 atomic_set(&cp->refcnt, 1); 899 atomic_set(&cp->refcnt, 1);
896 900
901 cp->control = NULL;
897 atomic_set(&cp->n_control, 0); 902 atomic_set(&cp->n_control, 0);
898 atomic_set(&cp->in_pkts, 0); 903 atomic_set(&cp->in_pkts, 0);
899 904
905 cp->packet_xmit = NULL;
906 cp->app = NULL;
907 cp->app_data = NULL;
908 /* reset struct ip_vs_seq */
909 cp->in_seq.delta = 0;
910 cp->out_seq.delta = 0;
911
900 atomic_inc(&ipvs->conn_count); 912 atomic_inc(&ipvs->conn_count);
901 if (flags & IP_VS_CONN_F_NO_CPORT) 913 if (flags & IP_VS_CONN_F_NO_CPORT)
902 atomic_inc(&ip_vs_conn_no_cport_cnt); 914 atomic_inc(&ip_vs_conn_no_cport_cnt);
903 915
904 /* Bind the connection with a destination server */ 916 /* Bind the connection with a destination server */
917 cp->dest = NULL;
905 ip_vs_bind_dest(cp, dest); 918 ip_vs_bind_dest(cp, dest);
906 919
907 /* Set its state and timeout */ 920 /* Set its state and timeout */
908 cp->state = 0; 921 cp->state = 0;
922 cp->old_state = 0;
909 cp->timeout = 3*HZ; 923 cp->timeout = 3*HZ;
910 cp->sync_endtime = jiffies & ~3UL; 924 cp->sync_endtime = jiffies & ~3UL;
911 925
@@ -952,14 +966,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
952 struct ip_vs_iter_state *iter = seq->private; 966 struct ip_vs_iter_state *iter = seq->private;
953 967
954 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { 968 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
955 ct_read_lock_bh(idx); 969 rcu_read_lock();
956 hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { 970 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
971 /* __ip_vs_conn_get() is not needed by
972 * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show
973 */
957 if (pos-- == 0) { 974 if (pos-- == 0) {
958 iter->l = &ip_vs_conn_tab[idx]; 975 iter->l = &ip_vs_conn_tab[idx];
959 return cp; 976 return cp;
960 } 977 }
961 } 978 }
962 ct_read_unlock_bh(idx); 979 rcu_read_unlock();
963 } 980 }
964 981
965 return NULL; 982 return NULL;
@@ -977,6 +994,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
977{ 994{
978 struct ip_vs_conn *cp = v; 995 struct ip_vs_conn *cp = v;
979 struct ip_vs_iter_state *iter = seq->private; 996 struct ip_vs_iter_state *iter = seq->private;
997 struct hlist_node *e;
980 struct hlist_head *l = iter->l; 998 struct hlist_head *l = iter->l;
981 int idx; 999 int idx;
982 1000
@@ -985,19 +1003,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
985 return ip_vs_conn_array(seq, 0); 1003 return ip_vs_conn_array(seq, 0);
986 1004
987 /* more on same hash chain? */ 1005 /* more on same hash chain? */
988 if (cp->c_list.next) 1006 e = rcu_dereference(hlist_next_rcu(&cp->c_list));
989 return hlist_entry(cp->c_list.next, struct ip_vs_conn, c_list); 1007 if (e)
1008 return hlist_entry(e, struct ip_vs_conn, c_list);
1009 rcu_read_unlock();
990 1010
991 idx = l - ip_vs_conn_tab; 1011 idx = l - ip_vs_conn_tab;
992 ct_read_unlock_bh(idx);
993
994 while (++idx < ip_vs_conn_tab_size) { 1012 while (++idx < ip_vs_conn_tab_size) {
995 ct_read_lock_bh(idx); 1013 rcu_read_lock();
996 hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { 1014 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
997 iter->l = &ip_vs_conn_tab[idx]; 1015 iter->l = &ip_vs_conn_tab[idx];
998 return cp; 1016 return cp;
999 } 1017 }
1000 ct_read_unlock_bh(idx); 1018 rcu_read_unlock();
1001 } 1019 }
1002 iter->l = NULL; 1020 iter->l = NULL;
1003 return NULL; 1021 return NULL;
@@ -1009,7 +1027,7 @@ static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
1009 struct hlist_head *l = iter->l; 1027 struct hlist_head *l = iter->l;
1010 1028
1011 if (l) 1029 if (l)
1012 ct_read_unlock_bh(l - ip_vs_conn_tab); 1030 rcu_read_unlock();
1013} 1031}
1014 1032
1015static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) 1033static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
@@ -1188,7 +1206,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
1188void ip_vs_random_dropentry(struct net *net) 1206void ip_vs_random_dropentry(struct net *net)
1189{ 1207{
1190 int idx; 1208 int idx;
1191 struct ip_vs_conn *cp; 1209 struct ip_vs_conn *cp, *cp_c;
1192 1210
1193 /* 1211 /*
1194 * Randomly scan 1/32 of the whole table every second 1212 * Randomly scan 1/32 of the whole table every second
@@ -1199,9 +1217,9 @@ void ip_vs_random_dropentry(struct net *net)
1199 /* 1217 /*
1200 * Lock is actually needed in this loop. 1218 * Lock is actually needed in this loop.
1201 */ 1219 */
1202 ct_write_lock_bh(hash); 1220 rcu_read_lock();
1203 1221
1204 hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 1222 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
1205 if (cp->flags & IP_VS_CONN_F_TEMPLATE) 1223 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
1206 /* connection template */ 1224 /* connection template */
1207 continue; 1225 continue;
@@ -1228,12 +1246,15 @@ void ip_vs_random_dropentry(struct net *net)
1228 1246
1229 IP_VS_DBG(4, "del connection\n"); 1247 IP_VS_DBG(4, "del connection\n");
1230 ip_vs_conn_expire_now(cp); 1248 ip_vs_conn_expire_now(cp);
1231 if (cp->control) { 1249 cp_c = cp->control;
1250 /* cp->control is valid only with reference to cp */
1251 if (cp_c && __ip_vs_conn_get(cp)) {
1232 IP_VS_DBG(4, "del conn template\n"); 1252 IP_VS_DBG(4, "del conn template\n");
1233 ip_vs_conn_expire_now(cp->control); 1253 ip_vs_conn_expire_now(cp_c);
1254 __ip_vs_conn_put(cp);
1234 } 1255 }
1235 } 1256 }
1236 ct_write_unlock_bh(hash); 1257 rcu_read_unlock();
1237 } 1258 }
1238} 1259}
1239 1260
@@ -1244,7 +1265,7 @@ void ip_vs_random_dropentry(struct net *net)
1244static void ip_vs_conn_flush(struct net *net) 1265static void ip_vs_conn_flush(struct net *net)
1245{ 1266{
1246 int idx; 1267 int idx;
1247 struct ip_vs_conn *cp; 1268 struct ip_vs_conn *cp, *cp_c;
1248 struct netns_ipvs *ipvs = net_ipvs(net); 1269 struct netns_ipvs *ipvs = net_ipvs(net);
1249 1270
1250flush_again: 1271flush_again:
@@ -1252,19 +1273,22 @@ flush_again:
1252 /* 1273 /*
1253 * Lock is actually needed in this loop. 1274 * Lock is actually needed in this loop.
1254 */ 1275 */
1255 ct_write_lock_bh(idx); 1276 rcu_read_lock();
1256 1277
1257 hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { 1278 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
1258 if (!ip_vs_conn_net_eq(cp, net)) 1279 if (!ip_vs_conn_net_eq(cp, net))
1259 continue; 1280 continue;
1260 IP_VS_DBG(4, "del connection\n"); 1281 IP_VS_DBG(4, "del connection\n");
1261 ip_vs_conn_expire_now(cp); 1282 ip_vs_conn_expire_now(cp);
1262 if (cp->control) { 1283 cp_c = cp->control;
1284 /* cp->control is valid only with reference to cp */
1285 if (cp_c && __ip_vs_conn_get(cp)) {
1263 IP_VS_DBG(4, "del conn template\n"); 1286 IP_VS_DBG(4, "del conn template\n");
1264 ip_vs_conn_expire_now(cp->control); 1287 ip_vs_conn_expire_now(cp_c);
1288 __ip_vs_conn_put(cp);
1265 } 1289 }
1266 } 1290 }
1267 ct_write_unlock_bh(idx); 1291 rcu_read_unlock();
1268 } 1292 }
1269 1293
1270 /* the counter may be not NULL, because maybe some conn entries 1294 /* the counter may be not NULL, because maybe some conn entries
@@ -1331,7 +1355,7 @@ int __init ip_vs_conn_init(void)
1331 INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]); 1355 INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]);
1332 1356
1333 for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) { 1357 for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) {
1334 rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); 1358 spin_lock_init(&__ip_vs_conntbl_lock_array[idx].l);
1335 } 1359 }
1336 1360
1337 /* calculate the random value for connection hash */ 1361 /* calculate the random value for connection hash */
@@ -1342,6 +1366,8 @@ int __init ip_vs_conn_init(void)
1342 1366
1343void ip_vs_conn_cleanup(void) 1367void ip_vs_conn_cleanup(void)
1344{ 1368{
1369 /* Wait all ip_vs_conn_rcu_free() callbacks to complete */
1370 rcu_barrier();
1345 /* Release the empty cache */ 1371 /* Release the empty cache */
1346 kmem_cache_destroy(ip_vs_conn_cachep); 1372 kmem_cache_destroy(ip_vs_conn_cachep);
1347 vfree(ip_vs_conn_tab); 1373 vfree(ip_vs_conn_tab);
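
Reviewer note: the rcu_barrier() added to ip_vs_conn_cleanup() is easy to overlook but mandatory. Objects freed via call_rcu() may still be queued when the module unloads, and destroying the slab cache before those callbacks run would pull the pages out from under them. Unlike synchronize_rcu(), which only waits for a grace period, rcu_barrier() waits until every already-posted RCU callback has actually executed. The teardown ordering, as a sketch with stand-in names (cachep, table):

void example_cleanup(void)
{
	rcu_barrier();			/* drain all pending call_rcu() frees */
	kmem_cache_destroy(cachep);	/* safe: nothing is still queued */
	vfree(table);
}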
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 61f49d241712..f26fe3353a30 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -69,10 +69,7 @@ EXPORT_SYMBOL(ip_vs_conn_put);
69EXPORT_SYMBOL(ip_vs_get_debug_level); 69EXPORT_SYMBOL(ip_vs_get_debug_level);
70#endif 70#endif
71 71
72int ip_vs_net_id __read_mostly; 72static int ip_vs_net_id __read_mostly;
73#ifdef IP_VS_GENERIC_NETNS
74EXPORT_SYMBOL(ip_vs_net_id);
75#endif
76/* netns cnt used for uniqueness */ 73/* netns cnt used for uniqueness */
77static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); 74static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
78 75
@@ -206,7 +203,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
206{ 203{
207 ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, 204 ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
208 vport, p); 205 vport, p);
209 p->pe = svc->pe; 206 p->pe = rcu_dereference(svc->pe);
210 if (p->pe && p->pe->fill_param) 207 if (p->pe && p->pe->fill_param)
211 return p->pe->fill_param(p, skb); 208 return p->pe->fill_param(p, skb);
212 209
@@ -299,12 +296,15 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
299 /* Check if a template already exists */ 296 /* Check if a template already exists */
300 ct = ip_vs_ct_in_get(&param); 297 ct = ip_vs_ct_in_get(&param);
301 if (!ct || !ip_vs_check_template(ct)) { 298 if (!ct || !ip_vs_check_template(ct)) {
299 struct ip_vs_scheduler *sched;
300
302 /* 301 /*
303 * No template found or the dest of the connection 302 * No template found or the dest of the connection
304 * template is not available. 303 * template is not available.
305 * return *ignored=0 i.e. ICMP and NF_DROP 304 * return *ignored=0 i.e. ICMP and NF_DROP
306 */ 305 */
307 dest = svc->scheduler->schedule(svc, skb); 306 sched = rcu_dereference(svc->scheduler);
307 dest = sched->schedule(svc, skb);
308 if (!dest) { 308 if (!dest) {
309 IP_VS_DBG(1, "p-schedule: no dest found.\n"); 309 IP_VS_DBG(1, "p-schedule: no dest found.\n");
310 kfree(param.pe_data); 310 kfree(param.pe_data);
@@ -394,6 +394,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
394{ 394{
395 struct ip_vs_protocol *pp = pd->pp; 395 struct ip_vs_protocol *pp = pd->pp;
396 struct ip_vs_conn *cp = NULL; 396 struct ip_vs_conn *cp = NULL;
397 struct ip_vs_scheduler *sched;
397 struct ip_vs_dest *dest; 398 struct ip_vs_dest *dest;
398 __be16 _ports[2], *pptr; 399 __be16 _ports[2], *pptr;
399 unsigned int flags; 400 unsigned int flags;
@@ -449,7 +450,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
449 return NULL; 450 return NULL;
450 } 451 }
451 452
452 dest = svc->scheduler->schedule(svc, skb); 453 sched = rcu_dereference(svc->scheduler);
454 dest = sched->schedule(svc, skb);
453 if (dest == NULL) { 455 if (dest == NULL) {
454 IP_VS_DBG(1, "Schedule: no dest found.\n"); 456 IP_VS_DBG(1, "Schedule: no dest found.\n");
455 return NULL; 457 return NULL;
@@ -507,7 +509,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
507 509
508 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); 510 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
509 if (pptr == NULL) { 511 if (pptr == NULL) {
510 ip_vs_service_put(svc);
511 return NF_DROP; 512 return NF_DROP;
512 } 513 }
513 514
@@ -533,8 +534,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
533 IP_VS_CONN_F_ONE_PACKET : 0; 534 IP_VS_CONN_F_ONE_PACKET : 0;
534 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; 535 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
535 536
536 ip_vs_service_put(svc);
537
538 /* create a new connection entry */ 537 /* create a new connection entry */
539 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); 538 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
540 { 539 {
@@ -571,12 +570,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
571 * listed in the ipvs table), pass the packets, because it is 570 * listed in the ipvs table), pass the packets, because it is
572 * not ipvs job to decide to drop the packets. 571 * not ipvs job to decide to drop the packets.
573 */ 572 */
574 if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) { 573 if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT))
575 ip_vs_service_put(svc);
576 return NF_ACCEPT; 574 return NF_ACCEPT;
577 }
578
579 ip_vs_service_put(svc);
580 575
581 /* 576 /*
582 * Notify the client that the destination is unreachable, and 577 * Notify the client that the destination is unreachable, and
@@ -643,8 +638,11 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
643 638
644static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) 639static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
645{ 640{
646 int err = ip_defrag(skb, user); 641 int err;
647 642
643 local_bh_disable();
644 err = ip_defrag(skb, user);
645 local_bh_enable();
648 if (!err) 646 if (!err)
649 ip_send_check(ip_hdr(skb)); 647 ip_send_check(ip_hdr(skb));
650 648
@@ -1164,9 +1162,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1164 sizeof(_ports), _ports, &iph); 1162 sizeof(_ports), _ports, &iph);
1165 if (pptr == NULL) 1163 if (pptr == NULL)
1166 return NF_ACCEPT; /* Not for me */ 1164 return NF_ACCEPT; /* Not for me */
1167 if (ip_vs_lookup_real_service(net, af, iph.protocol, 1165 if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr,
1168 &iph.saddr, 1166 pptr[0])) {
1169 pptr[0])) {
1170 /* 1167 /*
1171 * Notify the real server: there is no 1168 * Notify the real server: there is no
1172 * existing entry if it is not RST 1169 * existing entry if it is not RST
@@ -1181,9 +1178,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1181 iph.len)))) { 1178 iph.len)))) {
1182#ifdef CONFIG_IP_VS_IPV6 1179#ifdef CONFIG_IP_VS_IPV6
1183 if (af == AF_INET6) { 1180 if (af == AF_INET6) {
1184 struct net *net =
1185 dev_net(skb_dst(skb)->dev);
1186
1187 if (!skb->dev) 1181 if (!skb->dev)
1188 skb->dev = net->loopback_dev; 1182 skb->dev = net->loopback_dev;
1189 icmpv6_send(skb, 1183 icmpv6_send(skb,
@@ -1226,13 +1220,7 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
1226 const struct net_device *in, const struct net_device *out, 1220 const struct net_device *in, const struct net_device *out,
1227 int (*okfn)(struct sk_buff *)) 1221 int (*okfn)(struct sk_buff *))
1228{ 1222{
1229 unsigned int verdict; 1223 return ip_vs_out(hooknum, skb, AF_INET);
1230
1231 /* Disable BH in LOCAL_OUT until all places are fixed */
1232 local_bh_disable();
1233 verdict = ip_vs_out(hooknum, skb, AF_INET);
1234 local_bh_enable();
1235 return verdict;
1236} 1224}
1237 1225
1238#ifdef CONFIG_IP_VS_IPV6 1226#ifdef CONFIG_IP_VS_IPV6
@@ -1259,13 +1247,7 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
1259 const struct net_device *in, const struct net_device *out, 1247 const struct net_device *in, const struct net_device *out,
1260 int (*okfn)(struct sk_buff *)) 1248 int (*okfn)(struct sk_buff *))
1261{ 1249{
1262 unsigned int verdict; 1250 return ip_vs_out(hooknum, skb, AF_INET6);
1263
1264 /* Disable BH in LOCAL_OUT until all places are fixed */
1265 local_bh_disable();
1266 verdict = ip_vs_out(hooknum, skb, AF_INET6);
1267 local_bh_enable();
1268 return verdict;
1269} 1251}
1270 1252
1271#endif 1253#endif
@@ -1401,10 +1383,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1401 goto ignore_ipip; 1383 goto ignore_ipip;
1402 /* Prefer the resulting PMTU */ 1384 /* Prefer the resulting PMTU */
1403 if (dest) { 1385 if (dest) {
1404 spin_lock(&dest->dst_lock); 1386 struct ip_vs_dest_dst *dest_dst;
1405 if (dest->dst_cache) 1387
1406 mtu = dst_mtu(dest->dst_cache); 1388 rcu_read_lock();
1407 spin_unlock(&dest->dst_lock); 1389 dest_dst = rcu_dereference(dest->dest_dst);
1390 if (dest_dst)
1391 mtu = dst_mtu(dest_dst->dst_cache);
1392 rcu_read_unlock();
1408 } 1393 }
1409 if (mtu > 68 + sizeof(struct iphdr)) 1394 if (mtu > 68 + sizeof(struct iphdr))
1410 mtu -= sizeof(struct iphdr); 1395 mtu -= sizeof(struct iphdr);
@@ -1720,13 +1705,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
1720 const struct net_device *in, const struct net_device *out, 1705 const struct net_device *in, const struct net_device *out,
1721 int (*okfn)(struct sk_buff *)) 1706 int (*okfn)(struct sk_buff *))
1722{ 1707{
1723 unsigned int verdict; 1708 return ip_vs_in(hooknum, skb, AF_INET);
1724
1725 /* Disable BH in LOCAL_OUT until all places are fixed */
1726 local_bh_disable();
1727 verdict = ip_vs_in(hooknum, skb, AF_INET);
1728 local_bh_enable();
1729 return verdict;
1730} 1709}
1731 1710
1732#ifdef CONFIG_IP_VS_IPV6 1711#ifdef CONFIG_IP_VS_IPV6
@@ -1785,13 +1764,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
1785 const struct net_device *in, const struct net_device *out, 1764 const struct net_device *in, const struct net_device *out,
1786 int (*okfn)(struct sk_buff *)) 1765 int (*okfn)(struct sk_buff *))
1787{ 1766{
1788 unsigned int verdict; 1767 return ip_vs_in(hooknum, skb, AF_INET6);
1789
1790 /* Disable BH in LOCAL_OUT until all places are fixed */
1791 local_bh_disable();
1792 verdict = ip_vs_in(hooknum, skb, AF_INET6);
1793 local_bh_enable();
1794 return verdict;
1795} 1768}
1796 1769
1797#endif 1770#endif
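
Reviewer note: throughout ip_vs_core.c, svc->scheduler and svc->pe become RCU-managed pointers. The packet path already runs inside an RCU read-side section, so a plain load is replaced by rcu_dereference() and the scheduler can be swapped at runtime without a service write lock; the dropped ip_vs_service_put() calls in ip_vs_leave() follow from the same shift, since services found under RCU are no longer usecnt-pinned per packet. The read side and a sketched update side of that pattern (the real publish/retire locking lives in ip_vs_ctl.c; new_sched and old are illustrative):

/* packet path: caller holds rcu_read_lock() */
sched = rcu_dereference(svc->scheduler);
dest = sched->schedule(svc, skb);

/* configuration path: publish the replacement, then wait out readers */
old = rcu_dereference_protected(svc->scheduler, 1);
rcu_assign_pointer(svc->scheduler, new_sched);
synchronize_rcu();	/* readers still using 'old' have finished */
/* 'old' may now be released */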
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 9e2d1cccd1eb..9e4074c26dc2 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -55,9 +55,6 @@
55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ 55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
56static DEFINE_MUTEX(__ip_vs_mutex); 56static DEFINE_MUTEX(__ip_vs_mutex);
57 57
58/* lock for service table */
59static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
61/* sysctl variables */ 58/* sysctl variables */
62 59
63#ifdef CONFIG_IP_VS_DEBUG 60#ifdef CONFIG_IP_VS_DEBUG
@@ -71,7 +68,7 @@ int ip_vs_get_debug_level(void)
71 68
72 69
73/* Protos */ 70/* Protos */
74static void __ip_vs_del_service(struct ip_vs_service *svc); 71static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);
75 72
76 73
77#ifdef CONFIG_IP_VS_IPV6 74#ifdef CONFIG_IP_VS_IPV6
@@ -257,9 +254,9 @@ ip_vs_use_count_dec(void)
257#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) 254#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
258 255
259/* the service table hashed by <protocol, addr, port> */ 256/* the service table hashed by <protocol, addr, port> */
260static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; 257static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
261/* the service table hashed by fwmark */ 258/* the service table hashed by fwmark */
262static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; 259static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
263 260
264 261
265/* 262/*
@@ -271,16 +268,18 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
271{ 268{
272 register unsigned int porth = ntohs(port); 269 register unsigned int porth = ntohs(port);
273 __be32 addr_fold = addr->ip; 270 __be32 addr_fold = addr->ip;
271 __u32 ahash;
274 272
275#ifdef CONFIG_IP_VS_IPV6 273#ifdef CONFIG_IP_VS_IPV6
276 if (af == AF_INET6) 274 if (af == AF_INET6)
277 addr_fold = addr->ip6[0]^addr->ip6[1]^ 275 addr_fold = addr->ip6[0]^addr->ip6[1]^
278 addr->ip6[2]^addr->ip6[3]; 276 addr->ip6[2]^addr->ip6[3];
279#endif 277#endif
280 addr_fold ^= ((size_t)net>>8); 278 ahash = ntohl(addr_fold);
279 ahash ^= ((size_t) net >> 8);
281 280
282 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) 281 return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
283 & IP_VS_SVC_TAB_MASK; 282 IP_VS_SVC_TAB_MASK;
284} 283}
285 284
286/* 285/*
@@ -312,13 +311,13 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
312 */ 311 */
313 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, 312 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
314 &svc->addr, svc->port); 313 &svc->addr, svc->port);
315 list_add(&svc->s_list, &ip_vs_svc_table[hash]); 314 hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
316 } else { 315 } else {
317 /* 316 /*
318 * Hash it by fwmark in svc_fwm_table 317 * Hash it by fwmark in svc_fwm_table
319 */ 318 */
320 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); 319 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
321 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); 320 hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
322 } 321 }
323 322
324 svc->flags |= IP_VS_SVC_F_HASHED; 323 svc->flags |= IP_VS_SVC_F_HASHED;
@@ -342,10 +341,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
342 341
343 if (svc->fwmark == 0) { 342 if (svc->fwmark == 0) {
344 /* Remove it from the svc_table table */ 343 /* Remove it from the svc_table table */
345 list_del(&svc->s_list); 344 hlist_del_rcu(&svc->s_list);
346 } else { 345 } else {
347 /* Remove it from the svc_fwm_table table */ 346 /* Remove it from the svc_fwm_table table */
348 list_del(&svc->f_list); 347 hlist_del_rcu(&svc->f_list);
349 } 348 }
350 349
351 svc->flags &= ~IP_VS_SVC_F_HASHED; 350 svc->flags &= ~IP_VS_SVC_F_HASHED;
@@ -367,7 +366,7 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol,
367 /* Check for "full" addressed entries */ 366 /* Check for "full" addressed entries */
368 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); 367 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
369 368
370 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ 369 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
371 if ((svc->af == af) 370 if ((svc->af == af)
372 && ip_vs_addr_equal(af, &svc->addr, vaddr) 371 && ip_vs_addr_equal(af, &svc->addr, vaddr)
373 && (svc->port == vport) 372 && (svc->port == vport)
@@ -394,7 +393,7 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
394 /* Check for fwmark addressed entries */ 393 /* Check for fwmark addressed entries */
395 hash = ip_vs_svc_fwm_hashkey(net, fwmark); 394 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
396 395
397 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { 396 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
398 if (svc->fwmark == fwmark && svc->af == af 397 if (svc->fwmark == fwmark && svc->af == af
399 && net_eq(svc->net, net)) { 398 && net_eq(svc->net, net)) {
400 /* HIT */ 399 /* HIT */
@@ -405,15 +404,14 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
405 return NULL; 404 return NULL;
406} 405}
407 406
407/* Find service, called under RCU lock */
408struct ip_vs_service * 408struct ip_vs_service *
409ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, 409ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
410 const union nf_inet_addr *vaddr, __be16 vport) 410 const union nf_inet_addr *vaddr, __be16 vport)
411{ 411{
412 struct ip_vs_service *svc; 412 struct ip_vs_service *svc;
413 struct netns_ipvs *ipvs = net_ipvs(net); 413 struct netns_ipvs *ipvs = net_ipvs(net);
414 414
415 read_lock(&__ip_vs_svc_lock);
416
417 /* 415 /*
418 * Check the table hashed by fwmark first 416 * Check the table hashed by fwmark first
419 */ 417 */
@@ -449,10 +447,6 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
449 } 447 }
450 448
451 out: 449 out:
452 if (svc)
453 atomic_inc(&svc->usecnt);
454 read_unlock(&__ip_vs_svc_lock);
455
456 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", 450 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
457 fwmark, ip_vs_proto_name(protocol), 451 fwmark, ip_vs_proto_name(protocol),
458 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), 452 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
@@ -469,6 +463,13 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
469 dest->svc = svc; 463 dest->svc = svc;
470} 464}
471 465
466static void ip_vs_service_free(struct ip_vs_service *svc)
467{
468 if (svc->stats.cpustats)
469 free_percpu(svc->stats.cpustats);
470 kfree(svc);
471}
472
472static void 473static void
473__ip_vs_unbind_svc(struct ip_vs_dest *dest) 474__ip_vs_unbind_svc(struct ip_vs_dest *dest)
474{ 475{
@@ -476,12 +477,11 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
476 477
477 dest->svc = NULL; 478 dest->svc = NULL;
478 if (atomic_dec_and_test(&svc->refcnt)) { 479 if (atomic_dec_and_test(&svc->refcnt)) {
479 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", 480 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
480 svc->fwmark, 481 svc->fwmark,
481 IP_VS_DBG_ADDR(svc->af, &svc->addr), 482 IP_VS_DBG_ADDR(svc->af, &svc->addr),
482 ntohs(svc->port), atomic_read(&svc->usecnt)); 483 ntohs(svc->port));
483 free_percpu(svc->stats.cpustats); 484 ip_vs_service_free(svc);
484 kfree(svc);
485 } 485 }
486} 486}
487 487
@@ -506,17 +506,13 @@ static inline unsigned int ip_vs_rs_hashkey(int af,
506 & IP_VS_RTAB_MASK; 506 & IP_VS_RTAB_MASK;
507} 507}
508 508
509/* 509/* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
510 * Hashes ip_vs_dest in rs_table by <proto,addr,port>. 510static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
511 * should be called with locked tables.
512 */
513static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
514{ 511{
515 unsigned int hash; 512 unsigned int hash;
516 513
517 if (!list_empty(&dest->d_list)) { 514 if (dest->in_rs_table)
518 return 0; 515 return;
519 }
520 516
521 /* 517 /*
522 * Hash by proto,addr,port, 518 * Hash by proto,addr,port,
@@ -524,64 +520,51 @@ static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
524 */ 520 */
525 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); 521 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
526 522
527 list_add(&dest->d_list, &ipvs->rs_table[hash]); 523 hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
528 524 dest->in_rs_table = 1;
529 return 1;
530} 525}
531 526
532/* 527/* Unhash ip_vs_dest from rs_table. */
533 * UNhashes ip_vs_dest from rs_table. 528static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
534 * should be called with locked tables.
535 */
536static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
537{ 529{
538 /* 530 /*
 539	 * Remove it from the rs_table.	531	 * Remove it from the rs_table.
540 */ 532 */
541 if (!list_empty(&dest->d_list)) { 533 if (dest->in_rs_table) {
542 list_del_init(&dest->d_list); 534 hlist_del_rcu(&dest->d_list);
535 dest->in_rs_table = 0;
543 } 536 }
544
545 return 1;
546} 537}
547 538
548/* 539/* Check if real service by <proto,addr,port> is present */
549 * Lookup real service by <proto,addr,port> in the real service table. 540bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
550 */ 541 const union nf_inet_addr *daddr, __be16 dport)
551struct ip_vs_dest *
552ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
553 const union nf_inet_addr *daddr,
554 __be16 dport)
555{ 542{
556 struct netns_ipvs *ipvs = net_ipvs(net); 543 struct netns_ipvs *ipvs = net_ipvs(net);
557 unsigned int hash; 544 unsigned int hash;
558 struct ip_vs_dest *dest; 545 struct ip_vs_dest *dest;
559 546
560 /* 547 /* Check for "full" addressed entries */
561 * Check for "full" addressed entries
562 * Return the first found entry
563 */
564 hash = ip_vs_rs_hashkey(af, daddr, dport); 548 hash = ip_vs_rs_hashkey(af, daddr, dport);
565 549
566 read_lock(&ipvs->rs_lock); 550 rcu_read_lock();
567 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { 551 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
568 if ((dest->af == af) 552 if (dest->port == dport &&
569 && ip_vs_addr_equal(af, &dest->addr, daddr) 553 dest->af == af &&
570 && (dest->port == dport) 554 ip_vs_addr_equal(af, &dest->addr, daddr) &&
571 && ((dest->protocol == protocol) || 555 (dest->protocol == protocol || dest->vfwmark)) {
572 dest->vfwmark)) {
573 /* HIT */ 556 /* HIT */
574 read_unlock(&ipvs->rs_lock); 557 rcu_read_unlock();
575 return dest; 558 return true;
576 } 559 }
577 } 560 }
578 read_unlock(&ipvs->rs_lock); 561 rcu_read_unlock();
579 562
580 return NULL; 563 return false;
581} 564}
582 565
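The rs_table rework above follows the standard RCU hlist recipe: updaters link and unlink entries while serialized on the update side, and readers walk the chains locklessly inside an RCU read-side section. A minimal self-contained sketch of that recipe, with hypothetical rs_entry/rs_tbl names rather than the real IPVS structures:

#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct rs_entry {
	struct hlist_node node;
	u16 port;
	struct rcu_head rcu;
};

static struct hlist_head rs_tbl[256];	/* static, so chains start empty */
static DEFINE_SPINLOCK(rs_tbl_lock);	/* serializes updaters only */

static void rs_add(struct rs_entry *e)
{
	spin_lock_bh(&rs_tbl_lock);
	hlist_add_head_rcu(&e->node, &rs_tbl[e->port & 255]);
	spin_unlock_bh(&rs_tbl_lock);
}

static void rs_del(struct rs_entry *e)
{
	spin_lock_bh(&rs_tbl_lock);
	hlist_del_rcu(&e->node);
	spin_unlock_bh(&rs_tbl_lock);
	kfree_rcu(e, rcu);		/* freed only after a grace period */
}

/* Lockless reader; the result is only meaningful inside the RCU section */
static bool rs_present(u16 port)
{
	struct rs_entry *e;
	bool found = false;

	rcu_read_lock();
	hlist_for_each_entry_rcu(e, &rs_tbl[port & 255], node) {
		if (e->port == port) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();
	return found;
}

Note that, like ip_vs_has_real_service() above, the reader hands back a boolean instead of the entry itself, so no pointer has to outlive the read-side section.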
583/* 566/* Lookup destination by {addr,port} in the given service
584 * Lookup destination by {addr,port} in the given service 567 * Called under RCU lock.
585 */ 568 */
586static struct ip_vs_dest * 569static struct ip_vs_dest *
587ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, 570ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
@@ -592,7 +575,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
592 /* 575 /*
593 * Find the destination for the given service 576 * Find the destination for the given service
594 */ 577 */
595 list_for_each_entry(dest, &svc->destinations, n_list) { 578 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
596 if ((dest->af == svc->af) 579 if ((dest->af == svc->af)
597 && ip_vs_addr_equal(svc->af, &dest->addr, daddr) 580 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
598 && (dest->port == dport)) { 581 && (dest->port == dport)) {
@@ -606,13 +589,11 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
606 589
607/* 590/*
608 * Find destination by {daddr,dport,vaddr,protocol} 591 * Find destination by {daddr,dport,vaddr,protocol}
609 * Cretaed to be used in ip_vs_process_message() in 592 * Created to be used in ip_vs_process_message() in
610 * the backup synchronization daemon. It finds the 593 * the backup synchronization daemon. It finds the
611 * destination to be bound to the received connection 594 * destination to be bound to the received connection
612 * on the backup. 595 * on the backup.
613 * 596 * Called under RCU lock, no refcnt is returned.
614 * ip_vs_lookup_real_service() looked promissing, but
615 * seems not working as expected.
616 */ 597 */
617struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, 598struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
618 const union nf_inet_addr *daddr, 599 const union nf_inet_addr *daddr,
@@ -625,7 +606,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
625 struct ip_vs_service *svc; 606 struct ip_vs_service *svc;
626 __be16 port = dport; 607 __be16 port = dport;
627 608
628 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); 609 svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport);
629 if (!svc) 610 if (!svc)
630 return NULL; 611 return NULL;
631 if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) 612 if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
@@ -633,12 +614,31 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
633 dest = ip_vs_lookup_dest(svc, daddr, port); 614 dest = ip_vs_lookup_dest(svc, daddr, port);
634 if (!dest) 615 if (!dest)
635 dest = ip_vs_lookup_dest(svc, daddr, port ^ dport); 616 dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
636 if (dest)
637 atomic_inc(&dest->refcnt);
638 ip_vs_service_put(svc);
639 return dest; 617 return dest;
640} 618}
641 619
620void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
621{
622 struct ip_vs_dest_dst *dest_dst = container_of(head,
623 struct ip_vs_dest_dst,
624 rcu_head);
625
626 dst_release(dest_dst->dst_cache);
627 kfree(dest_dst);
628}
629
630/* Release dest_dst and dst_cache for dest in user context */
631static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
632{
633 struct ip_vs_dest_dst *old;
634
635 old = rcu_dereference_protected(dest->dest_dst, 1);
636 if (old) {
637 RCU_INIT_POINTER(dest->dest_dst, NULL);
638 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
639 }
640}
641
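__ip_vs_dst_cache_reset() is an instance of the unpublish-then-defer idiom: clear the __rcu pointer while holding the update-side lock, then let call_rcu() run the destructor once every reader that might have fetched the old pointer has left its read-side section. A generic sketch of the same shape, with hypothetical cache/owner types:

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct cache {
	void *payload;
	struct rcu_head rcu_head;
};

struct owner {
	struct cache __rcu *cache;
	spinlock_t lock;		/* serializes updaters */
};

static void cache_rcu_free(struct rcu_head *head)
{
	struct cache *c = container_of(head, struct cache, rcu_head);

	kfree(c);			/* all pre-existing readers are done */
}

static void owner_reset_cache(struct owner *o)
{
	struct cache *old;

	spin_lock_bh(&o->lock);
	/* Plain deref is safe here: the lock excludes other updaters */
	old = rcu_dereference_protected(o->cache, lockdep_is_held(&o->lock));
	if (old) {
		RCU_INIT_POINTER(o->cache, NULL);
		call_rcu(&old->rcu_head, cache_rcu_free);
	}
	spin_unlock_bh(&o->lock);
}

ip_vs_dest_dst_rcu_free() has exactly this shape, with a dst_release() in front of the kfree() so the route reference is dropped only after the grace period.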
642/* 642/*
643 * Lookup dest by {svc,addr,port} in the destination trash. 643 * Lookup dest by {svc,addr,port} in the destination trash.
644 * The destination trash is used to hold the destinations that are removed 644 * The destination trash is used to hold the destinations that are removed
@@ -653,19 +653,25 @@ static struct ip_vs_dest *
653ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, 653ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
654 __be16 dport) 654 __be16 dport)
655{ 655{
656 struct ip_vs_dest *dest, *nxt; 656 struct ip_vs_dest *dest;
657 struct netns_ipvs *ipvs = net_ipvs(svc->net); 657 struct netns_ipvs *ipvs = net_ipvs(svc->net);
658 658
659 /* 659 /*
660 * Find the destination in trash 660 * Find the destination in trash
661 */ 661 */
662 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { 662 spin_lock_bh(&ipvs->dest_trash_lock);
663 list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
663 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " 664 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
664 "dest->refcnt=%d\n", 665 "dest->refcnt=%d\n",
665 dest->vfwmark, 666 dest->vfwmark,
666 IP_VS_DBG_ADDR(svc->af, &dest->addr), 667 IP_VS_DBG_ADDR(svc->af, &dest->addr),
667 ntohs(dest->port), 668 ntohs(dest->port),
668 atomic_read(&dest->refcnt)); 669 atomic_read(&dest->refcnt));
 670	/* We cannot reuse dest while in grace period
 671	 * because conns can still use dest->svc
672 */
673 if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
674 continue;
669 if (dest->af == svc->af && 675 if (dest->af == svc->af &&
670 ip_vs_addr_equal(svc->af, &dest->addr, daddr) && 676 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
671 dest->port == dport && 677 dest->port == dport &&
@@ -675,29 +681,27 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
675 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && 681 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
676 dest->vport == svc->port))) { 682 dest->vport == svc->port))) {
677 /* HIT */ 683 /* HIT */
678 return dest; 684 list_del(&dest->t_list);
679 } 685 ip_vs_dest_hold(dest);
680 686 goto out;
681 /*
682 * Try to purge the destination from trash if not referenced
683 */
684 if (atomic_read(&dest->refcnt) == 1) {
685 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
686 "from trash\n",
687 dest->vfwmark,
688 IP_VS_DBG_ADDR(svc->af, &dest->addr),
689 ntohs(dest->port));
690 list_del(&dest->n_list);
691 ip_vs_dst_reset(dest);
692 __ip_vs_unbind_svc(dest);
693 free_percpu(dest->stats.cpustats);
694 kfree(dest);
695 } 687 }
696 } 688 }
697 689
698 return NULL; 690 dest = NULL;
691
692out:
693 spin_unlock_bh(&ipvs->dest_trash_lock);
694
695 return dest;
699} 696}
700 697
698static void ip_vs_dest_free(struct ip_vs_dest *dest)
699{
700 __ip_vs_dst_cache_reset(dest);
701 __ip_vs_unbind_svc(dest);
702 free_percpu(dest->stats.cpustats);
703 kfree(dest);
704}
701 705
702/* 706/*
703 * Clean up all the destinations in the trash 707 * Clean up all the destinations in the trash
@@ -706,19 +710,18 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
706 * When the ip_vs_control_cleanup is activated by ipvs module exit,	710 * When the ip_vs_control_cleanup is activated by ipvs module exit,
707 * the service tables must have been flushed and all the connections 711 * the service tables must have been flushed and all the connections
708 * are expired, and the refcnt of each destination in the trash must 712 * are expired, and the refcnt of each destination in the trash must
709 * be 1, so we simply release them here. 713 * be 0, so we simply release them here.
710 */ 714 */
711static void ip_vs_trash_cleanup(struct net *net) 715static void ip_vs_trash_cleanup(struct net *net)
712{ 716{
713 struct ip_vs_dest *dest, *nxt; 717 struct ip_vs_dest *dest, *nxt;
714 struct netns_ipvs *ipvs = net_ipvs(net); 718 struct netns_ipvs *ipvs = net_ipvs(net);
715 719
716 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { 720 del_timer_sync(&ipvs->dest_trash_timer);
717 list_del(&dest->n_list); 721 /* No need to use dest_trash_lock */
718 ip_vs_dst_reset(dest); 722 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) {
719 __ip_vs_unbind_svc(dest); 723 list_del(&dest->t_list);
720 free_percpu(dest->stats.cpustats); 724 ip_vs_dest_free(dest);
721 kfree(dest);
722 } 725 }
723} 726}
724 727
@@ -768,6 +771,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
768 struct ip_vs_dest_user_kern *udest, int add) 771 struct ip_vs_dest_user_kern *udest, int add)
769{ 772{
770 struct netns_ipvs *ipvs = net_ipvs(svc->net); 773 struct netns_ipvs *ipvs = net_ipvs(svc->net);
774 struct ip_vs_scheduler *sched;
771 int conn_flags; 775 int conn_flags;
772 776
773 /* set the weight and the flags */ 777 /* set the weight and the flags */
@@ -783,9 +787,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
783 * Put the real service in rs_table if not present. 787 * Put the real service in rs_table if not present.
784 * For now only for NAT! 788 * For now only for NAT!
785 */ 789 */
786 write_lock_bh(&ipvs->rs_lock);
787 ip_vs_rs_hash(ipvs, dest); 790 ip_vs_rs_hash(ipvs, dest);
788 write_unlock_bh(&ipvs->rs_lock);
789 } 791 }
790 atomic_set(&dest->conn_flags, conn_flags); 792 atomic_set(&dest->conn_flags, conn_flags);
791 793
@@ -809,27 +811,20 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
809 dest->l_threshold = udest->l_threshold; 811 dest->l_threshold = udest->l_threshold;
810 812
811 spin_lock_bh(&dest->dst_lock); 813 spin_lock_bh(&dest->dst_lock);
812 ip_vs_dst_reset(dest); 814 __ip_vs_dst_cache_reset(dest);
813 spin_unlock_bh(&dest->dst_lock); 815 spin_unlock_bh(&dest->dst_lock);
814 816
815 if (add) 817 sched = rcu_dereference_protected(svc->scheduler, 1);
816 ip_vs_start_estimator(svc->net, &dest->stats);
817
818 write_lock_bh(&__ip_vs_svc_lock);
819
820 /* Wait until all other svc users go away */
821 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
822
823 if (add) { 818 if (add) {
824 list_add(&dest->n_list, &svc->destinations); 819 ip_vs_start_estimator(svc->net, &dest->stats);
820 list_add_rcu(&dest->n_list, &svc->destinations);
825 svc->num_dests++; 821 svc->num_dests++;
822 if (sched->add_dest)
823 sched->add_dest(svc, dest);
824 } else {
825 if (sched->upd_dest)
826 sched->upd_dest(svc, dest);
826 } 827 }
827
828 /* call the update_service, because server weight may be changed */
829 if (svc->scheduler->update_service)
830 svc->scheduler->update_service(svc);
831
832 write_unlock_bh(&__ip_vs_svc_lock);
833} 828}
834 829
835 830
@@ -881,7 +876,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
881 atomic_set(&dest->persistconns, 0); 876 atomic_set(&dest->persistconns, 0);
882 atomic_set(&dest->refcnt, 1); 877 atomic_set(&dest->refcnt, 1);
883 878
884 INIT_LIST_HEAD(&dest->d_list); 879 INIT_HLIST_NODE(&dest->d_list);
885 spin_lock_init(&dest->dst_lock); 880 spin_lock_init(&dest->dst_lock);
886 spin_lock_init(&dest->stats.lock); 881 spin_lock_init(&dest->stats.lock);
887 __ip_vs_update_dest(svc, dest, udest, 1); 882 __ip_vs_update_dest(svc, dest, udest, 1);
@@ -923,10 +918,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
923 918
924 ip_vs_addr_copy(svc->af, &daddr, &udest->addr); 919 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
925 920
926	/*	921	/* We use a function that requires RCU lock */
927 * Check if the dest already exists in the list 922 rcu_read_lock();
928 */
929 dest = ip_vs_lookup_dest(svc, &daddr, dport); 923 dest = ip_vs_lookup_dest(svc, &daddr, dport);
924 rcu_read_unlock();
930 925
931 if (dest != NULL) { 926 if (dest != NULL) {
932 IP_VS_DBG(1, "%s(): dest already exists\n", __func__); 927 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
@@ -948,11 +943,6 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
948 IP_VS_DBG_ADDR(svc->af, &dest->vaddr), 943 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
949 ntohs(dest->vport)); 944 ntohs(dest->vport));
950 945
951 /*
952 * Get the destination from the trash
953 */
954 list_del(&dest->n_list);
955
956 __ip_vs_update_dest(svc, dest, udest, 1); 946 __ip_vs_update_dest(svc, dest, udest, 1);
957 ret = 0; 947 ret = 0;
958 } else { 948 } else {
@@ -992,10 +982,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
992 982
993 ip_vs_addr_copy(svc->af, &daddr, &udest->addr); 983 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
994 984
995	/*	985	/* We use a function that requires RCU lock */
996 * Lookup the destination list 986 rcu_read_lock();
997 */
998 dest = ip_vs_lookup_dest(svc, &daddr, dport); 987 dest = ip_vs_lookup_dest(svc, &daddr, dport);
988 rcu_read_unlock();
999 989
1000 if (dest == NULL) { 990 if (dest == NULL) {
1001 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__); 991 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
@@ -1008,11 +998,21 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1008 return 0; 998 return 0;
1009} 999}
1010 1000
1001static void ip_vs_dest_wait_readers(struct rcu_head *head)
1002{
1003 struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest,
1004 rcu_head);
1005
1006 /* End of grace period after unlinking */
1007 clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
1008}
1009
1011 1010
1012/* 1011/*
1013 * Delete a destination (must be already unlinked from the service) 1012 * Delete a destination (must be already unlinked from the service)
1014 */ 1013 */
1015static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) 1014static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
1015 bool cleanup)
1016{ 1016{
1017 struct netns_ipvs *ipvs = net_ipvs(net); 1017 struct netns_ipvs *ipvs = net_ipvs(net);
1018 1018
@@ -1021,38 +1021,24 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1021 /* 1021 /*
1022 * Remove it from the d-linked list with the real services. 1022 * Remove it from the d-linked list with the real services.
1023 */ 1023 */
1024 write_lock_bh(&ipvs->rs_lock);
1025 ip_vs_rs_unhash(dest); 1024 ip_vs_rs_unhash(dest);
1026 write_unlock_bh(&ipvs->rs_lock);
1027 1025
1028 /* 1026 if (!cleanup) {
1029 * Decrease the refcnt of the dest, and free the dest 1027 set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
1030 * if nobody refers to it (refcnt=0). Otherwise, throw 1028 call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers);
1031 * the destination into the trash.
1032 */
1033 if (atomic_dec_and_test(&dest->refcnt)) {
1034 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1035 dest->vfwmark,
1036 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1037 ntohs(dest->port));
1038 ip_vs_dst_reset(dest);
1039 /* simply decrease svc->refcnt here, let the caller check
1040 and release the service if nobody refers to it.
1041 Only user context can release destination and service,
1042 and only one user context can update virtual service at a
1043 time, so the operation here is OK */
1044 atomic_dec(&dest->svc->refcnt);
1045 free_percpu(dest->stats.cpustats);
1046 kfree(dest);
1047 } else {
1048 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1049 "dest->refcnt=%d\n",
1050 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1051 ntohs(dest->port),
1052 atomic_read(&dest->refcnt));
1053 list_add(&dest->n_list, &ipvs->dest_trash);
1054 atomic_inc(&dest->refcnt);
1055 } 1029 }
1030
1031 spin_lock_bh(&ipvs->dest_trash_lock);
1032 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
1033 IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
1034 atomic_read(&dest->refcnt));
1035 if (list_empty(&ipvs->dest_trash) && !cleanup)
1036 mod_timer(&ipvs->dest_trash_timer,
1037 jiffies + IP_VS_DEST_TRASH_PERIOD);
1038 /* dest lives in trash without reference */
1039 list_add(&dest->t_list, &ipvs->dest_trash);
1040 spin_unlock_bh(&ipvs->dest_trash_lock);
1041 ip_vs_dest_put(dest);
1056} 1042}
1057 1043
1058 1044
@@ -1068,14 +1054,16 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1068 /* 1054 /*
1069 * Remove it from the d-linked destination list. 1055 * Remove it from the d-linked destination list.
1070 */ 1056 */
1071 list_del(&dest->n_list); 1057 list_del_rcu(&dest->n_list);
1072 svc->num_dests--; 1058 svc->num_dests--;
1073 1059
1074 /* 1060 if (svcupd) {
1075 * Call the update_service function of its scheduler 1061 struct ip_vs_scheduler *sched;
1076 */ 1062
1077 if (svcupd && svc->scheduler->update_service) 1063 sched = rcu_dereference_protected(svc->scheduler, 1);
1078 svc->scheduler->update_service(svc); 1064 if (sched->del_dest)
1065 sched->del_dest(svc, dest);
1066 }
1079} 1067}
1080 1068
1081 1069
@@ -1090,37 +1078,56 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1090 1078
1091 EnterFunction(2); 1079 EnterFunction(2);
1092 1080
 1081	/* We use a function that requires RCU lock */
1082 rcu_read_lock();
1093 dest = ip_vs_lookup_dest(svc, &udest->addr, dport); 1083 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1084 rcu_read_unlock();
1094 1085
1095 if (dest == NULL) { 1086 if (dest == NULL) {
1096 IP_VS_DBG(1, "%s(): destination not found!\n", __func__); 1087 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1097 return -ENOENT; 1088 return -ENOENT;
1098 } 1089 }
1099 1090
1100 write_lock_bh(&__ip_vs_svc_lock);
1101
1102 /*
1103 * Wait until all other svc users go away.
1104 */
1105 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1106
1107 /* 1091 /*
1108 * Unlink dest from the service 1092 * Unlink dest from the service
1109 */ 1093 */
1110 __ip_vs_unlink_dest(svc, dest, 1); 1094 __ip_vs_unlink_dest(svc, dest, 1);
1111 1095
1112 write_unlock_bh(&__ip_vs_svc_lock);
1113
1114 /* 1096 /*
1115 * Delete the destination 1097 * Delete the destination
1116 */ 1098 */
1117 __ip_vs_del_dest(svc->net, dest); 1099 __ip_vs_del_dest(svc->net, dest, false);
1118 1100
1119 LeaveFunction(2); 1101 LeaveFunction(2);
1120 1102
1121 return 0; 1103 return 0;
1122} 1104}
1123 1105
1106static void ip_vs_dest_trash_expire(unsigned long data)
1107{
1108 struct net *net = (struct net *) data;
1109 struct netns_ipvs *ipvs = net_ipvs(net);
1110 struct ip_vs_dest *dest, *next;
1111
1112 spin_lock(&ipvs->dest_trash_lock);
1113 list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
1114 /* Skip if dest is in grace period */
1115 if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
1116 continue;
1117 if (atomic_read(&dest->refcnt) > 0)
1118 continue;
1119 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
1120 dest->vfwmark,
1121 IP_VS_DBG_ADDR(dest->svc->af, &dest->addr),
1122 ntohs(dest->port));
1123 list_del(&dest->t_list);
1124 ip_vs_dest_free(dest);
1125 }
1126 if (!list_empty(&ipvs->dest_trash))
1127 mod_timer(&ipvs->dest_trash_timer,
1128 jiffies + IP_VS_DEST_TRASH_PERIOD);
1129 spin_unlock(&ipvs->dest_trash_lock);
1130}
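The new dest_trash_timer turns the trash list from a reuse cache into a garbage collector: the timer is armed when the first entry is trashed, reaps entries whose grace period is over and whose refcnt dropped to zero, and re-arms itself only while the list stays non-empty. A condensed sketch of that arm/reap/re-arm pattern, using the old setup_timer()/mod_timer() API this tree uses (entry type and period are hypothetical):

#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/timer.h>

#define REAP_PERIOD	(60 * HZ)	/* hypothetical reap interval */

struct trash_entry {
	struct list_head t_list;
	bool reapable;			/* grace period over, no references */
};

static LIST_HEAD(trash);
static DEFINE_SPINLOCK(trash_lock);
static struct timer_list trash_timer;

static void trash_expire(unsigned long data)
{
	struct trash_entry *e, *next;

	spin_lock(&trash_lock);		/* timer runs in softirq context */
	list_for_each_entry_safe(e, next, &trash, t_list) {
		if (!e->reapable)
			continue;
		list_del(&e->t_list);
		kfree(e);
	}
	if (!list_empty(&trash))	/* keep ticking while work remains */
		mod_timer(&trash_timer, jiffies + REAP_PERIOD);
	spin_unlock(&trash_lock);
}

static void trash_add(struct trash_entry *e)
{
	spin_lock_bh(&trash_lock);
	if (list_empty(&trash))		/* first entry arms the timer */
		mod_timer(&trash_timer, jiffies + REAP_PERIOD);
	list_add(&e->t_list, &trash);
	spin_unlock_bh(&trash_lock);
}

static void trash_init(void)
{
	setup_timer(&trash_timer, trash_expire, 0UL);
}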
1124 1131
1125/* 1132/*
1126 * Add a service into the service hash table 1133 * Add a service into the service hash table
@@ -1176,7 +1183,6 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1176 } 1183 }
1177 1184
1178 /* I'm the first user of the service */ 1185 /* I'm the first user of the service */
1179 atomic_set(&svc->usecnt, 0);
1180 atomic_set(&svc->refcnt, 0); 1186 atomic_set(&svc->refcnt, 0);
1181 1187
1182 svc->af = u->af; 1188 svc->af = u->af;
@@ -1190,7 +1196,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1190 svc->net = net; 1196 svc->net = net;
1191 1197
1192 INIT_LIST_HEAD(&svc->destinations); 1198 INIT_LIST_HEAD(&svc->destinations);
1193 rwlock_init(&svc->sched_lock); 1199 spin_lock_init(&svc->sched_lock);
1194 spin_lock_init(&svc->stats.lock); 1200 spin_lock_init(&svc->stats.lock);
1195 1201
1196 /* Bind the scheduler */ 1202 /* Bind the scheduler */
@@ -1200,7 +1206,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1200 sched = NULL; 1206 sched = NULL;
1201 1207
1202 /* Bind the ct retriever */ 1208 /* Bind the ct retriever */
1203 ip_vs_bind_pe(svc, pe); 1209 RCU_INIT_POINTER(svc->pe, pe);
1204 pe = NULL; 1210 pe = NULL;
1205 1211
1206 /* Update the virtual service counters */ 1212 /* Update the virtual service counters */
@@ -1216,9 +1222,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1216 ipvs->num_services++; 1222 ipvs->num_services++;
1217 1223
1218 /* Hash the service into the service table */ 1224 /* Hash the service into the service table */
1219 write_lock_bh(&__ip_vs_svc_lock);
1220 ip_vs_svc_hash(svc); 1225 ip_vs_svc_hash(svc);
1221 write_unlock_bh(&__ip_vs_svc_lock);
1222 1226
1223 *svc_p = svc; 1227 *svc_p = svc;
1224 /* Now there is a service - full throttle */ 1228 /* Now there is a service - full throttle */
@@ -1228,15 +1232,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1228 1232
1229 out_err: 1233 out_err:
1230 if (svc != NULL) { 1234 if (svc != NULL) {
1231 ip_vs_unbind_scheduler(svc); 1235 ip_vs_unbind_scheduler(svc, sched);
1232 if (svc->inc) { 1236 ip_vs_service_free(svc);
1233 local_bh_disable();
1234 ip_vs_app_inc_put(svc->inc);
1235 local_bh_enable();
1236 }
1237 if (svc->stats.cpustats)
1238 free_percpu(svc->stats.cpustats);
1239 kfree(svc);
1240 } 1237 }
1241 ip_vs_scheduler_put(sched); 1238 ip_vs_scheduler_put(sched);
1242 ip_vs_pe_put(pe); 1239 ip_vs_pe_put(pe);
@@ -1286,12 +1283,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1286 } 1283 }
1287#endif 1284#endif
1288 1285
1289 write_lock_bh(&__ip_vs_svc_lock); 1286 old_sched = rcu_dereference_protected(svc->scheduler, 1);
1290 1287 if (sched != old_sched) {
1291 /* 1288 /* Bind the new scheduler */
1292 * Wait until all other svc users go away. 1289 ret = ip_vs_bind_scheduler(svc, sched);
1293 */ 1290 if (ret) {
1294 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); 1291 old_sched = sched;
1292 goto out;
1293 }
1294 /* Unbind the old scheduler on success */
1295 ip_vs_unbind_scheduler(svc, old_sched);
1296 }
1295 1297
1296 /* 1298 /*
1297 * Set the flags and timeout value 1299 * Set the flags and timeout value
@@ -1300,57 +1302,30 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1300 svc->timeout = u->timeout * HZ; 1302 svc->timeout = u->timeout * HZ;
1301 svc->netmask = u->netmask; 1303 svc->netmask = u->netmask;
1302 1304
1303 old_sched = svc->scheduler; 1305 old_pe = rcu_dereference_protected(svc->pe, 1);
1304 if (sched != old_sched) { 1306 if (pe != old_pe)
1305 /* 1307 rcu_assign_pointer(svc->pe, pe);
1306 * Unbind the old scheduler
1307 */
1308 if ((ret = ip_vs_unbind_scheduler(svc))) {
1309 old_sched = sched;
1310 goto out_unlock;
1311 }
1312
1313 /*
1314 * Bind the new scheduler
1315 */
1316 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1317 /*
1318 * If ip_vs_bind_scheduler fails, restore the old
1319 * scheduler.
1320 * The main reason of failure is out of memory.
1321 *
1322 * The question is if the old scheduler can be
1323 * restored all the time. TODO: if it cannot be
1324 * restored some time, we must delete the service,
1325 * otherwise the system may crash.
1326 */
1327 ip_vs_bind_scheduler(svc, old_sched);
1328 old_sched = sched;
1329 goto out_unlock;
1330 }
1331 }
1332 1308
1333 old_pe = svc->pe;
1334 if (pe != old_pe) {
1335 ip_vs_unbind_pe(svc);
1336 ip_vs_bind_pe(svc, pe);
1337 }
1338
1339out_unlock:
1340 write_unlock_bh(&__ip_vs_svc_lock);
1341out: 1309out:
1342 ip_vs_scheduler_put(old_sched); 1310 ip_vs_scheduler_put(old_sched);
1343 ip_vs_pe_put(old_pe); 1311 ip_vs_pe_put(old_pe);
1344 return ret; 1312 return ret;
1345} 1313}
1346 1314
1315static void ip_vs_service_rcu_free(struct rcu_head *head)
1316{
1317 struct ip_vs_service *svc;
1318
1319 svc = container_of(head, struct ip_vs_service, rcu_head);
1320 ip_vs_service_free(svc);
1321}
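With usecnt gone and services released through call_rcu(), the lookup contract becomes purely RCU-scoped: a caller that finds a service under rcu_read_lock() may use it until the matching rcu_read_unlock(), with no get/put pair. A hypothetical caller illustrating that contract (ip_vs_service_find() and the persistence flag come from the IPVS headers after this change; the wrapper itself is made up):

#include <net/ip_vs.h>

static bool svc_is_persistent(struct net *net, int af, __u32 fwmark,
			      __u16 protocol,
			      const union nf_inet_addr *vaddr, __be16 vport)
{
	struct ip_vs_service *svc;
	bool persistent = false;

	rcu_read_lock();
	svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport);
	if (svc)
		persistent = !!(svc->flags & IP_VS_SVC_F_PERSISTENT);
	rcu_read_unlock();	/* svc must not be dereferenced past here */

	return persistent;
}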
1347 1322
1348/* 1323/*
1349 * Delete a service from the service list 1324 * Delete a service from the service list
1350 * - The service must be unlinked, unlocked and not referenced! 1325 * - The service must be unlinked, unlocked and not referenced!
1351 * - We are called under _bh lock 1326 * - We are called under _bh lock
1352 */ 1327 */
1353static void __ip_vs_del_service(struct ip_vs_service *svc) 1328static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
1354{ 1329{
1355 struct ip_vs_dest *dest, *nxt; 1330 struct ip_vs_dest *dest, *nxt;
1356 struct ip_vs_scheduler *old_sched; 1331 struct ip_vs_scheduler *old_sched;
@@ -1366,27 +1341,20 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1366 ip_vs_stop_estimator(svc->net, &svc->stats); 1341 ip_vs_stop_estimator(svc->net, &svc->stats);
1367 1342
1368 /* Unbind scheduler */ 1343 /* Unbind scheduler */
1369 old_sched = svc->scheduler; 1344 old_sched = rcu_dereference_protected(svc->scheduler, 1);
1370 ip_vs_unbind_scheduler(svc); 1345 ip_vs_unbind_scheduler(svc, old_sched);
1371 ip_vs_scheduler_put(old_sched); 1346 ip_vs_scheduler_put(old_sched);
1372 1347
1373 /* Unbind persistence engine */ 1348 /* Unbind persistence engine, keep svc->pe */
1374 old_pe = svc->pe; 1349 old_pe = rcu_dereference_protected(svc->pe, 1);
1375 ip_vs_unbind_pe(svc);
1376 ip_vs_pe_put(old_pe); 1350 ip_vs_pe_put(old_pe);
1377 1351
1378 /* Unbind app inc */
1379 if (svc->inc) {
1380 ip_vs_app_inc_put(svc->inc);
1381 svc->inc = NULL;
1382 }
1383
1384 /* 1352 /*
1385 * Unlink the whole destination list 1353 * Unlink the whole destination list
1386 */ 1354 */
1387 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { 1355 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1388 __ip_vs_unlink_dest(svc, dest, 0); 1356 __ip_vs_unlink_dest(svc, dest, 0);
1389 __ip_vs_del_dest(svc->net, dest); 1357 __ip_vs_del_dest(svc->net, dest, cleanup);
1390 } 1358 }
1391 1359
1392 /* 1360 /*
@@ -1400,13 +1368,12 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1400 /* 1368 /*
1401 * Free the service if nobody refers to it 1369 * Free the service if nobody refers to it
1402 */ 1370 */
1403 if (atomic_read(&svc->refcnt) == 0) { 1371 if (atomic_dec_and_test(&svc->refcnt)) {
1404 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", 1372 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
1405 svc->fwmark, 1373 svc->fwmark,
1406 IP_VS_DBG_ADDR(svc->af, &svc->addr), 1374 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1407 ntohs(svc->port), atomic_read(&svc->usecnt)); 1375 ntohs(svc->port));
1408 free_percpu(svc->stats.cpustats); 1376 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
1409 kfree(svc);
1410 } 1377 }
1411 1378
1412 /* decrease the module use count */ 1379 /* decrease the module use count */
@@ -1416,23 +1383,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1416/* 1383/*
1417 * Unlink a service from list and try to delete it if its refcnt reached 0 1384 * Unlink a service from list and try to delete it if its refcnt reached 0
1418 */ 1385 */
1419static void ip_vs_unlink_service(struct ip_vs_service *svc) 1386static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
1420{ 1387{
1388 /* Hold svc to avoid double release from dest_trash */
1389 atomic_inc(&svc->refcnt);
1421 /* 1390 /*
1422 * Unhash it from the service table 1391 * Unhash it from the service table
1423 */ 1392 */
1424 write_lock_bh(&__ip_vs_svc_lock);
1425
1426 ip_vs_svc_unhash(svc); 1393 ip_vs_svc_unhash(svc);
1427 1394
1428 /* 1395 __ip_vs_del_service(svc, cleanup);
1429 * Wait until all the svc users go away.
1430 */
1431 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1432
1433 __ip_vs_del_service(svc);
1434
1435 write_unlock_bh(&__ip_vs_svc_lock);
1436} 1396}
1437 1397
1438/* 1398/*
@@ -1442,7 +1402,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
1442{ 1402{
1443 if (svc == NULL) 1403 if (svc == NULL)
1444 return -EEXIST; 1404 return -EEXIST;
1445 ip_vs_unlink_service(svc); 1405 ip_vs_unlink_service(svc, false);
1446 1406
1447 return 0; 1407 return 0;
1448} 1408}
@@ -1451,19 +1411,20 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
1451/* 1411/*
1452 * Flush all the virtual services 1412 * Flush all the virtual services
1453 */ 1413 */
1454static int ip_vs_flush(struct net *net) 1414static int ip_vs_flush(struct net *net, bool cleanup)
1455{ 1415{
1456 int idx; 1416 int idx;
1457 struct ip_vs_service *svc, *nxt; 1417 struct ip_vs_service *svc;
1418 struct hlist_node *n;
1458 1419
1459 /* 1420 /*
1460 * Flush the service table hashed by <netns,protocol,addr,port> 1421 * Flush the service table hashed by <netns,protocol,addr,port>
1461 */ 1422 */
1462 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1423 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1463 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], 1424 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
1464 s_list) { 1425 s_list) {
1465 if (net_eq(svc->net, net)) 1426 if (net_eq(svc->net, net))
1466 ip_vs_unlink_service(svc); 1427 ip_vs_unlink_service(svc, cleanup);
1467 } 1428 }
1468 } 1429 }
1469 1430
@@ -1471,10 +1432,10 @@ static int ip_vs_flush(struct net *net)
1471 * Flush the service table hashed by fwmark 1432 * Flush the service table hashed by fwmark
1472 */ 1433 */
1473 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1434 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1474 list_for_each_entry_safe(svc, nxt, 1435 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
1475 &ip_vs_svc_fwm_table[idx], f_list) { 1436 f_list) {
1476 if (net_eq(svc->net, net)) 1437 if (net_eq(svc->net, net))
1477 ip_vs_unlink_service(svc); 1438 ip_vs_unlink_service(svc, cleanup);
1478 } 1439 }
1479 } 1440 }
1480 1441
@@ -1490,32 +1451,29 @@ void ip_vs_service_net_cleanup(struct net *net)
1490 EnterFunction(2); 1451 EnterFunction(2);
1491 /* Check for "full" addressed entries */ 1452 /* Check for "full" addressed entries */
1492 mutex_lock(&__ip_vs_mutex); 1453 mutex_lock(&__ip_vs_mutex);
1493 ip_vs_flush(net); 1454 ip_vs_flush(net, true);
1494 mutex_unlock(&__ip_vs_mutex); 1455 mutex_unlock(&__ip_vs_mutex);
1495 LeaveFunction(2); 1456 LeaveFunction(2);
1496} 1457}
1497/* 1458
1498 * Release dst hold by dst_cache 1459/* Put all references for device (dst_cache) */
1499 */
1500static inline void 1460static inline void
1501__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev) 1461ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
1502{ 1462{
1503 spin_lock_bh(&dest->dst_lock); 1463 spin_lock_bh(&dest->dst_lock);
1504 if (dest->dst_cache && dest->dst_cache->dev == dev) { 1464 if (dest->dest_dst && dest->dest_dst->dst_cache->dev == dev) {
1505 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", 1465 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1506 dev->name, 1466 dev->name,
1507 IP_VS_DBG_ADDR(dest->af, &dest->addr), 1467 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1508 ntohs(dest->port), 1468 ntohs(dest->port),
1509 atomic_read(&dest->refcnt)); 1469 atomic_read(&dest->refcnt));
1510 ip_vs_dst_reset(dest); 1470 __ip_vs_dst_cache_reset(dest);
1511 } 1471 }
1512 spin_unlock_bh(&dest->dst_lock); 1472 spin_unlock_bh(&dest->dst_lock);
1513 1473
1514} 1474}
1515/* 1475/* Netdev event receiver
1516 * Netdev event receiver 1476 * Currently only NETDEV_DOWN is handled to release refs to cached dsts
1517 * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
1518 * a device that is "unregister" it must be released.
1519 */ 1477 */
1520static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, 1478static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1521 void *ptr) 1479 void *ptr)
@@ -1527,35 +1485,37 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1527 struct ip_vs_dest *dest; 1485 struct ip_vs_dest *dest;
1528 unsigned int idx; 1486 unsigned int idx;
1529 1487
1530 if (event != NETDEV_UNREGISTER || !ipvs) 1488 if (event != NETDEV_DOWN || !ipvs)
1531 return NOTIFY_DONE; 1489 return NOTIFY_DONE;
1532 IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); 1490 IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1533 EnterFunction(2); 1491 EnterFunction(2);
1534 mutex_lock(&__ip_vs_mutex); 1492 mutex_lock(&__ip_vs_mutex);
1535 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1493 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1536 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1494 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1537 if (net_eq(svc->net, net)) { 1495 if (net_eq(svc->net, net)) {
1538 list_for_each_entry(dest, &svc->destinations, 1496 list_for_each_entry(dest, &svc->destinations,
1539 n_list) { 1497 n_list) {
1540 __ip_vs_dev_reset(dest, dev); 1498 ip_vs_forget_dev(dest, dev);
1541 } 1499 }
1542 } 1500 }
1543 } 1501 }
1544 1502
1545 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1503 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1546 if (net_eq(svc->net, net)) { 1504 if (net_eq(svc->net, net)) {
1547 list_for_each_entry(dest, &svc->destinations, 1505 list_for_each_entry(dest, &svc->destinations,
1548 n_list) { 1506 n_list) {
1549 __ip_vs_dev_reset(dest, dev); 1507 ip_vs_forget_dev(dest, dev);
1550 } 1508 }
1551 } 1509 }
1552 1510
1553 } 1511 }
1554 } 1512 }
1555 1513
1556 list_for_each_entry(dest, &ipvs->dest_trash, n_list) { 1514 spin_lock_bh(&ipvs->dest_trash_lock);
1557 __ip_vs_dev_reset(dest, dev); 1515 list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
1516 ip_vs_forget_dev(dest, dev);
1558 } 1517 }
1518 spin_unlock_bh(&ipvs->dest_trash_lock);
1559 mutex_unlock(&__ip_vs_mutex); 1519 mutex_unlock(&__ip_vs_mutex);
1560 LeaveFunction(2); 1520 LeaveFunction(2);
1561 return NOTIFY_DONE; 1521 return NOTIFY_DONE;
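Switching the hook from NETDEV_UNREGISTER to NETDEV_DOWN means cached routes are dropped as soon as the device goes down rather than only at unregister time. For reference, the registration side of such a hook is the usual notifier_block pattern; a sketch under the pre-3.11 convention this tree uses, where ptr is the net_device itself (all names hypothetical):

#include <linux/init.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/notifier.h>

static int my_dst_event(struct notifier_block *this, unsigned long event,
			void *ptr)
{
	struct net_device *dev = ptr;	/* direct cast, pre-3.11 style */

	if (event != NETDEV_DOWN)
		return NOTIFY_DONE;
	pr_debug("dropping cached dsts for %s\n", dev->name);
	/* ... walk private state and release dst references here ... */
	return NOTIFY_DONE;
}

static struct notifier_block my_dst_notifier = {
	.notifier_call = my_dst_event,
};

static int __init my_init(void)
{
	return register_netdevice_notifier(&my_dst_notifier);
}

static void __exit my_exit(void)
{
	unregister_netdevice_notifier(&my_dst_notifier);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");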
@@ -1568,12 +1528,10 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
1568{ 1528{
1569 struct ip_vs_dest *dest; 1529 struct ip_vs_dest *dest;
1570 1530
1571 write_lock_bh(&__ip_vs_svc_lock);
1572 list_for_each_entry(dest, &svc->destinations, n_list) { 1531 list_for_each_entry(dest, &svc->destinations, n_list) {
1573 ip_vs_zero_stats(&dest->stats); 1532 ip_vs_zero_stats(&dest->stats);
1574 } 1533 }
1575 ip_vs_zero_stats(&svc->stats); 1534 ip_vs_zero_stats(&svc->stats);
1576 write_unlock_bh(&__ip_vs_svc_lock);
1577 return 0; 1535 return 0;
1578} 1536}
1579 1537
@@ -1583,14 +1541,14 @@ static int ip_vs_zero_all(struct net *net)
1583 struct ip_vs_service *svc; 1541 struct ip_vs_service *svc;
1584 1542
1585 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1543 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1586 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1544 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1587 if (net_eq(svc->net, net)) 1545 if (net_eq(svc->net, net))
1588 ip_vs_zero_service(svc); 1546 ip_vs_zero_service(svc);
1589 } 1547 }
1590 } 1548 }
1591 1549
1592 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1550 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1593 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1551 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1594 if (net_eq(svc->net, net)) 1552 if (net_eq(svc->net, net))
1595 ip_vs_zero_service(svc); 1553 ip_vs_zero_service(svc);
1596 } 1554 }
@@ -1918,7 +1876,7 @@ static struct ctl_table vs_vars[] = {
1918 1876
1919struct ip_vs_iter { 1877struct ip_vs_iter {
1920 struct seq_net_private p; /* Do not move this, netns depends upon it*/ 1878 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1921 struct list_head *table; 1879 struct hlist_head *table;
1922 int bucket; 1880 int bucket;
1923}; 1881};
1924 1882
@@ -1951,7 +1909,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1951 1909
1952 /* look in hash by protocol */ 1910 /* look in hash by protocol */
1953 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1911 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1954 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1912 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
1955 if (net_eq(svc->net, net) && pos-- == 0) { 1913 if (net_eq(svc->net, net) && pos-- == 0) {
1956 iter->table = ip_vs_svc_table; 1914 iter->table = ip_vs_svc_table;
1957 iter->bucket = idx; 1915 iter->bucket = idx;
@@ -1962,7 +1920,8 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1962 1920
1963 /* keep looking in fwmark */ 1921 /* keep looking in fwmark */
1964 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1922 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1965 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1923 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
1924 f_list) {
1966 if (net_eq(svc->net, net) && pos-- == 0) { 1925 if (net_eq(svc->net, net) && pos-- == 0) {
1967 iter->table = ip_vs_svc_fwm_table; 1926 iter->table = ip_vs_svc_fwm_table;
1968 iter->bucket = idx; 1927 iter->bucket = idx;
@@ -1975,17 +1934,16 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1975} 1934}
1976 1935
1977static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) 1936static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1978__acquires(__ip_vs_svc_lock)
1979{ 1937{
1980 1938
1981 read_lock_bh(&__ip_vs_svc_lock); 1939 rcu_read_lock();
1982 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; 1940 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1983} 1941}
1984 1942
1985 1943
1986static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1944static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1987{ 1945{
1988 struct list_head *e; 1946 struct hlist_node *e;
1989 struct ip_vs_iter *iter; 1947 struct ip_vs_iter *iter;
1990 struct ip_vs_service *svc; 1948 struct ip_vs_service *svc;
1991 1949
@@ -1998,13 +1956,14 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1998 1956
1999 if (iter->table == ip_vs_svc_table) { 1957 if (iter->table == ip_vs_svc_table) {
2000 /* next service in table hashed by protocol */ 1958 /* next service in table hashed by protocol */
2001 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket]) 1959 e = rcu_dereference(hlist_next_rcu(&svc->s_list));
2002 return list_entry(e, struct ip_vs_service, s_list); 1960 if (e)
2003 1961 return hlist_entry(e, struct ip_vs_service, s_list);
2004 1962
2005 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 1963 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2006 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket], 1964 hlist_for_each_entry_rcu(svc,
2007 s_list) { 1965 &ip_vs_svc_table[iter->bucket],
1966 s_list) {
2008 return svc; 1967 return svc;
2009 } 1968 }
2010 } 1969 }
@@ -2015,13 +1974,15 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2015 } 1974 }
2016 1975
2017 /* next service in hashed by fwmark */ 1976 /* next service in hashed by fwmark */
2018 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket]) 1977 e = rcu_dereference(hlist_next_rcu(&svc->f_list));
2019 return list_entry(e, struct ip_vs_service, f_list); 1978 if (e)
1979 return hlist_entry(e, struct ip_vs_service, f_list);
2020 1980
2021 scan_fwmark: 1981 scan_fwmark:
2022 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 1982 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2023 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket], 1983 hlist_for_each_entry_rcu(svc,
2024 f_list) 1984 &ip_vs_svc_fwm_table[iter->bucket],
1985 f_list)
2025 return svc; 1986 return svc;
2026 } 1987 }
2027 1988
@@ -2029,9 +1990,8 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2029} 1990}
2030 1991
2031static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) 1992static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
2032__releases(__ip_vs_svc_lock)
2033{ 1993{
2034 read_unlock_bh(&__ip_vs_svc_lock); 1994 rcu_read_unlock();
2035} 1995}
2036 1996
2037 1997
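After this conversion the /proc iterator holds only rcu_read_lock() between ->start and ->stop, so ->show runs inside a read-side section and must neither sleep nor stash pointers past ->stop. A minimal RCU-backed seq_operations sketch over a hypothetical items list (entries are assumed to be added with list_add_rcu() and freed only after a grace period):

#include <linux/rculist.h>
#include <linux/seq_file.h>

struct item {
	struct list_head list;
	int value;
};

static LIST_HEAD(items);

static void *items_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct item *it;
	loff_t n = *pos;

	rcu_read_lock();
	list_for_each_entry_rcu(it, &items, list)
		if (n-- == 0)
			return it;
	return NULL;
}

static void *items_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct item *it = v;

	++*pos;
	it = list_entry_rcu(it->list.next, struct item, list);
	return &it->list == &items ? NULL : it;
}

static void items_seq_stop(struct seq_file *seq, void *v)
{
	rcu_read_unlock();	/* ends the read-side critical section */
}

static int items_seq_show(struct seq_file *seq, void *v)
{
	const struct item *it = v;

	seq_printf(seq, "%d\n", it->value);
	return 0;
}

static const struct seq_operations items_seq_ops = {
	.start = items_seq_start,
	.next  = items_seq_next,
	.stop  = items_seq_stop,
	.show  = items_seq_show,
};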
@@ -2049,6 +2009,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2049 const struct ip_vs_service *svc = v; 2009 const struct ip_vs_service *svc = v;
2050 const struct ip_vs_iter *iter = seq->private; 2010 const struct ip_vs_iter *iter = seq->private;
2051 const struct ip_vs_dest *dest; 2011 const struct ip_vs_dest *dest;
2012 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
2052 2013
2053 if (iter->table == ip_vs_svc_table) { 2014 if (iter->table == ip_vs_svc_table) {
2054#ifdef CONFIG_IP_VS_IPV6 2015#ifdef CONFIG_IP_VS_IPV6
@@ -2057,18 +2018,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2057 ip_vs_proto_name(svc->protocol), 2018 ip_vs_proto_name(svc->protocol),
2058 &svc->addr.in6, 2019 &svc->addr.in6,
2059 ntohs(svc->port), 2020 ntohs(svc->port),
2060 svc->scheduler->name); 2021 sched->name);
2061 else 2022 else
2062#endif 2023#endif
2063 seq_printf(seq, "%s %08X:%04X %s %s ", 2024 seq_printf(seq, "%s %08X:%04X %s %s ",
2064 ip_vs_proto_name(svc->protocol), 2025 ip_vs_proto_name(svc->protocol),
2065 ntohl(svc->addr.ip), 2026 ntohl(svc->addr.ip),
2066 ntohs(svc->port), 2027 ntohs(svc->port),
2067 svc->scheduler->name, 2028 sched->name,
2068 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2029 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2069 } else { 2030 } else {
2070 seq_printf(seq, "FWM %08X %s %s", 2031 seq_printf(seq, "FWM %08X %s %s",
2071 svc->fwmark, svc->scheduler->name, 2032 svc->fwmark, sched->name,
2072 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2033 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2073 } 2034 }
2074 2035
@@ -2079,7 +2040,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2079 else 2040 else
2080 seq_putc(seq, '\n'); 2041 seq_putc(seq, '\n');
2081 2042
2082 list_for_each_entry(dest, &svc->destinations, n_list) { 2043 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
2083#ifdef CONFIG_IP_VS_IPV6 2044#ifdef CONFIG_IP_VS_IPV6
2084 if (dest->af == AF_INET6) 2045 if (dest->af == AF_INET6)
2085 seq_printf(seq, 2046 seq_printf(seq,
@@ -2389,7 +2350,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2389 2350
2390 if (cmd == IP_VS_SO_SET_FLUSH) { 2351 if (cmd == IP_VS_SO_SET_FLUSH) {
2391 /* Flush the virtual service */ 2352 /* Flush the virtual service */
2392 ret = ip_vs_flush(net); 2353 ret = ip_vs_flush(net, false);
2393 goto out_unlock; 2354 goto out_unlock;
2394 } else if (cmd == IP_VS_SO_SET_TIMEOUT) { 2355 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2395 /* Set timeout values for (tcp tcpfin udp) */ 2356 /* Set timeout values for (tcp tcpfin udp) */
@@ -2424,11 +2385,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2424 } 2385 }
2425 2386
2426 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 2387 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2388 rcu_read_lock();
2427 if (usvc.fwmark == 0) 2389 if (usvc.fwmark == 0)
2428 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, 2390 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2429 &usvc.addr, usvc.port); 2391 &usvc.addr, usvc.port);
2430 else 2392 else
2431 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); 2393 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2394 rcu_read_unlock();
2432 2395
2433 if (cmd != IP_VS_SO_SET_ADD 2396 if (cmd != IP_VS_SO_SET_ADD
2434 && (svc == NULL || svc->protocol != usvc.protocol)) { 2397 && (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2480,11 +2443,14 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2480static void 2443static void
2481ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) 2444ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2482{ 2445{
2446 struct ip_vs_scheduler *sched;
2447
2448 sched = rcu_dereference_protected(src->scheduler, 1);
2483 dst->protocol = src->protocol; 2449 dst->protocol = src->protocol;
2484 dst->addr = src->addr.ip; 2450 dst->addr = src->addr.ip;
2485 dst->port = src->port; 2451 dst->port = src->port;
2486 dst->fwmark = src->fwmark; 2452 dst->fwmark = src->fwmark;
2487 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); 2453 strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name));
2488 dst->flags = src->flags; 2454 dst->flags = src->flags;
2489 dst->timeout = src->timeout / HZ; 2455 dst->timeout = src->timeout / HZ;
2490 dst->netmask = src->netmask; 2456 dst->netmask = src->netmask;
@@ -2503,7 +2469,7 @@ __ip_vs_get_service_entries(struct net *net,
2503 int ret = 0; 2469 int ret = 0;
2504 2470
2505 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2471 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2506 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 2472 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2507 /* Only expose IPv4 entries to old interface */ 2473 /* Only expose IPv4 entries to old interface */
2508 if (svc->af != AF_INET || !net_eq(svc->net, net)) 2474 if (svc->af != AF_INET || !net_eq(svc->net, net))
2509 continue; 2475 continue;
@@ -2522,7 +2488,7 @@ __ip_vs_get_service_entries(struct net *net,
2522 } 2488 }
2523 2489
2524 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2490 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2525 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 2491 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2526 /* Only expose IPv4 entries to old interface */ 2492 /* Only expose IPv4 entries to old interface */
2527 if (svc->af != AF_INET || !net_eq(svc->net, net)) 2493 if (svc->af != AF_INET || !net_eq(svc->net, net))
2528 continue; 2494 continue;
@@ -2551,11 +2517,13 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2551 union nf_inet_addr addr = { .ip = get->addr }; 2517 union nf_inet_addr addr = { .ip = get->addr };
2552 int ret = 0; 2518 int ret = 0;
2553 2519
2520 rcu_read_lock();
2554 if (get->fwmark) 2521 if (get->fwmark)
2555 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); 2522 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2556 else 2523 else
2557 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, 2524 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2558 get->port); 2525 get->port);
2526 rcu_read_unlock();
2559 2527
2560 if (svc) { 2528 if (svc) {
2561 int count = 0; 2529 int count = 0;
@@ -2738,12 +2706,14 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2738 2706
2739 entry = (struct ip_vs_service_entry *)arg; 2707 entry = (struct ip_vs_service_entry *)arg;
2740 addr.ip = entry->addr; 2708 addr.ip = entry->addr;
2709 rcu_read_lock();
2741 if (entry->fwmark) 2710 if (entry->fwmark)
2742 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); 2711 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2743 else 2712 else
2744 svc = __ip_vs_service_find(net, AF_INET, 2713 svc = __ip_vs_service_find(net, AF_INET,
2745 entry->protocol, &addr, 2714 entry->protocol, &addr,
2746 entry->port); 2715 entry->port);
2716 rcu_read_unlock();
2747 if (svc) { 2717 if (svc) {
2748 ip_vs_copy_service(entry, svc); 2718 ip_vs_copy_service(entry, svc);
2749 if (copy_to_user(user, entry, sizeof(*entry)) != 0) 2719 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2900,6 +2870,7 @@ nla_put_failure:
2900static int ip_vs_genl_fill_service(struct sk_buff *skb, 2870static int ip_vs_genl_fill_service(struct sk_buff *skb,
2901 struct ip_vs_service *svc) 2871 struct ip_vs_service *svc)
2902{ 2872{
2873 struct ip_vs_scheduler *sched;
2903 struct nlattr *nl_service; 2874 struct nlattr *nl_service;
2904 struct ip_vs_flags flags = { .flags = svc->flags, 2875 struct ip_vs_flags flags = { .flags = svc->flags,
2905 .mask = ~0 }; 2876 .mask = ~0 };
@@ -2920,7 +2891,8 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
2920 goto nla_put_failure; 2891 goto nla_put_failure;
2921 } 2892 }
2922 2893
2923 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) || 2894 sched = rcu_dereference_protected(svc->scheduler, 1);
2895 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) ||
2924 (svc->pe && 2896 (svc->pe &&
2925 nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) || 2897 nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
2926 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || 2898 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
@@ -2971,7 +2943,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
2971 2943
2972 mutex_lock(&__ip_vs_mutex); 2944 mutex_lock(&__ip_vs_mutex);
2973 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 2945 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2974 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { 2946 hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2975 if (++idx <= start || !net_eq(svc->net, net)) 2947 if (++idx <= start || !net_eq(svc->net, net))
2976 continue; 2948 continue;
2977 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 2949 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
@@ -2982,7 +2954,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
2982 } 2954 }
2983 2955
2984 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 2956 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2985 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { 2957 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2986 if (++idx <= start || !net_eq(svc->net, net)) 2958 if (++idx <= start || !net_eq(svc->net, net))
2987 continue; 2959 continue;
2988 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 2960 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
@@ -3042,11 +3014,13 @@ static int ip_vs_genl_parse_service(struct net *net,
3042 usvc->fwmark = 0; 3014 usvc->fwmark = 0;
3043 } 3015 }
3044 3016
3017 rcu_read_lock();
3045 if (usvc->fwmark) 3018 if (usvc->fwmark)
3046 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); 3019 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
3047 else 3020 else
3048 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, 3021 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
3049 &usvc->addr, usvc->port); 3022 &usvc->addr, usvc->port);
3023 rcu_read_unlock();
3050 *ret_svc = svc; 3024 *ret_svc = svc;
3051 3025
3052 /* If a full entry was requested, check for the additional fields */ 3026 /* If a full entry was requested, check for the additional fields */
@@ -3398,7 +3372,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3398 mutex_lock(&__ip_vs_mutex); 3372 mutex_lock(&__ip_vs_mutex);
3399 3373
3400 if (cmd == IPVS_CMD_FLUSH) { 3374 if (cmd == IPVS_CMD_FLUSH) {
3401 ret = ip_vs_flush(net); 3375 ret = ip_vs_flush(net, false);
3402 goto out; 3376 goto out;
3403 } else if (cmd == IPVS_CMD_SET_CONFIG) { 3377 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3404 ret = ip_vs_genl_set_config(net, info->attrs); 3378 ret = ip_vs_genl_set_config(net, info->attrs);
@@ -3790,13 +3764,14 @@ int __net_init ip_vs_control_net_init(struct net *net)
3790 int idx; 3764 int idx;
3791 struct netns_ipvs *ipvs = net_ipvs(net); 3765 struct netns_ipvs *ipvs = net_ipvs(net);
3792 3766
3793 rwlock_init(&ipvs->rs_lock);
3794
3795 /* Initialize rs_table */ 3767 /* Initialize rs_table */
3796 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) 3768 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3797 INIT_LIST_HEAD(&ipvs->rs_table[idx]); 3769 INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
3798 3770
3799 INIT_LIST_HEAD(&ipvs->dest_trash); 3771 INIT_LIST_HEAD(&ipvs->dest_trash);
3772 spin_lock_init(&ipvs->dest_trash_lock);
3773 setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
3774 (unsigned long) net);
3800 atomic_set(&ipvs->ftpsvc_counter, 0); 3775 atomic_set(&ipvs->ftpsvc_counter, 0);
3801 atomic_set(&ipvs->nullsvc_counter, 0); 3776 atomic_set(&ipvs->nullsvc_counter, 0);
3802 3777
@@ -3826,6 +3801,10 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
3826{ 3801{
3827 struct netns_ipvs *ipvs = net_ipvs(net); 3802 struct netns_ipvs *ipvs = net_ipvs(net);
3828 3803
3804 /* Some dest can be in grace period even before cleanup, we have to
3805 * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called.
3806 */
3807 rcu_barrier();
3829 ip_vs_trash_cleanup(net); 3808 ip_vs_trash_cleanup(net);
3830 ip_vs_stop_estimator(net, &ipvs->tot_stats); 3809 ip_vs_stop_estimator(net, &ipvs->tot_stats);
3831 ip_vs_control_net_cleanup_sysctl(net); 3810 ip_vs_control_net_cleanup_sysctl(net);
@@ -3871,10 +3850,10 @@ int __init ip_vs_control_init(void)
3871 3850
3872 EnterFunction(2); 3851 EnterFunction(2);
3873 3852
3874 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ 3853 /* Initialize svc_table, ip_vs_svc_fwm_table */
3875 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 3854 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3876 INIT_LIST_HEAD(&ip_vs_svc_table[idx]); 3855 INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
3877 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); 3856 INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3878 } 3857 }
3879 3858
3880 smp_wmb(); /* Do we really need it now ? */ 3859 smp_wmb(); /* Do we really need it now ? */
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 7f3b0cc00b7a..ccab120df45e 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -51,7 +51,7 @@
51 * IPVS DH bucket 51 * IPVS DH bucket
52 */ 52 */
53struct ip_vs_dh_bucket { 53struct ip_vs_dh_bucket {
54 struct ip_vs_dest *dest; /* real server (cache) */ 54 struct ip_vs_dest __rcu *dest; /* real server (cache) */
55}; 55};
56 56
57/* 57/*
@@ -64,6 +64,10 @@ struct ip_vs_dh_bucket {
64#define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS) 64#define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS)
65#define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1) 65#define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1)
66 66
67struct ip_vs_dh_state {
68 struct ip_vs_dh_bucket buckets[IP_VS_DH_TAB_SIZE];
69 struct rcu_head rcu_head;
70};
67 71
68/* 72/*
69 * Returns hash value for IPVS DH entry 73 * Returns hash value for IPVS DH entry
@@ -85,10 +89,9 @@ static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *ad
85 * Get ip_vs_dest associated with supplied parameters. 89 * Get ip_vs_dest associated with supplied parameters.
86 */ 90 */
87static inline struct ip_vs_dest * 91static inline struct ip_vs_dest *
88ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl, 92ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr)
89 const union nf_inet_addr *addr)
90{ 93{
91 return (tbl[ip_vs_dh_hashkey(af, addr)]).dest; 94 return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest);
92} 95}
93 96
94 97
@@ -96,25 +99,30 @@ ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl,
96 * Assign all the hash buckets of the specified table with the service. 99 * Assign all the hash buckets of the specified table with the service.
97 */ 100 */
98static int 101static int
99ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc) 102ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc)
100{ 103{
101 int i; 104 int i;
102 struct ip_vs_dh_bucket *b; 105 struct ip_vs_dh_bucket *b;
103 struct list_head *p; 106 struct list_head *p;
104 struct ip_vs_dest *dest; 107 struct ip_vs_dest *dest;
108 bool empty;
105 109
106 b = tbl; 110 b = &s->buckets[0];
107 p = &svc->destinations; 111 p = &svc->destinations;
112 empty = list_empty(p);
108 for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { 113 for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
109 if (list_empty(p)) { 114 dest = rcu_dereference_protected(b->dest, 1);
110 b->dest = NULL; 115 if (dest)
111 } else { 116 ip_vs_dest_put(dest);
117 if (empty)
118 RCU_INIT_POINTER(b->dest, NULL);
119 else {
112 if (p == &svc->destinations) 120 if (p == &svc->destinations)
113 p = p->next; 121 p = p->next;
114 122
115 dest = list_entry(p, struct ip_vs_dest, n_list); 123 dest = list_entry(p, struct ip_vs_dest, n_list);
116 atomic_inc(&dest->refcnt); 124 ip_vs_dest_hold(dest);
117 b->dest = dest; 125 RCU_INIT_POINTER(b->dest, dest);
118 126
119 p = p->next; 127 p = p->next;
120 } 128 }
@@ -127,16 +135,18 @@ ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
127/* 135/*
128 * Flush all the hash buckets of the specified table. 136 * Flush all the hash buckets of the specified table.
129 */ 137 */
130static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl) 138static void ip_vs_dh_flush(struct ip_vs_dh_state *s)
131{ 139{
132 int i; 140 int i;
133 struct ip_vs_dh_bucket *b; 141 struct ip_vs_dh_bucket *b;
142 struct ip_vs_dest *dest;
134 143
135 b = tbl; 144 b = &s->buckets[0];
136 for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { 145 for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
137 if (b->dest) { 146 dest = rcu_dereference_protected(b->dest, 1);
138 atomic_dec(&b->dest->refcnt); 147 if (dest) {
139 b->dest = NULL; 148 ip_vs_dest_put(dest);
149 RCU_INIT_POINTER(b->dest, NULL);
140 } 150 }
141 b++; 151 b++;
142 } 152 }
@@ -145,51 +155,46 @@ static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
145 155
146static int ip_vs_dh_init_svc(struct ip_vs_service *svc) 156static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
147{ 157{
148 struct ip_vs_dh_bucket *tbl; 158 struct ip_vs_dh_state *s;
149 159
150 /* allocate the DH table for this service */ 160 /* allocate the DH table for this service */
151 tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, 161 s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL);
152 GFP_KERNEL); 162 if (s == NULL)
153 if (tbl == NULL)
154 return -ENOMEM; 163 return -ENOMEM;
155 164
156 svc->sched_data = tbl; 165 svc->sched_data = s;
157 IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for " 166 IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
158 "current service\n", 167 "current service\n",
159 sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); 168 sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
160 169
161 /* assign the hash buckets with the updated service */ 170 /* assign the hash buckets with current dests */
162 ip_vs_dh_assign(tbl, svc); 171 ip_vs_dh_reassign(s, svc);
163 172
164 return 0; 173 return 0;
165} 174}
166 175
167 176
168static int ip_vs_dh_done_svc(struct ip_vs_service *svc) 177static void ip_vs_dh_done_svc(struct ip_vs_service *svc)
169{ 178{
170 struct ip_vs_dh_bucket *tbl = svc->sched_data; 179 struct ip_vs_dh_state *s = svc->sched_data;
171 180
172 /* got to clean up hash buckets here */ 181 /* got to clean up hash buckets here */
173 ip_vs_dh_flush(tbl); 182 ip_vs_dh_flush(s);
174 183
175 /* release the table itself */ 184 /* release the table itself */
176 kfree(svc->sched_data); 185 kfree_rcu(s, rcu_head);
177 IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n", 186 IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
178 sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); 187 sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
179
180 return 0;
181} 188}
182 189
183 190
184static int ip_vs_dh_update_svc(struct ip_vs_service *svc) 191static int ip_vs_dh_dest_changed(struct ip_vs_service *svc,
192 struct ip_vs_dest *dest)
185{ 193{
186 struct ip_vs_dh_bucket *tbl = svc->sched_data; 194 struct ip_vs_dh_state *s = svc->sched_data;
187
188 /* got to clean up hash buckets here */
189 ip_vs_dh_flush(tbl);
190 195
191 /* assign the hash buckets with the updated service */ 196 /* assign the hash buckets with the updated service */
192 ip_vs_dh_assign(tbl, svc); 197 ip_vs_dh_reassign(s, svc);
193 198
194 return 0; 199 return 0;
195} 200}
@@ -212,19 +217,20 @@ static struct ip_vs_dest *
212ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 217ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
213{ 218{
214 struct ip_vs_dest *dest; 219 struct ip_vs_dest *dest;
215 struct ip_vs_dh_bucket *tbl; 220 struct ip_vs_dh_state *s;
216 struct ip_vs_iphdr iph; 221 struct ip_vs_iphdr iph;
217 222
218 ip_vs_fill_iph_addr_only(svc->af, skb, &iph); 223 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
219 224
220 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 225 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
221 226
222 tbl = (struct ip_vs_dh_bucket *)svc->sched_data; 227 s = (struct ip_vs_dh_state *) svc->sched_data;
223 dest = ip_vs_dh_get(svc->af, tbl, &iph.daddr); 228 dest = ip_vs_dh_get(svc->af, s, &iph.daddr);
224 if (!dest 229 if (!dest
225 || !(dest->flags & IP_VS_DEST_F_AVAILABLE) 230 || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
226 || atomic_read(&dest->weight) <= 0 231 || atomic_read(&dest->weight) <= 0
227 || is_overloaded(dest)) { 232 || is_overloaded(dest)) {
233 ip_vs_scheduler_err(svc, "no destination available");
228 return NULL; 234 return NULL;
229 } 235 }
230 236
@@ -248,7 +254,8 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler =
248 .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), 254 .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
249 .init_service = ip_vs_dh_init_svc, 255 .init_service = ip_vs_dh_init_svc,
250 .done_service = ip_vs_dh_done_svc, 256 .done_service = ip_vs_dh_done_svc,
251 .update_service = ip_vs_dh_update_svc, 257 .add_dest = ip_vs_dh_dest_changed,
258 .del_dest = ip_vs_dh_dest_changed,
252 .schedule = ip_vs_dh_schedule, 259 .schedule = ip_vs_dh_schedule,
253}; 260};
254 261
@@ -262,6 +269,7 @@ static int __init ip_vs_dh_init(void)
262static void __exit ip_vs_dh_cleanup(void) 269static void __exit ip_vs_dh_cleanup(void)
263{ 270{
264 unregister_ip_vs_scheduler(&ip_vs_dh_scheduler); 271 unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
272 synchronize_rcu();
265} 273}
266 274
267 275
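
Taken together, the ip_vs_dh.c changes make each hash bucket an RCU-managed
slot: readers fetch it with rcu_dereference() inside the read-side section
supplied by the core, writers (already serialized elsewhere, hence the
constant-true condition in rcu_dereference_protected()) swap the pointer with
RCU_INIT_POINTER() and move the reference with the new ip_vs_dest_hold() and
ip_vs_dest_put() helpers, and the containing state goes away via kfree_rcu().
A condensed sketch of the slot pattern, assuming an external update-side lock
and using stand-in types:

#include <linux/atomic.h>
#include <linux/rcupdate.h>

struct obj {
	atomic_t refcnt;
};

struct slot {
	struct obj __rcu *ptr;
};

/* reader side: runs under rcu_read_lock() */
static struct obj *slot_get(struct slot *s)
{
	return rcu_dereference(s->ptr);
}

/* writer side: caller holds the update-side lock */
static void slot_set(struct slot *s, struct obj *new)
{
	struct obj *old = rcu_dereference_protected(s->ptr, 1);

	if (old)
		atomic_dec(&old->refcnt);	/* stand-in for ip_vs_dest_put() */
	if (new)
		atomic_inc(&new->refcnt);	/* stand-in for ip_vs_dest_hold() */
	RCU_INIT_POINTER(s->ptr, new);
}

The synchronize_rcu() added to the module exit then guarantees no CPU is
still running the scheduler's read-side code when the module text is freed.
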
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 0fac6017b6fb..6bee6d0c73a5 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -56,7 +56,7 @@
56 * Make a summary from each cpu 56 * Make a summary from each cpu
57 */ 57 */
58static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, 58static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
59 struct ip_vs_cpu_stats *stats) 59 struct ip_vs_cpu_stats __percpu *stats)
60{ 60{
61 int i; 61 int i;
62 62
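
The one-line ip_vs_est.c change is a sparse annotation: marking the argument
__percpu records that it is a per-cpu offset rather than a normal pointer, so
direct dereferences get flagged and accesses must go through per_cpu_ptr(),
as the summing loop already does. Roughly the shape of such a function, with
an invented stats field:

#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/types.h>

struct cpu_stats {
	u64 packets;
};

static u64 sum_packets(struct cpu_stats __percpu *stats)
{
	u64 sum = 0;
	int cpu;

	/* resolve the per-cpu offset into each CPU's instance */
	for_each_possible_cpu(cpu)
		sum += per_cpu_ptr(stats, cpu)->packets;
	return sum;
}
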
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 4f53a5f04437..77c173282f38 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -267,10 +267,12 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
267 * hopefully it will succeed on the retransmitted 267 * hopefully it will succeed on the retransmitted
268 * packet. 268 * packet.
269 */ 269 */
270 rcu_read_lock();
270 ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, 271 ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
271 iph->ihl * 4, 272 iph->ihl * 4,
272 start-data, end-start, 273 start-data, end-start,
273 buf, buf_len); 274 buf, buf_len);
275 rcu_read_unlock();
274 if (ret) { 276 if (ret) {
275 ip_vs_nfct_expect_related(skb, ct, n_cp, 277 ip_vs_nfct_expect_related(skb, ct, n_cp,
276 IPPROTO_TCP, 0, 0); 278 IPPROTO_TCP, 0, 0);
@@ -480,6 +482,7 @@ static int __init ip_vs_ftp_init(void)
480 int rv; 482 int rv;
481 483
482 rv = register_pernet_subsys(&ip_vs_ftp_ops); 484 rv = register_pernet_subsys(&ip_vs_ftp_ops);
485 /* rcu_barrier() is called by netns on error */
483 return rv; 486 return rv;
484} 487}
485 488
@@ -489,6 +492,7 @@ static int __init ip_vs_ftp_init(void)
489static void __exit ip_vs_ftp_exit(void) 492static void __exit ip_vs_ftp_exit(void)
490{ 493{
491 unregister_pernet_subsys(&ip_vs_ftp_ops); 494 unregister_pernet_subsys(&ip_vs_ftp_ops);
495 /* rcu_barrier() is called by netns */
492} 496}
493 497
494 498
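
The ip_vs_ftp.c hunks are about calling conventions rather than new data:
nf_nat_mangle_tcp_packet() presumably reaches RCU-managed NAT state in this
series, so the caller now has to supply the read-side critical section, and
the exit-path comments record that the pernet machinery already performs the
rcu_barrier() this module would otherwise need. The general contract,
sketched with invented names:

#include <linux/rcupdate.h>

struct cfg {
	int limit;
};

static struct cfg __rcu *active_cfg;

/* Must be called under rcu_read_lock(): the dereferenced pointer is only
 * stable for the duration of the caller's read-side section.
 */
static int cfg_limit(void)
{
	struct cfg *c = rcu_dereference(active_cfg);

	return c ? c->limit : 0;
}

static int caller(void)
{
	int limit;

	rcu_read_lock();
	limit = cfg_limit();	/* mirrors the rcu_read_lock() around the mangle call */
	rcu_read_unlock();
	return limit;
}
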
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index fdd89b9564ea..b2cc2528a4df 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -90,11 +90,12 @@
90 * IP address and its destination server 90 * IP address and its destination server
91 */ 91 */
92struct ip_vs_lblc_entry { 92struct ip_vs_lblc_entry {
93 struct list_head list; 93 struct hlist_node list;
94 int af; /* address family */ 94 int af; /* address family */
95 union nf_inet_addr addr; /* destination IP address */ 95 union nf_inet_addr addr; /* destination IP address */
96 struct ip_vs_dest *dest; /* real server (cache) */ 96 struct ip_vs_dest __rcu *dest; /* real server (cache) */
97 unsigned long lastuse; /* last used time */ 97 unsigned long lastuse; /* last used time */
98 struct rcu_head rcu_head;
98}; 99};
99 100
100 101
@@ -102,12 +103,14 @@ struct ip_vs_lblc_entry {
102 * IPVS lblc hash table 103 * IPVS lblc hash table
103 */ 104 */
104struct ip_vs_lblc_table { 105struct ip_vs_lblc_table {
105 struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ 106 struct rcu_head rcu_head;
107 struct hlist_head __rcu bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */
108 struct timer_list periodic_timer; /* collect stale entries */
106 atomic_t entries; /* number of entries */ 109 atomic_t entries; /* number of entries */
107 int max_size; /* maximum size of entries */ 110 int max_size; /* maximum size of entries */
108 struct timer_list periodic_timer; /* collect stale entries */
109 int rover; /* rover for expire check */ 111 int rover; /* rover for expire check */
110 int counter; /* counter for no expire */ 112 int counter; /* counter for no expire */
113 bool dead;
111}; 114};
112 115
113 116
@@ -129,13 +132,16 @@ static ctl_table vs_vars_table[] = {
129 132
130static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) 133static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
131{ 134{
132 list_del(&en->list); 135 struct ip_vs_dest *dest;
136
137 hlist_del_rcu(&en->list);
133 /* 138 /*
134 * We don't kfree dest because it is referred either by its service 139 * We don't kfree dest because it is referred either by its service
135 * or the trash dest list. 140 * or the trash dest list.
136 */ 141 */
137 atomic_dec(&en->dest->refcnt); 142 dest = rcu_dereference_protected(en->dest, 1);
138 kfree(en); 143 ip_vs_dest_put(dest);
144 kfree_rcu(en, rcu_head);
139} 145}
140 146
141 147
@@ -165,15 +171,12 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
165{ 171{
166 unsigned int hash = ip_vs_lblc_hashkey(en->af, &en->addr); 172 unsigned int hash = ip_vs_lblc_hashkey(en->af, &en->addr);
167 173
168 list_add(&en->list, &tbl->bucket[hash]); 174 hlist_add_head_rcu(&en->list, &tbl->bucket[hash]);
169 atomic_inc(&tbl->entries); 175 atomic_inc(&tbl->entries);
170} 176}
171 177
172 178
173/* 179/* Get ip_vs_lblc_entry associated with supplied parameters. */
174 * Get ip_vs_lblc_entry associated with supplied parameters. Called under read
175 * lock
176 */
177static inline struct ip_vs_lblc_entry * 180static inline struct ip_vs_lblc_entry *
178ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl, 181ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
179 const union nf_inet_addr *addr) 182 const union nf_inet_addr *addr)
@@ -181,7 +184,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
181 unsigned int hash = ip_vs_lblc_hashkey(af, addr); 184 unsigned int hash = ip_vs_lblc_hashkey(af, addr);
182 struct ip_vs_lblc_entry *en; 185 struct ip_vs_lblc_entry *en;
183 186
184 list_for_each_entry(en, &tbl->bucket[hash], list) 187 hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)
185 if (ip_vs_addr_equal(af, &en->addr, addr)) 188 if (ip_vs_addr_equal(af, &en->addr, addr))
186 return en; 189 return en;
187 190
@@ -191,7 +194,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
191 194
192/* 195/*
193 * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP 196 * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP
194 * address to a server. Called under write lock. 197 * address to a server. Called under spin lock.
195 */ 198 */
196static inline struct ip_vs_lblc_entry * 199static inline struct ip_vs_lblc_entry *
197ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, 200ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
@@ -209,14 +212,20 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
209 ip_vs_addr_copy(dest->af, &en->addr, daddr); 212 ip_vs_addr_copy(dest->af, &en->addr, daddr);
210 en->lastuse = jiffies; 213 en->lastuse = jiffies;
211 214
212 atomic_inc(&dest->refcnt); 215 ip_vs_dest_hold(dest);
213 en->dest = dest; 216 RCU_INIT_POINTER(en->dest, dest);
214 217
215 ip_vs_lblc_hash(tbl, en); 218 ip_vs_lblc_hash(tbl, en);
216 } else if (en->dest != dest) { 219 } else {
217 atomic_dec(&en->dest->refcnt); 220 struct ip_vs_dest *old_dest;
218 atomic_inc(&dest->refcnt); 221
219 en->dest = dest; 222 old_dest = rcu_dereference_protected(en->dest, 1);
223 if (old_dest != dest) {
224 ip_vs_dest_put(old_dest);
225 ip_vs_dest_hold(dest);
226 /* No ordering constraints for refcnt */
227 RCU_INIT_POINTER(en->dest, dest);
228 }
220 } 229 }
221 230
222 return en; 231 return en;
@@ -226,17 +235,22 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
226/* 235/*
227 * Flush all the entries of the specified table. 236 * Flush all the entries of the specified table.
228 */ 237 */
229static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl) 238static void ip_vs_lblc_flush(struct ip_vs_service *svc)
230{ 239{
231 struct ip_vs_lblc_entry *en, *nxt; 240 struct ip_vs_lblc_table *tbl = svc->sched_data;
241 struct ip_vs_lblc_entry *en;
242 struct hlist_node *next;
232 int i; 243 int i;
233 244
245 spin_lock_bh(&svc->sched_lock);
246 tbl->dead = 1;
234 for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { 247 for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
235 list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { 248 hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
236 ip_vs_lblc_free(en); 249 ip_vs_lblc_free(en);
237 atomic_dec(&tbl->entries); 250 atomic_dec(&tbl->entries);
238 } 251 }
239 } 252 }
253 spin_unlock_bh(&svc->sched_lock);
240} 254}
241 255
242static int sysctl_lblc_expiration(struct ip_vs_service *svc) 256static int sysctl_lblc_expiration(struct ip_vs_service *svc)
@@ -252,15 +266,16 @@ static int sysctl_lblc_expiration(struct ip_vs_service *svc)
252static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) 266static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
253{ 267{
254 struct ip_vs_lblc_table *tbl = svc->sched_data; 268 struct ip_vs_lblc_table *tbl = svc->sched_data;
255 struct ip_vs_lblc_entry *en, *nxt; 269 struct ip_vs_lblc_entry *en;
270 struct hlist_node *next;
256 unsigned long now = jiffies; 271 unsigned long now = jiffies;
257 int i, j; 272 int i, j;
258 273
259 for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { 274 for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
260 j = (j + 1) & IP_VS_LBLC_TAB_MASK; 275 j = (j + 1) & IP_VS_LBLC_TAB_MASK;
261 276
262 write_lock(&svc->sched_lock); 277 spin_lock(&svc->sched_lock);
263 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 278 hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
264 if (time_before(now, 279 if (time_before(now,
265 en->lastuse + 280 en->lastuse +
266 sysctl_lblc_expiration(svc))) 281 sysctl_lblc_expiration(svc)))
@@ -269,7 +284,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
269 ip_vs_lblc_free(en); 284 ip_vs_lblc_free(en);
270 atomic_dec(&tbl->entries); 285 atomic_dec(&tbl->entries);
271 } 286 }
272 write_unlock(&svc->sched_lock); 287 spin_unlock(&svc->sched_lock);
273 } 288 }
274 tbl->rover = j; 289 tbl->rover = j;
275} 290}
@@ -293,7 +308,8 @@ static void ip_vs_lblc_check_expire(unsigned long data)
293 unsigned long now = jiffies; 308 unsigned long now = jiffies;
294 int goal; 309 int goal;
295 int i, j; 310 int i, j;
296 struct ip_vs_lblc_entry *en, *nxt; 311 struct ip_vs_lblc_entry *en;
312 struct hlist_node *next;
297 313
298 if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { 314 if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
299 /* do full expiration check */ 315 /* do full expiration check */
@@ -314,8 +330,8 @@ static void ip_vs_lblc_check_expire(unsigned long data)
314 for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { 330 for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
315 j = (j + 1) & IP_VS_LBLC_TAB_MASK; 331 j = (j + 1) & IP_VS_LBLC_TAB_MASK;
316 332
317 write_lock(&svc->sched_lock); 333 spin_lock(&svc->sched_lock);
318 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 334 hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
319 if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) 335 if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
320 continue; 336 continue;
321 337
@@ -323,7 +339,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
323 atomic_dec(&tbl->entries); 339 atomic_dec(&tbl->entries);
324 goal--; 340 goal--;
325 } 341 }
326 write_unlock(&svc->sched_lock); 342 spin_unlock(&svc->sched_lock);
327 if (goal <= 0) 343 if (goal <= 0)
328 break; 344 break;
329 } 345 }
@@ -354,11 +370,12 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
354 * Initialize the hash buckets 370 * Initialize the hash buckets
355 */ 371 */
356 for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { 372 for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
357 INIT_LIST_HEAD(&tbl->bucket[i]); 373 INIT_HLIST_HEAD(&tbl->bucket[i]);
358 } 374 }
359 tbl->max_size = IP_VS_LBLC_TAB_SIZE*16; 375 tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
360 tbl->rover = 0; 376 tbl->rover = 0;
361 tbl->counter = 1; 377 tbl->counter = 1;
378 tbl->dead = 0;
362 379
363 /* 380 /*
364 * Hook periodic timer for garbage collection 381 * Hook periodic timer for garbage collection
@@ -371,7 +388,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
371} 388}
372 389
373 390
374static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) 391static void ip_vs_lblc_done_svc(struct ip_vs_service *svc)
375{ 392{
376 struct ip_vs_lblc_table *tbl = svc->sched_data; 393 struct ip_vs_lblc_table *tbl = svc->sched_data;
377 394
@@ -379,14 +396,12 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
379 del_timer_sync(&tbl->periodic_timer); 396 del_timer_sync(&tbl->periodic_timer);
380 397
381 /* got to clean up table entries here */ 398 /* got to clean up table entries here */
382 ip_vs_lblc_flush(tbl); 399 ip_vs_lblc_flush(svc);
383 400
384 /* release the table itself */ 401 /* release the table itself */
385 kfree(tbl); 402 kfree_rcu(tbl, rcu_head);
386 IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n", 403 IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
387 sizeof(*tbl)); 404 sizeof(*tbl));
388
389 return 0;
390} 405}
391 406
392 407
@@ -408,7 +423,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
408 * The server with weight=0 is quiesced and will not receive any 423 * The server with weight=0 is quiesced and will not receive any
409 * new connection. 424 * new connection.
410 */ 425 */
411 list_for_each_entry(dest, &svc->destinations, n_list) { 426 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
412 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 427 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
413 continue; 428 continue;
414 if (atomic_read(&dest->weight) > 0) { 429 if (atomic_read(&dest->weight) > 0) {
@@ -423,7 +438,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
423 * Find the destination with the least load. 438 * Find the destination with the least load.
424 */ 439 */
425 nextstage: 440 nextstage:
426 list_for_each_entry_continue(dest, &svc->destinations, n_list) { 441 list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
427 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 442 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
428 continue; 443 continue;
429 444
@@ -457,7 +472,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
457 if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) { 472 if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
458 struct ip_vs_dest *d; 473 struct ip_vs_dest *d;
459 474
460 list_for_each_entry(d, &svc->destinations, n_list) { 475 list_for_each_entry_rcu(d, &svc->destinations, n_list) {
461 if (atomic_read(&d->activeconns)*2 476 if (atomic_read(&d->activeconns)*2
462 < atomic_read(&d->weight)) { 477 < atomic_read(&d->weight)) {
463 return 1; 478 return 1;
@@ -484,7 +499,6 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
484 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 499 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
485 500
486 /* First look in our cache */ 501 /* First look in our cache */
487 read_lock(&svc->sched_lock);
488 en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr); 502 en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr);
489 if (en) { 503 if (en) {
 490 /* We only hold a read lock, but this is atomic */ 504 /* We only hold a read lock, but this is atomic */
@@ -499,14 +513,11 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
499 * free up entries from the trash at any time. 513 * free up entries from the trash at any time.
500 */ 514 */
501 515
502 if (en->dest->flags & IP_VS_DEST_F_AVAILABLE) 516 dest = rcu_dereference(en->dest);
503 dest = en->dest; 517 if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
518 atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
519 goto out;
504 } 520 }
505 read_unlock(&svc->sched_lock);
506
507 /* If the destination has a weight and is not overloaded, use it */
508 if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
509 goto out;
510 521
511 /* No cache entry or it is invalid, time to schedule */ 522 /* No cache entry or it is invalid, time to schedule */
512 dest = __ip_vs_lblc_schedule(svc); 523 dest = __ip_vs_lblc_schedule(svc);
@@ -516,9 +527,10 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
516 } 527 }
517 528
518 /* If we fail to create a cache entry, we'll just use the valid dest */ 529 /* If we fail to create a cache entry, we'll just use the valid dest */
519 write_lock(&svc->sched_lock); 530 spin_lock_bh(&svc->sched_lock);
520 ip_vs_lblc_new(tbl, &iph.daddr, dest); 531 if (!tbl->dead)
521 write_unlock(&svc->sched_lock); 532 ip_vs_lblc_new(tbl, &iph.daddr, dest);
533 spin_unlock_bh(&svc->sched_lock);
522 534
523out: 535out:
524 IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n", 536 IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",
@@ -621,6 +633,7 @@ static void __exit ip_vs_lblc_cleanup(void)
621{ 633{
622 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); 634 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
623 unregister_pernet_subsys(&ip_vs_lblc_ops); 635 unregister_pernet_subsys(&ip_vs_lblc_ops);
636 synchronize_rcu();
624} 637}
625 638
626 639
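
The lblc conversion combines three moves: the hash chains and per-entry dest
pointers become RCU (hlist_*_rcu plus kfree_rcu), the rwlock uses of
svc->sched_lock collapse into plain spin_lock/spin_lock_bh on the update
side, and the new ->dead flag closes the window in which a scheduler could
insert into a table that ip_vs_lblc_done_svc() is about to free. A
stripped-down sketch of that dead-flag handshake — the real code uses the
per-service sched_lock, not a lock embedded in the table:

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct cache {
	spinlock_t lock;
	bool dead;
	/* ... hash buckets ... */
	struct rcu_head rcu;
};

static void cache_init(struct cache *c)
{
	spin_lock_init(&c->lock);
	c->dead = false;
}

static void cache_insert(struct cache *c /* , key, value */)
{
	spin_lock_bh(&c->lock);
	if (!c->dead) {
		/* safe to add the new entry to the buckets */
	}
	spin_unlock_bh(&c->lock);
}

static void cache_destroy(struct cache *c)
{
	spin_lock_bh(&c->lock);
	c->dead = true;
	/* unlink and free all entries */
	spin_unlock_bh(&c->lock);
	kfree_rcu(c, rcu);	/* readers may still be traversing the buckets */
}

Readers that raced with the flush can still walk entries already unlinked by
hlist_del_rcu(); kfree_rcu() holds the memory back until they are done.
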
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index c03b6a3ade2f..feb9656eac58 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -89,40 +89,44 @@
89 */ 89 */
90struct ip_vs_dest_set_elem { 90struct ip_vs_dest_set_elem {
91 struct list_head list; /* list link */ 91 struct list_head list; /* list link */
92 struct ip_vs_dest *dest; /* destination server */ 92 struct ip_vs_dest __rcu *dest; /* destination server */
93 struct rcu_head rcu_head;
93}; 94};
94 95
95struct ip_vs_dest_set { 96struct ip_vs_dest_set {
96 atomic_t size; /* set size */ 97 atomic_t size; /* set size */
97 unsigned long lastmod; /* last modified time */ 98 unsigned long lastmod; /* last modified time */
98 struct list_head list; /* destination list */ 99 struct list_head list; /* destination list */
99 rwlock_t lock; /* lock for this list */
100}; 100};
101 101
102 102
103static struct ip_vs_dest_set_elem * 103static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
104ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) 104 struct ip_vs_dest *dest, bool check)
105{ 105{
106 struct ip_vs_dest_set_elem *e; 106 struct ip_vs_dest_set_elem *e;
107 107
108 list_for_each_entry(e, &set->list, list) { 108 if (check) {
109 if (e->dest == dest) 109 list_for_each_entry(e, &set->list, list) {
110 /* already existed */ 110 struct ip_vs_dest *d;
111 return NULL; 111
112 d = rcu_dereference_protected(e->dest, 1);
113 if (d == dest)
114 /* already existed */
115 return;
116 }
112 } 117 }
113 118
114 e = kmalloc(sizeof(*e), GFP_ATOMIC); 119 e = kmalloc(sizeof(*e), GFP_ATOMIC);
115 if (e == NULL) 120 if (e == NULL)
116 return NULL; 121 return;
117 122
118 atomic_inc(&dest->refcnt); 123 ip_vs_dest_hold(dest);
119 e->dest = dest; 124 RCU_INIT_POINTER(e->dest, dest);
120 125
121 list_add(&e->list, &set->list); 126 list_add_rcu(&e->list, &set->list);
122 atomic_inc(&set->size); 127 atomic_inc(&set->size);
123 128
124 set->lastmod = jiffies; 129 set->lastmod = jiffies;
125 return e;
126} 130}
127 131
128static void 132static void
@@ -131,13 +135,16 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
131 struct ip_vs_dest_set_elem *e; 135 struct ip_vs_dest_set_elem *e;
132 136
133 list_for_each_entry(e, &set->list, list) { 137 list_for_each_entry(e, &set->list, list) {
134 if (e->dest == dest) { 138 struct ip_vs_dest *d;
139
140 d = rcu_dereference_protected(e->dest, 1);
141 if (d == dest) {
135 /* HIT */ 142 /* HIT */
136 atomic_dec(&set->size); 143 atomic_dec(&set->size);
137 set->lastmod = jiffies; 144 set->lastmod = jiffies;
138 atomic_dec(&e->dest->refcnt); 145 ip_vs_dest_put(dest);
139 list_del(&e->list); 146 list_del_rcu(&e->list);
140 kfree(e); 147 kfree_rcu(e, rcu_head);
141 break; 148 break;
142 } 149 }
143 } 150 }
@@ -147,17 +154,18 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
147{ 154{
148 struct ip_vs_dest_set_elem *e, *ep; 155 struct ip_vs_dest_set_elem *e, *ep;
149 156
150 write_lock(&set->lock);
151 list_for_each_entry_safe(e, ep, &set->list, list) { 157 list_for_each_entry_safe(e, ep, &set->list, list) {
158 struct ip_vs_dest *d;
159
160 d = rcu_dereference_protected(e->dest, 1);
152 /* 161 /*
153 * We don't kfree dest because it is referred either 162 * We don't kfree dest because it is referred either
154 * by its service or by the trash dest list. 163 * by its service or by the trash dest list.
155 */ 164 */
156 atomic_dec(&e->dest->refcnt); 165 ip_vs_dest_put(d);
157 list_del(&e->list); 166 list_del_rcu(&e->list);
158 kfree(e); 167 kfree_rcu(e, rcu_head);
159 } 168 }
160 write_unlock(&set->lock);
161} 169}
162 170
163/* get weighted least-connection node in the destination set */ 171/* get weighted least-connection node in the destination set */
@@ -171,8 +179,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
171 return NULL; 179 return NULL;
172 180
173 /* select the first destination server, whose weight > 0 */ 181 /* select the first destination server, whose weight > 0 */
174 list_for_each_entry(e, &set->list, list) { 182 list_for_each_entry_rcu(e, &set->list, list) {
175 least = e->dest; 183 least = rcu_dereference(e->dest);
176 if (least->flags & IP_VS_DEST_F_OVERLOAD) 184 if (least->flags & IP_VS_DEST_F_OVERLOAD)
177 continue; 185 continue;
178 186
@@ -186,8 +194,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
186 194
187 /* find the destination with the weighted least load */ 195 /* find the destination with the weighted least load */
188 nextstage: 196 nextstage:
189 list_for_each_entry(e, &set->list, list) { 197 list_for_each_entry_continue_rcu(e, &set->list, list) {
190 dest = e->dest; 198 dest = rcu_dereference(e->dest);
191 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 199 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
192 continue; 200 continue;
193 201
@@ -224,7 +232,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
224 232
225 /* select the first destination server, whose weight > 0 */ 233 /* select the first destination server, whose weight > 0 */
226 list_for_each_entry(e, &set->list, list) { 234 list_for_each_entry(e, &set->list, list) {
227 most = e->dest; 235 most = rcu_dereference_protected(e->dest, 1);
228 if (atomic_read(&most->weight) > 0) { 236 if (atomic_read(&most->weight) > 0) {
229 moh = ip_vs_dest_conn_overhead(most); 237 moh = ip_vs_dest_conn_overhead(most);
230 goto nextstage; 238 goto nextstage;
@@ -234,8 +242,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
234 242
235 /* find the destination with the weighted most load */ 243 /* find the destination with the weighted most load */
236 nextstage: 244 nextstage:
237 list_for_each_entry(e, &set->list, list) { 245 list_for_each_entry_continue(e, &set->list, list) {
238 dest = e->dest; 246 dest = rcu_dereference_protected(e->dest, 1);
239 doh = ip_vs_dest_conn_overhead(dest); 247 doh = ip_vs_dest_conn_overhead(dest);
240 /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ 248 /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
241 if ((moh * atomic_read(&dest->weight) < 249 if ((moh * atomic_read(&dest->weight) <
@@ -262,11 +270,12 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
262 * IP address and its destination server set 270 * IP address and its destination server set
263 */ 271 */
264struct ip_vs_lblcr_entry { 272struct ip_vs_lblcr_entry {
265 struct list_head list; 273 struct hlist_node list;
266 int af; /* address family */ 274 int af; /* address family */
267 union nf_inet_addr addr; /* destination IP address */ 275 union nf_inet_addr addr; /* destination IP address */
268 struct ip_vs_dest_set set; /* destination server set */ 276 struct ip_vs_dest_set set; /* destination server set */
269 unsigned long lastuse; /* last used time */ 277 unsigned long lastuse; /* last used time */
278 struct rcu_head rcu_head;
270}; 279};
271 280
272 281
@@ -274,12 +283,14 @@ struct ip_vs_lblcr_entry {
274 * IPVS lblcr hash table 283 * IPVS lblcr hash table
275 */ 284 */
276struct ip_vs_lblcr_table { 285struct ip_vs_lblcr_table {
277 struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */ 286 struct rcu_head rcu_head;
287 struct hlist_head __rcu bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */
278 atomic_t entries; /* number of entries */ 288 atomic_t entries; /* number of entries */
279 int max_size; /* maximum size of entries */ 289 int max_size; /* maximum size of entries */
280 struct timer_list periodic_timer; /* collect stale entries */ 290 struct timer_list periodic_timer; /* collect stale entries */
281 int rover; /* rover for expire check */ 291 int rover; /* rover for expire check */
282 int counter; /* counter for no expire */ 292 int counter; /* counter for no expire */
293 bool dead;
283}; 294};
284 295
285 296
@@ -302,9 +313,9 @@ static ctl_table vs_vars_table[] = {
302 313
303static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) 314static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
304{ 315{
305 list_del(&en->list); 316 hlist_del_rcu(&en->list);
306 ip_vs_dest_set_eraseall(&en->set); 317 ip_vs_dest_set_eraseall(&en->set);
307 kfree(en); 318 kfree_rcu(en, rcu_head);
308} 319}
309 320
310 321
@@ -334,15 +345,12 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
334{ 345{
335 unsigned int hash = ip_vs_lblcr_hashkey(en->af, &en->addr); 346 unsigned int hash = ip_vs_lblcr_hashkey(en->af, &en->addr);
336 347
337 list_add(&en->list, &tbl->bucket[hash]); 348 hlist_add_head_rcu(&en->list, &tbl->bucket[hash]);
338 atomic_inc(&tbl->entries); 349 atomic_inc(&tbl->entries);
339} 350}
340 351
341 352
342/* 353/* Get ip_vs_lblcr_entry associated with supplied parameters. */
343 * Get ip_vs_lblcr_entry associated with supplied parameters. Called under
344 * read lock.
345 */
346static inline struct ip_vs_lblcr_entry * 354static inline struct ip_vs_lblcr_entry *
347ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl, 355ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
348 const union nf_inet_addr *addr) 356 const union nf_inet_addr *addr)
@@ -350,7 +358,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
350 unsigned int hash = ip_vs_lblcr_hashkey(af, addr); 358 unsigned int hash = ip_vs_lblcr_hashkey(af, addr);
351 struct ip_vs_lblcr_entry *en; 359 struct ip_vs_lblcr_entry *en;
352 360
353 list_for_each_entry(en, &tbl->bucket[hash], list) 361 hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)
354 if (ip_vs_addr_equal(af, &en->addr, addr)) 362 if (ip_vs_addr_equal(af, &en->addr, addr))
355 return en; 363 return en;
356 364
@@ -360,7 +368,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
360 368
361/* 369/*
362 * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination 370 * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
363 * IP address to a server. Called under write lock. 371 * IP address to a server. Called under spin lock.
364 */ 372 */
365static inline struct ip_vs_lblcr_entry * 373static inline struct ip_vs_lblcr_entry *
366ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, 374ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
@@ -381,14 +389,14 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
381 /* initialize its dest set */ 389 /* initialize its dest set */
382 atomic_set(&(en->set.size), 0); 390 atomic_set(&(en->set.size), 0);
383 INIT_LIST_HEAD(&en->set.list); 391 INIT_LIST_HEAD(&en->set.list);
384 rwlock_init(&en->set.lock); 392
393 ip_vs_dest_set_insert(&en->set, dest, false);
385 394
386 ip_vs_lblcr_hash(tbl, en); 395 ip_vs_lblcr_hash(tbl, en);
396 return en;
387 } 397 }
388 398
389 write_lock(&en->set.lock); 399 ip_vs_dest_set_insert(&en->set, dest, true);
390 ip_vs_dest_set_insert(&en->set, dest);
391 write_unlock(&en->set.lock);
392 400
393 return en; 401 return en;
394} 402}
@@ -397,17 +405,21 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
397/* 405/*
398 * Flush all the entries of the specified table. 406 * Flush all the entries of the specified table.
399 */ 407 */
400static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl) 408static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
401{ 409{
410 struct ip_vs_lblcr_table *tbl = svc->sched_data;
402 int i; 411 int i;
403 struct ip_vs_lblcr_entry *en, *nxt; 412 struct ip_vs_lblcr_entry *en;
413 struct hlist_node *next;
404 414
405 /* No locking required, only called during cleanup. */ 415 spin_lock_bh(&svc->sched_lock);
416 tbl->dead = 1;
406 for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { 417 for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
407 list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { 418 hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
408 ip_vs_lblcr_free(en); 419 ip_vs_lblcr_free(en);
409 } 420 }
410 } 421 }
422 spin_unlock_bh(&svc->sched_lock);
411} 423}
412 424
413static int sysctl_lblcr_expiration(struct ip_vs_service *svc) 425static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
@@ -425,13 +437,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
425 struct ip_vs_lblcr_table *tbl = svc->sched_data; 437 struct ip_vs_lblcr_table *tbl = svc->sched_data;
426 unsigned long now = jiffies; 438 unsigned long now = jiffies;
427 int i, j; 439 int i, j;
428 struct ip_vs_lblcr_entry *en, *nxt; 440 struct ip_vs_lblcr_entry *en;
441 struct hlist_node *next;
429 442
430 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { 443 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
431 j = (j + 1) & IP_VS_LBLCR_TAB_MASK; 444 j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
432 445
433 write_lock(&svc->sched_lock); 446 spin_lock(&svc->sched_lock);
434 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 447 hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
435 if (time_after(en->lastuse + 448 if (time_after(en->lastuse +
436 sysctl_lblcr_expiration(svc), now)) 449 sysctl_lblcr_expiration(svc), now))
437 continue; 450 continue;
@@ -439,7 +452,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
439 ip_vs_lblcr_free(en); 452 ip_vs_lblcr_free(en);
440 atomic_dec(&tbl->entries); 453 atomic_dec(&tbl->entries);
441 } 454 }
442 write_unlock(&svc->sched_lock); 455 spin_unlock(&svc->sched_lock);
443 } 456 }
444 tbl->rover = j; 457 tbl->rover = j;
445} 458}
@@ -463,7 +476,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
463 unsigned long now = jiffies; 476 unsigned long now = jiffies;
464 int goal; 477 int goal;
465 int i, j; 478 int i, j;
466 struct ip_vs_lblcr_entry *en, *nxt; 479 struct ip_vs_lblcr_entry *en;
480 struct hlist_node *next;
467 481
468 if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { 482 if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
469 /* do full expiration check */ 483 /* do full expiration check */
@@ -484,8 +498,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
484 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { 498 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
485 j = (j + 1) & IP_VS_LBLCR_TAB_MASK; 499 j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
486 500
487 write_lock(&svc->sched_lock); 501 spin_lock(&svc->sched_lock);
488 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 502 hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
489 if (time_before(now, en->lastuse+ENTRY_TIMEOUT)) 503 if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
490 continue; 504 continue;
491 505
@@ -493,7 +507,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
493 atomic_dec(&tbl->entries); 507 atomic_dec(&tbl->entries);
494 goal--; 508 goal--;
495 } 509 }
496 write_unlock(&svc->sched_lock); 510 spin_unlock(&svc->sched_lock);
497 if (goal <= 0) 511 if (goal <= 0)
498 break; 512 break;
499 } 513 }
@@ -523,11 +537,12 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
523 * Initialize the hash buckets 537 * Initialize the hash buckets
524 */ 538 */
525 for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { 539 for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
526 INIT_LIST_HEAD(&tbl->bucket[i]); 540 INIT_HLIST_HEAD(&tbl->bucket[i]);
527 } 541 }
528 tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; 542 tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
529 tbl->rover = 0; 543 tbl->rover = 0;
530 tbl->counter = 1; 544 tbl->counter = 1;
545 tbl->dead = 0;
531 546
532 /* 547 /*
533 * Hook periodic timer for garbage collection 548 * Hook periodic timer for garbage collection
@@ -540,7 +555,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
540} 555}
541 556
542 557
543static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) 558static void ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
544{ 559{
545 struct ip_vs_lblcr_table *tbl = svc->sched_data; 560 struct ip_vs_lblcr_table *tbl = svc->sched_data;
546 561
@@ -548,14 +563,12 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
548 del_timer_sync(&tbl->periodic_timer); 563 del_timer_sync(&tbl->periodic_timer);
549 564
550 /* got to clean up table entries here */ 565 /* got to clean up table entries here */
551 ip_vs_lblcr_flush(tbl); 566 ip_vs_lblcr_flush(svc);
552 567
553 /* release the table itself */ 568 /* release the table itself */
554 kfree(tbl); 569 kfree_rcu(tbl, rcu_head);
555 IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n", 570 IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
556 sizeof(*tbl)); 571 sizeof(*tbl));
557
558 return 0;
559} 572}
560 573
561 574
@@ -577,7 +590,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
577 * The server with weight=0 is quiesced and will not receive any 590 * The server with weight=0 is quiesced and will not receive any
578 * new connection. 591 * new connection.
579 */ 592 */
580 list_for_each_entry(dest, &svc->destinations, n_list) { 593 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
581 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 594 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
582 continue; 595 continue;
583 596
@@ -593,7 +606,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
593 * Find the destination with the least load. 606 * Find the destination with the least load.
594 */ 607 */
595 nextstage: 608 nextstage:
596 list_for_each_entry_continue(dest, &svc->destinations, n_list) { 609 list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
597 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 610 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
598 continue; 611 continue;
599 612
@@ -627,7 +640,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
627 if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) { 640 if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
628 struct ip_vs_dest *d; 641 struct ip_vs_dest *d;
629 642
630 list_for_each_entry(d, &svc->destinations, n_list) { 643 list_for_each_entry_rcu(d, &svc->destinations, n_list) {
631 if (atomic_read(&d->activeconns)*2 644 if (atomic_read(&d->activeconns)*2
632 < atomic_read(&d->weight)) { 645 < atomic_read(&d->weight)) {
633 return 1; 646 return 1;
@@ -646,7 +659,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
646{ 659{
647 struct ip_vs_lblcr_table *tbl = svc->sched_data; 660 struct ip_vs_lblcr_table *tbl = svc->sched_data;
648 struct ip_vs_iphdr iph; 661 struct ip_vs_iphdr iph;
649 struct ip_vs_dest *dest = NULL; 662 struct ip_vs_dest *dest;
650 struct ip_vs_lblcr_entry *en; 663 struct ip_vs_lblcr_entry *en;
651 664
652 ip_vs_fill_iph_addr_only(svc->af, skb, &iph); 665 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
@@ -654,53 +667,46 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
654 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 667 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
655 668
656 /* First look in our cache */ 669 /* First look in our cache */
657 read_lock(&svc->sched_lock);
658 en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); 670 en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
659 if (en) { 671 if (en) {
660 /* We only hold a read lock, but this is atomic */
661 en->lastuse = jiffies; 672 en->lastuse = jiffies;
662 673
663 /* Get the least loaded destination */ 674 /* Get the least loaded destination */
664 read_lock(&en->set.lock);
665 dest = ip_vs_dest_set_min(&en->set); 675 dest = ip_vs_dest_set_min(&en->set);
666 read_unlock(&en->set.lock);
667 676
668 /* More than one destination + enough time passed by, cleanup */ 677 /* More than one destination + enough time passed by, cleanup */
669 if (atomic_read(&en->set.size) > 1 && 678 if (atomic_read(&en->set.size) > 1 &&
670 time_after(jiffies, en->set.lastmod + 679 time_after(jiffies, en->set.lastmod +
671 sysctl_lblcr_expiration(svc))) { 680 sysctl_lblcr_expiration(svc))) {
672 struct ip_vs_dest *m; 681 spin_lock_bh(&svc->sched_lock);
682 if (atomic_read(&en->set.size) > 1) {
683 struct ip_vs_dest *m;
673 684
674 write_lock(&en->set.lock); 685 m = ip_vs_dest_set_max(&en->set);
675 m = ip_vs_dest_set_max(&en->set); 686 if (m)
676 if (m) 687 ip_vs_dest_set_erase(&en->set, m);
677 ip_vs_dest_set_erase(&en->set, m); 688 }
678 write_unlock(&en->set.lock); 689 spin_unlock_bh(&svc->sched_lock);
679 } 690 }
680 691
681 /* If the destination is not overloaded, use it */ 692 /* If the destination is not overloaded, use it */
682 if (dest && !is_overloaded(dest, svc)) { 693 if (dest && !is_overloaded(dest, svc))
683 read_unlock(&svc->sched_lock);
684 goto out; 694 goto out;
685 }
686 695
687 /* The cache entry is invalid, time to schedule */ 696 /* The cache entry is invalid, time to schedule */
688 dest = __ip_vs_lblcr_schedule(svc); 697 dest = __ip_vs_lblcr_schedule(svc);
689 if (!dest) { 698 if (!dest) {
690 ip_vs_scheduler_err(svc, "no destination available"); 699 ip_vs_scheduler_err(svc, "no destination available");
691 read_unlock(&svc->sched_lock);
692 return NULL; 700 return NULL;
693 } 701 }
694 702
695 /* Update our cache entry */ 703 /* Update our cache entry */
696 write_lock(&en->set.lock); 704 spin_lock_bh(&svc->sched_lock);
697 ip_vs_dest_set_insert(&en->set, dest); 705 if (!tbl->dead)
698 write_unlock(&en->set.lock); 706 ip_vs_dest_set_insert(&en->set, dest, true);
699 } 707 spin_unlock_bh(&svc->sched_lock);
700 read_unlock(&svc->sched_lock);
701
702 if (dest)
703 goto out; 708 goto out;
709 }
704 710
705 /* No cache entry, time to schedule */ 711 /* No cache entry, time to schedule */
706 dest = __ip_vs_lblcr_schedule(svc); 712 dest = __ip_vs_lblcr_schedule(svc);
@@ -710,9 +716,10 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
710 } 716 }
711 717
712 /* If we fail to create a cache entry, we'll just use the valid dest */ 718 /* If we fail to create a cache entry, we'll just use the valid dest */
713 write_lock(&svc->sched_lock); 719 spin_lock_bh(&svc->sched_lock);
714 ip_vs_lblcr_new(tbl, &iph.daddr, dest); 720 if (!tbl->dead)
715 write_unlock(&svc->sched_lock); 721 ip_vs_lblcr_new(tbl, &iph.daddr, dest);
722 spin_unlock_bh(&svc->sched_lock);
716 723
717out: 724out:
718 IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n", 725 IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
@@ -814,6 +821,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
814{ 821{
815 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); 822 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
816 unregister_pernet_subsys(&ip_vs_lblcr_ops); 823 unregister_pernet_subsys(&ip_vs_lblcr_ops);
824 synchronize_rcu();
817} 825}
818 826
819 827
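
ip_vs_lblcr.c gets the same table treatment as lblc and additionally converts
the per-entry destination set from an rwlock-protected list into an RCU list;
the new check argument to ip_vs_dest_set_insert() skips the duplicate scan
when a freshly allocated set is being populated under the lock. The list side
of that, reduced to its essentials with generic names:

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct elem {
	struct list_head list;
	int val;
	struct rcu_head rcu;
};

static LIST_HEAD(set);

/* reader, under rcu_read_lock() */
static int set_max(void)
{
	struct elem *e;
	int max = 0;

	list_for_each_entry_rcu(e, &set, list) {
		if (e->val > max)
			max = e->val;
	}
	return max;
}

/* writer, serialized by the update-side lock */
static void set_del(struct elem *e)
{
	list_del_rcu(&e->list);
	kfree_rcu(e, rcu);	/* entry stays readable until the grace period ends */
}
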
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index f391819c0cca..5128e338a749 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -42,7 +42,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
42 * served, but no new connection is assigned to the server. 42 * served, but no new connection is assigned to the server.
43 */ 43 */
44 44
45 list_for_each_entry(dest, &svc->destinations, n_list) { 45 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
46 if ((dest->flags & IP_VS_DEST_F_OVERLOAD) || 46 if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
47 atomic_read(&dest->weight) == 0) 47 atomic_read(&dest->weight) == 0)
48 continue; 48 continue;
@@ -84,6 +84,7 @@ static int __init ip_vs_lc_init(void)
84static void __exit ip_vs_lc_cleanup(void) 84static void __exit ip_vs_lc_cleanup(void)
85{ 85{
86 unregister_ip_vs_scheduler(&ip_vs_lc_scheduler); 86 unregister_ip_vs_scheduler(&ip_vs_lc_scheduler);
87 synchronize_rcu();
87} 88}
88 89
89module_init(ip_vs_lc_init); 90module_init(ip_vs_lc_init);
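
ip_vs_lc.c — and ip_vs_nq.c just below, which receives the identical two-line
change — shows the reader half of the series: with svc->destinations now an
RCU list, the schedule callback only needs the _rcu iterator (assuming, as
the conversion implies, that the IPVS core invokes schedulers under
rcu_read_lock()), and the synchronize_rcu() in module exit lets those readers
drain before the module text disappears. In miniature:

#include <linux/list.h>
#include <linux/rcupdate.h>

struct dest {
	struct list_head n_list;
	int weight;	/* stand-in for the atomic weight */
};

/* runs inside the caller's rcu_read_lock() section */
static struct dest *pick_first_weighted(struct list_head *dests)
{
	struct dest *d;

	list_for_each_entry_rcu(d, dests, n_list) {
		if (d->weight > 0)
			return d;	/* valid until rcu_read_unlock() */
	}
	return NULL;
}
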
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index 984d9c137d84..646cfd4baa73 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -75,7 +75,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
75 * new connections. 75 * new connections.
76 */ 76 */
77 77
78 list_for_each_entry(dest, &svc->destinations, n_list) { 78 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
79 79
80 if (dest->flags & IP_VS_DEST_F_OVERLOAD || 80 if (dest->flags & IP_VS_DEST_F_OVERLOAD ||
81 !atomic_read(&dest->weight)) 81 !atomic_read(&dest->weight))
@@ -133,6 +133,7 @@ static int __init ip_vs_nq_init(void)
133static void __exit ip_vs_nq_cleanup(void) 133static void __exit ip_vs_nq_cleanup(void)
134{ 134{
135 unregister_ip_vs_scheduler(&ip_vs_nq_scheduler); 135 unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
136 synchronize_rcu();
136} 137}
137 138
138module_init(ip_vs_nq_init); 139module_init(ip_vs_nq_init);
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
index 5cf859ccb31b..1a82b29ce8ea 100644
--- a/net/netfilter/ipvs/ip_vs_pe.c
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -13,20 +13,8 @@
13/* IPVS pe list */ 13/* IPVS pe list */
14static LIST_HEAD(ip_vs_pe); 14static LIST_HEAD(ip_vs_pe);
15 15
 16/* lock for service table */ 16/* mutex protecting the IPVS PE list */
17static DEFINE_SPINLOCK(ip_vs_pe_lock); 17static DEFINE_MUTEX(ip_vs_pe_mutex);
18
19/* Bind a service with a pe */
20void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe)
21{
22 svc->pe = pe;
23}
24
25/* Unbind a service from its pe */
26void ip_vs_unbind_pe(struct ip_vs_service *svc)
27{
28 svc->pe = NULL;
29}
30 18
31/* Get pe in the pe list by name */ 19/* Get pe in the pe list by name */
32struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) 20struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
@@ -36,9 +24,8 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
36 IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__, 24 IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
37 pe_name); 25 pe_name);
38 26
39 spin_lock_bh(&ip_vs_pe_lock); 27 rcu_read_lock();
40 28 list_for_each_entry_rcu(pe, &ip_vs_pe, n_list) {
41 list_for_each_entry(pe, &ip_vs_pe, n_list) {
42 /* Test and get the modules atomically */ 29 /* Test and get the modules atomically */
43 if (pe->module && 30 if (pe->module &&
44 !try_module_get(pe->module)) { 31 !try_module_get(pe->module)) {
@@ -47,14 +34,14 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
47 } 34 }
48 if (strcmp(pe_name, pe->name)==0) { 35 if (strcmp(pe_name, pe->name)==0) {
49 /* HIT */ 36 /* HIT */
50 spin_unlock_bh(&ip_vs_pe_lock); 37 rcu_read_unlock();
51 return pe; 38 return pe;
52 } 39 }
53 if (pe->module) 40 if (pe->module)
54 module_put(pe->module); 41 module_put(pe->module);
55 } 42 }
43 rcu_read_unlock();
56 44
57 spin_unlock_bh(&ip_vs_pe_lock);
58 return NULL; 45 return NULL;
59} 46}
60 47
@@ -83,22 +70,13 @@ int register_ip_vs_pe(struct ip_vs_pe *pe)
83 /* increase the module use count */ 70 /* increase the module use count */
84 ip_vs_use_count_inc(); 71 ip_vs_use_count_inc();
85 72
86 spin_lock_bh(&ip_vs_pe_lock); 73 mutex_lock(&ip_vs_pe_mutex);
87
88 if (!list_empty(&pe->n_list)) {
89 spin_unlock_bh(&ip_vs_pe_lock);
90 ip_vs_use_count_dec();
91 pr_err("%s(): [%s] pe already linked\n",
92 __func__, pe->name);
93 return -EINVAL;
94 }
95
96 /* Make sure that the pe with this name doesn't exist 74 /* Make sure that the pe with this name doesn't exist
97 * in the pe list. 75 * in the pe list.
98 */ 76 */
99 list_for_each_entry(tmp, &ip_vs_pe, n_list) { 77 list_for_each_entry(tmp, &ip_vs_pe, n_list) {
100 if (strcmp(tmp->name, pe->name) == 0) { 78 if (strcmp(tmp->name, pe->name) == 0) {
101 spin_unlock_bh(&ip_vs_pe_lock); 79 mutex_unlock(&ip_vs_pe_mutex);
102 ip_vs_use_count_dec(); 80 ip_vs_use_count_dec();
103 pr_err("%s(): [%s] pe already existed " 81 pr_err("%s(): [%s] pe already existed "
104 "in the system\n", __func__, pe->name); 82 "in the system\n", __func__, pe->name);
@@ -106,8 +84,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe)
106 } 84 }
107 } 85 }
108 /* Add it into the d-linked pe list */ 86 /* Add it into the d-linked pe list */
109 list_add(&pe->n_list, &ip_vs_pe); 87 list_add_rcu(&pe->n_list, &ip_vs_pe);
110 spin_unlock_bh(&ip_vs_pe_lock); 88 mutex_unlock(&ip_vs_pe_mutex);
111 89
112 pr_info("[%s] pe registered.\n", pe->name); 90 pr_info("[%s] pe registered.\n", pe->name);
113 91
@@ -118,17 +96,10 @@ EXPORT_SYMBOL_GPL(register_ip_vs_pe);
118/* Unregister a pe from the pe list */ 96/* Unregister a pe from the pe list */
119int unregister_ip_vs_pe(struct ip_vs_pe *pe) 97int unregister_ip_vs_pe(struct ip_vs_pe *pe)
120{ 98{
121 spin_lock_bh(&ip_vs_pe_lock); 99 mutex_lock(&ip_vs_pe_mutex);
122 if (list_empty(&pe->n_list)) {
123 spin_unlock_bh(&ip_vs_pe_lock);
124 pr_err("%s(): [%s] pe is not in the list. failed\n",
125 __func__, pe->name);
126 return -EINVAL;
127 }
128
129 /* Remove it from the d-linked pe list */ 100 /* Remove it from the d-linked pe list */
130 list_del(&pe->n_list); 101 list_del_rcu(&pe->n_list);
131 spin_unlock_bh(&ip_vs_pe_lock); 102 mutex_unlock(&ip_vs_pe_mutex);
132 103
133 /* decrease the module use count */ 104 /* decrease the module use count */
134 ip_vs_use_count_dec(); 105 ip_vs_use_count_dec();
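
The pe-list conversion is the writer half of the same pattern: registration
and unregistration are rare, so a mutex replaces the spinlock there — which
is also what allows dropping the now-redundant already-linked/not-linked
sanity checks and the trivial bind/unbind wrappers — while lookups run
lockless under RCU. A self-contained sketch with generic names:

#include <linux/errno.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/string.h>

struct pe {
	struct list_head n_list;
	const char *name;
};

static LIST_HEAD(pe_list);
static DEFINE_MUTEX(pe_mutex);

static int pe_register(struct pe *pe)
{
	struct pe *tmp;

	mutex_lock(&pe_mutex);
	list_for_each_entry(tmp, &pe_list, n_list) {
		if (!strcmp(tmp->name, pe->name)) {
			mutex_unlock(&pe_mutex);
			return -EEXIST;
		}
	}
	list_add_rcu(&pe->n_list, &pe_list);
	mutex_unlock(&pe_mutex);
	return 0;
}

static struct pe *pe_find(const char *name)
{
	struct pe *pe;

	rcu_read_lock();
	list_for_each_entry_rcu(pe, &pe_list, n_list) {
		if (!strcmp(pe->name, name)) {
			rcu_read_unlock();
			return pe;	/* real code pins the owning module before unlocking */
		}
	}
	rcu_read_unlock();
	return NULL;
}

ip_vs_pe_sip.c then only needs the synchronize_rcu() added to its unload path
before the pe structure it registered can safely vanish.
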
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 12475ef88daf..00cc0241ed87 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -172,6 +172,7 @@ static int __init ip_vs_sip_init(void)
172static void __exit ip_vs_sip_cleanup(void) 172static void __exit ip_vs_sip_cleanup(void)
173{ 173{
174 unregister_ip_vs_pe(&ip_vs_sip_pe); 174 unregister_ip_vs_pe(&ip_vs_sip_pe);
175 synchronize_rcu();
175} 176}
176 177
177module_init(ip_vs_sip_init); 178module_init(ip_vs_sip_init);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index cd1d7298f7ba..6e14a7b5602f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -27,9 +27,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
27 if (sch == NULL) 27 if (sch == NULL)
28 return 0; 28 return 0;
29 net = skb_net(skb); 29 net = skb_net(skb);
30 rcu_read_lock();
30 if ((sch->type == SCTP_CID_INIT) && 31 if ((sch->type == SCTP_CID_INIT) &&
31 (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, 32 (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
32 &iph->daddr, sh->dest))) { 33 &iph->daddr, sh->dest))) {
33 int ignored; 34 int ignored;
34 35
35 if (ip_vs_todrop(net_ipvs(net))) { 36 if (ip_vs_todrop(net_ipvs(net))) {
@@ -37,7 +38,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
37 * It seems that we are very loaded. 38 * It seems that we are very loaded.
38 * We have to drop this packet :( 39 * We have to drop this packet :(
39 */ 40 */
40 ip_vs_service_put(svc); 41 rcu_read_unlock();
41 *verdict = NF_DROP; 42 *verdict = NF_DROP;
42 return 0; 43 return 0;
43 } 44 }
@@ -49,14 +50,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
49 if (!*cpp && ignored <= 0) { 50 if (!*cpp && ignored <= 0) {
50 if (!ignored) 51 if (!ignored)
51 *verdict = ip_vs_leave(svc, skb, pd, iph); 52 *verdict = ip_vs_leave(svc, skb, pd, iph);
52 else { 53 else
53 ip_vs_service_put(svc);
54 *verdict = NF_DROP; 54 *verdict = NF_DROP;
55 } 55 rcu_read_unlock();
56 return 0; 56 return 0;
57 } 57 }
58 ip_vs_service_put(svc);
59 } 58 }
59 rcu_read_unlock();
60 /* NF_ACCEPT */ 60 /* NF_ACCEPT */
61 return 1; 61 return 1;
62} 62}
@@ -994,9 +994,9 @@ static void
994sctp_state_transition(struct ip_vs_conn *cp, int direction, 994sctp_state_transition(struct ip_vs_conn *cp, int direction,
995 const struct sk_buff *skb, struct ip_vs_proto_data *pd) 995 const struct sk_buff *skb, struct ip_vs_proto_data *pd)
996{ 996{
997 spin_lock(&cp->lock); 997 spin_lock_bh(&cp->lock);
998 set_sctp_state(pd, cp, direction, skb); 998 set_sctp_state(pd, cp, direction, skb);
999 spin_unlock(&cp->lock); 999 spin_unlock_bh(&cp->lock);
1000} 1000}
1001 1001
1002static inline __u16 sctp_app_hashkey(__be16 port) 1002static inline __u16 sctp_app_hashkey(__be16 port)
@@ -1016,30 +1016,25 @@ static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
1016 1016
1017 hash = sctp_app_hashkey(port); 1017 hash = sctp_app_hashkey(port);
1018 1018
1019 spin_lock_bh(&ipvs->sctp_app_lock);
1020 list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { 1019 list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
1021 if (i->port == port) { 1020 if (i->port == port) {
1022 ret = -EEXIST; 1021 ret = -EEXIST;
1023 goto out; 1022 goto out;
1024 } 1023 }
1025 } 1024 }
1026 list_add(&inc->p_list, &ipvs->sctp_apps[hash]); 1025 list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
1027 atomic_inc(&pd->appcnt); 1026 atomic_inc(&pd->appcnt);
1028out: 1027out:
1029 spin_unlock_bh(&ipvs->sctp_app_lock);
1030 1028
1031 return ret; 1029 return ret;
1032} 1030}
1033 1031
1034static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) 1032static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
1035{ 1033{
1036 struct netns_ipvs *ipvs = net_ipvs(net);
1037 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); 1034 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
1038 1035
1039 spin_lock_bh(&ipvs->sctp_app_lock);
1040 atomic_dec(&pd->appcnt); 1036 atomic_dec(&pd->appcnt);
1041 list_del(&inc->p_list); 1037 list_del_rcu(&inc->p_list);
1042 spin_unlock_bh(&ipvs->sctp_app_lock);
1043} 1038}
1044 1039
1045static int sctp_app_conn_bind(struct ip_vs_conn *cp) 1040static int sctp_app_conn_bind(struct ip_vs_conn *cp)
@@ -1055,12 +1050,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
1055 /* Lookup application incarnations and bind the right one */ 1050 /* Lookup application incarnations and bind the right one */
1056 hash = sctp_app_hashkey(cp->vport); 1051 hash = sctp_app_hashkey(cp->vport);
1057 1052
1058 spin_lock(&ipvs->sctp_app_lock); 1053 rcu_read_lock();
1059 list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) { 1054 list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
1060 if (inc->port == cp->vport) { 1055 if (inc->port == cp->vport) {
1061 if (unlikely(!ip_vs_app_inc_get(inc))) 1056 if (unlikely(!ip_vs_app_inc_get(inc)))
1062 break; 1057 break;
1063 spin_unlock(&ipvs->sctp_app_lock); 1058 rcu_read_unlock();
1064 1059
1065 IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" 1060 IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
1066 "%s:%u to app %s on port %u\n", 1061 "%s:%u to app %s on port %u\n",
@@ -1076,7 +1071,7 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
1076 goto out; 1071 goto out;
1077 } 1072 }
1078 } 1073 }
1079 spin_unlock(&ipvs->sctp_app_lock); 1074 rcu_read_unlock();
1080out: 1075out:
1081 return result; 1076 return result;
1082} 1077}
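
With the per-protocol app spinlock gone, the bucket lists rely on the RCU list primitives: list_add_rcu()/list_del_rcu() on the (already otherwise serialized) register/unregister paths, and rcu_read_lock() plus list_for_each_entry_rcu() in the bind path. A condensed sketch of the reader side, mirroring sctp_app_conn_bind() above (app_lookup() is a hypothetical name):

static struct ip_vs_app *app_lookup(struct list_head *bucket, __be16 port)
{
	struct ip_vs_app *inc;

	rcu_read_lock();
	list_for_each_entry_rcu(inc, bucket, p_list) {
		if (inc->port == port) {
			/* pin the app before leaving the RCU section */
			if (unlikely(!ip_vs_app_inc_get(inc)))
				break;
			rcu_read_unlock();
			return inc;
		}
	}
	rcu_read_unlock();
	return NULL;
}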
@@ -1090,7 +1085,6 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
1090 struct netns_ipvs *ipvs = net_ipvs(net); 1085 struct netns_ipvs *ipvs = net_ipvs(net);
1091 1086
1092 ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); 1087 ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
1093 spin_lock_init(&ipvs->sctp_app_lock);
1094 pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, 1088 pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
1095 sizeof(sctp_timeouts)); 1089 sizeof(sctp_timeouts));
1096 if (!pd->timeout_table) 1090 if (!pd->timeout_table)
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 9af653a75825..50a15944c6c1 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -47,9 +47,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
47 } 47 }
48 net = skb_net(skb); 48 net = skb_net(skb);
49 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ 49 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
50 rcu_read_lock();
50 if (th->syn && 51 if (th->syn &&
51 (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, 52 (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
52 &iph->daddr, th->dest))) { 53 &iph->daddr, th->dest))) {
53 int ignored; 54 int ignored;
54 55
55 if (ip_vs_todrop(net_ipvs(net))) { 56 if (ip_vs_todrop(net_ipvs(net))) {
@@ -57,7 +58,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
57 * It seems that we are very loaded. 58 * It seems that we are very loaded.
58 * We have to drop this packet :( 59 * We have to drop this packet :(
59 */ 60 */
60 ip_vs_service_put(svc); 61 rcu_read_unlock();
61 *verdict = NF_DROP; 62 *verdict = NF_DROP;
62 return 0; 63 return 0;
63 } 64 }
@@ -70,14 +71,13 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
70 if (!*cpp && ignored <= 0) { 71 if (!*cpp && ignored <= 0) {
71 if (!ignored) 72 if (!ignored)
72 *verdict = ip_vs_leave(svc, skb, pd, iph); 73 *verdict = ip_vs_leave(svc, skb, pd, iph);
73 else { 74 else
74 ip_vs_service_put(svc);
75 *verdict = NF_DROP; 75 *verdict = NF_DROP;
76 } 76 rcu_read_unlock();
77 return 0; 77 return 0;
78 } 78 }
79 ip_vs_service_put(svc);
80 } 79 }
80 rcu_read_unlock();
81 /* NF_ACCEPT */ 81 /* NF_ACCEPT */
82 return 1; 82 return 1;
83} 83}
@@ -557,9 +557,9 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
557 if (th == NULL) 557 if (th == NULL)
558 return; 558 return;
559 559
560 spin_lock(&cp->lock); 560 spin_lock_bh(&cp->lock);
561 set_tcp_state(pd, cp, direction, th); 561 set_tcp_state(pd, cp, direction, th);
562 spin_unlock(&cp->lock); 562 spin_unlock_bh(&cp->lock);
563} 563}
564 564
565static inline __u16 tcp_app_hashkey(__be16 port) 565static inline __u16 tcp_app_hashkey(__be16 port)
@@ -580,18 +580,16 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
580 580
581 hash = tcp_app_hashkey(port); 581 hash = tcp_app_hashkey(port);
582 582
583 spin_lock_bh(&ipvs->tcp_app_lock);
584 list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) { 583 list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
585 if (i->port == port) { 584 if (i->port == port) {
586 ret = -EEXIST; 585 ret = -EEXIST;
587 goto out; 586 goto out;
588 } 587 }
589 } 588 }
590 list_add(&inc->p_list, &ipvs->tcp_apps[hash]); 589 list_add_rcu(&inc->p_list, &ipvs->tcp_apps[hash]);
591 atomic_inc(&pd->appcnt); 590 atomic_inc(&pd->appcnt);
592 591
593 out: 592 out:
594 spin_unlock_bh(&ipvs->tcp_app_lock);
595 return ret; 593 return ret;
596} 594}
597 595
@@ -599,13 +597,10 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
599static void 597static void
600tcp_unregister_app(struct net *net, struct ip_vs_app *inc) 598tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
601{ 599{
602 struct netns_ipvs *ipvs = net_ipvs(net);
603 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); 600 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
604 601
605 spin_lock_bh(&ipvs->tcp_app_lock);
606 atomic_dec(&pd->appcnt); 602 atomic_dec(&pd->appcnt);
607 list_del(&inc->p_list); 603 list_del_rcu(&inc->p_list);
608 spin_unlock_bh(&ipvs->tcp_app_lock);
609} 604}
610 605
611 606
@@ -624,12 +619,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
624 /* Lookup application incarnations and bind the right one */ 619 /* Lookup application incarnations and bind the right one */
625 hash = tcp_app_hashkey(cp->vport); 620 hash = tcp_app_hashkey(cp->vport);
626 621
627 spin_lock(&ipvs->tcp_app_lock); 622 rcu_read_lock();
628 list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { 623 list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) {
629 if (inc->port == cp->vport) { 624 if (inc->port == cp->vport) {
630 if (unlikely(!ip_vs_app_inc_get(inc))) 625 if (unlikely(!ip_vs_app_inc_get(inc)))
631 break; 626 break;
632 spin_unlock(&ipvs->tcp_app_lock); 627 rcu_read_unlock();
633 628
634 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" 629 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
635 "%s:%u to app %s on port %u\n", 630 "%s:%u to app %s on port %u\n",
@@ -646,7 +641,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
646 goto out; 641 goto out;
647 } 642 }
648 } 643 }
649 spin_unlock(&ipvs->tcp_app_lock); 644 rcu_read_unlock();
650 645
651 out: 646 out:
652 return result; 647 return result;
@@ -660,11 +655,11 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
660{ 655{
661 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); 656 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
662 657
663 spin_lock(&cp->lock); 658 spin_lock_bh(&cp->lock);
664 cp->state = IP_VS_TCP_S_LISTEN; 659 cp->state = IP_VS_TCP_S_LISTEN;
665 cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN] 660 cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
666 : tcp_timeouts[IP_VS_TCP_S_LISTEN]); 661 : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
667 spin_unlock(&cp->lock); 662 spin_unlock_bh(&cp->lock);
668} 663}
669 664
670/* --------------------------------------------- 665/* ---------------------------------------------
@@ -676,7 +671,6 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
676 struct netns_ipvs *ipvs = net_ipvs(net); 671 struct netns_ipvs *ipvs = net_ipvs(net);
677 672
678 ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); 673 ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
679 spin_lock_init(&ipvs->tcp_app_lock);
680 pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, 674 pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
681 sizeof(tcp_timeouts)); 675 sizeof(tcp_timeouts));
682 if (!pd->timeout_table) 676 if (!pd->timeout_table)
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 503a842c90d2..b62a3c0ff9bf 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -44,8 +44,9 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
44 return 0; 44 return 0;
45 } 45 }
46 net = skb_net(skb); 46 net = skb_net(skb);
47 svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, 47 rcu_read_lock();
48 &iph->daddr, uh->dest); 48 svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
49 &iph->daddr, uh->dest);
49 if (svc) { 50 if (svc) {
50 int ignored; 51 int ignored;
51 52
@@ -54,7 +55,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
54 * It seems that we are very loaded. 55 * It seems that we are very loaded.
55 * We have to drop this packet :( 56 * We have to drop this packet :(
56 */ 57 */
57 ip_vs_service_put(svc); 58 rcu_read_unlock();
58 *verdict = NF_DROP; 59 *verdict = NF_DROP;
59 return 0; 60 return 0;
60 } 61 }
@@ -67,14 +68,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
67 if (!*cpp && ignored <= 0) { 68 if (!*cpp && ignored <= 0) {
68 if (!ignored) 69 if (!ignored)
69 *verdict = ip_vs_leave(svc, skb, pd, iph); 70 *verdict = ip_vs_leave(svc, skb, pd, iph);
70 else { 71 else
71 ip_vs_service_put(svc);
72 *verdict = NF_DROP; 72 *verdict = NF_DROP;
73 } 73 rcu_read_unlock();
74 return 0; 74 return 0;
75 } 75 }
76 ip_vs_service_put(svc);
77 } 76 }
77 rcu_read_unlock();
78 /* NF_ACCEPT */ 78 /* NF_ACCEPT */
79 return 1; 79 return 1;
80} 80}
@@ -359,19 +359,16 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc)
359 359
360 hash = udp_app_hashkey(port); 360 hash = udp_app_hashkey(port);
361 361
362
363 spin_lock_bh(&ipvs->udp_app_lock);
364 list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { 362 list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
365 if (i->port == port) { 363 if (i->port == port) {
366 ret = -EEXIST; 364 ret = -EEXIST;
367 goto out; 365 goto out;
368 } 366 }
369 } 367 }
370 list_add(&inc->p_list, &ipvs->udp_apps[hash]); 368 list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]);
371 atomic_inc(&pd->appcnt); 369 atomic_inc(&pd->appcnt);
372 370
373 out: 371 out:
374 spin_unlock_bh(&ipvs->udp_app_lock);
375 return ret; 372 return ret;
376} 373}
377 374
@@ -380,12 +377,9 @@ static void
380udp_unregister_app(struct net *net, struct ip_vs_app *inc) 377udp_unregister_app(struct net *net, struct ip_vs_app *inc)
381{ 378{
382 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); 379 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
383 struct netns_ipvs *ipvs = net_ipvs(net);
384 380
385 spin_lock_bh(&ipvs->udp_app_lock);
386 atomic_dec(&pd->appcnt); 381 atomic_dec(&pd->appcnt);
387 list_del(&inc->p_list); 382 list_del_rcu(&inc->p_list);
388 spin_unlock_bh(&ipvs->udp_app_lock);
389} 383}
390 384
391 385
@@ -403,12 +397,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
403 /* Lookup application incarnations and bind the right one */ 397 /* Lookup application incarnations and bind the right one */
404 hash = udp_app_hashkey(cp->vport); 398 hash = udp_app_hashkey(cp->vport);
405 399
406 spin_lock(&ipvs->udp_app_lock); 400 rcu_read_lock();
407 list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) { 401 list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
408 if (inc->port == cp->vport) { 402 if (inc->port == cp->vport) {
409 if (unlikely(!ip_vs_app_inc_get(inc))) 403 if (unlikely(!ip_vs_app_inc_get(inc)))
410 break; 404 break;
411 spin_unlock(&ipvs->udp_app_lock); 405 rcu_read_unlock();
412 406
413 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" 407 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
414 "%s:%u to app %s on port %u\n", 408 "%s:%u to app %s on port %u\n",
@@ -425,7 +419,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
425 goto out; 419 goto out;
426 } 420 }
427 } 421 }
428 spin_unlock(&ipvs->udp_app_lock); 422 rcu_read_unlock();
429 423
430 out: 424 out:
431 return result; 425 return result;
@@ -467,7 +461,6 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
467 struct netns_ipvs *ipvs = net_ipvs(net); 461 struct netns_ipvs *ipvs = net_ipvs(net);
468 462
469 ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); 463 ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
470 spin_lock_init(&ipvs->udp_app_lock);
471 pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, 464 pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
472 sizeof(udp_timeouts)); 465 sizeof(udp_timeouts));
473 if (!pd->timeout_table) 466 if (!pd->timeout_table)
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index c49b388d1085..c35986c793d9 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -35,9 +35,18 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
35} 35}
36 36
37 37
38static int ip_vs_rr_update_svc(struct ip_vs_service *svc) 38static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest)
39{ 39{
40 svc->sched_data = &svc->destinations; 40 struct list_head *p;
41
42 spin_lock_bh(&svc->sched_lock);
43 p = (struct list_head *) svc->sched_data;
44 /* dest is already unlinked, so p->prev is not valid, but
45 * p->next still is; use it to reach the previous entry.
46 */
47 if (p == &dest->n_list)
48 svc->sched_data = p->next->prev;
49 spin_unlock_bh(&svc->sched_lock);
41 return 0; 50 return 0;
42} 51}
43 52
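
The p->next->prev trick works because list_del_rcu() unlinks an entry from its neighbours but deliberately leaves the entry's own ->next intact (only ->prev is poisoned), so concurrent readers can keep traversing through it. A stand-alone userspace illustration of the invariant, assuming nothing beyond the diff:

#include <assert.h>
#include <stddef.h>

struct node { struct node *prev, *next; };

/* Mimic list_del_rcu(): unlink the entry but keep entry->next usable. */
static void del_rcu_demo(struct node *entry)
{
	entry->next->prev = entry->prev;
	entry->prev->next = entry->next;
	entry->prev = NULL;		/* LIST_POISON2 in the kernel */
}

int main(void)
{
	struct node head, a, b, c;

	/* head <-> a <-> b <-> c <-> head */
	head.next = &a; a.prev = &head;
	a.next = &b;    b.prev = &a;
	b.next = &c;    c.prev = &b;
	c.next = &head; head.prev = &c;

	del_rcu_demo(&b);
	/* b->prev is gone, but b->next->prev reaches the element that
	 * precedes b's old position -- exactly what the new cursor needs.
	 */
	assert(b.next->prev == &a);
	return 0;
}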
@@ -48,36 +57,41 @@ static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
48static struct ip_vs_dest * 57static struct ip_vs_dest *
49ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 58ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
50{ 59{
51 struct list_head *p, *q; 60 struct list_head *p;
52 struct ip_vs_dest *dest; 61 struct ip_vs_dest *dest, *last;
62 int pass = 0;
53 63
54 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 64 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
55 65
56 write_lock(&svc->sched_lock); 66 spin_lock_bh(&svc->sched_lock);
57 p = (struct list_head *)svc->sched_data; 67 p = (struct list_head *) svc->sched_data;
58 p = p->next; 68 last = dest = list_entry(p, struct ip_vs_dest, n_list);
59 q = p; 69
60 do { 70 do {
61 /* skip list head */ 71 list_for_each_entry_continue_rcu(dest,
62 if (q == &svc->destinations) { 72 &svc->destinations,
63 q = q->next; 73 n_list) {
64 continue; 74 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
75 atomic_read(&dest->weight) > 0)
76 /* HIT */
77 goto out;
78 if (dest == last)
79 goto stop;
65 } 80 }
66 81 pass++;
67 dest = list_entry(q, struct ip_vs_dest, n_list); 82 /* Previous dest could be unlinked, do not loop forever.
68 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && 83 * If we stay at head there is no need for 2nd pass.
69 atomic_read(&dest->weight) > 0) 84 */
70 /* HIT */ 85 } while (pass < 2 && p != &svc->destinations);
71 goto out; 86
72 q = q->next; 87stop:
73 } while (q != p); 88 spin_unlock_bh(&svc->sched_lock);
74 write_unlock(&svc->sched_lock);
75 ip_vs_scheduler_err(svc, "no destination available"); 89 ip_vs_scheduler_err(svc, "no destination available");
76 return NULL; 90 return NULL;
77 91
78 out: 92 out:
79 svc->sched_data = q; 93 svc->sched_data = &dest->n_list;
80 write_unlock(&svc->sched_lock); 94 spin_unlock_bh(&svc->sched_lock);
81 IP_VS_DBG_BUF(6, "RR: server %s:%u " 95 IP_VS_DBG_BUF(6, "RR: server %s:%u "
82 "activeconns %d refcnt %d weight %d\n", 96 "activeconns %d refcnt %d weight %d\n",
83 IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), 97 IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
@@ -94,7 +108,8 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = {
94 .module = THIS_MODULE, 108 .module = THIS_MODULE,
95 .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), 109 .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
96 .init_service = ip_vs_rr_init_svc, 110 .init_service = ip_vs_rr_init_svc,
97 .update_service = ip_vs_rr_update_svc, 111 .add_dest = NULL,
112 .del_dest = ip_vs_rr_del_dest,
98 .schedule = ip_vs_rr_schedule, 113 .schedule = ip_vs_rr_schedule,
99}; 114};
100 115
@@ -106,6 +121,7 @@ static int __init ip_vs_rr_init(void)
106static void __exit ip_vs_rr_cleanup(void) 121static void __exit ip_vs_rr_cleanup(void)
107{ 122{
108 unregister_ip_vs_scheduler(&ip_vs_rr_scheduler); 123 unregister_ip_vs_scheduler(&ip_vs_rr_scheduler);
124 synchronize_rcu();
109} 125}
110 126
111module_init(ip_vs_rr_init); 127module_init(ip_vs_rr_init);
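
Every scheduler's module exit in this series gains a synchronize_rcu() after unregistration. A comment-annotated template of the reasoning (my_scheduler is a placeholder):

static void __exit my_sched_cleanup(void)
{
	/* After this no new RCU reader can find the scheduler... */
	unregister_ip_vs_scheduler(&my_scheduler);
	/* ...but readers already inside ->schedule() may still be
	 * running.  Wait for all of them to finish before the module
	 * text they are executing can be freed.
	 */
	synchronize_rcu();
}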
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index d6bf20d6cdbe..4dbcda6258bc 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -35,8 +35,8 @@ EXPORT_SYMBOL(ip_vs_scheduler_err);
35 */ 35 */
36static LIST_HEAD(ip_vs_schedulers); 36static LIST_HEAD(ip_vs_schedulers);
37 37
38/* lock for service table */ 38/* mutex for schedulers */
39static DEFINE_SPINLOCK(ip_vs_sched_lock); 39static DEFINE_MUTEX(ip_vs_sched_mutex);
40 40
41 41
42/* 42/*
@@ -47,8 +47,6 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
47{ 47{
48 int ret; 48 int ret;
49 49
50 svc->scheduler = scheduler;
51
52 if (scheduler->init_service) { 50 if (scheduler->init_service) {
53 ret = scheduler->init_service(svc); 51 ret = scheduler->init_service(svc);
54 if (ret) { 52 if (ret) {
@@ -56,7 +54,7 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
56 return ret; 54 return ret;
57 } 55 }
58 } 56 }
59 57 rcu_assign_pointer(svc->scheduler, scheduler);
60 return 0; 58 return 0;
61} 59}
62 60
@@ -64,22 +62,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
64/* 62/*
65 * Unbind a service with its scheduler 63 * Unbind a service with its scheduler
66 */ 64 */
67int ip_vs_unbind_scheduler(struct ip_vs_service *svc) 65void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
66 struct ip_vs_scheduler *sched)
68{ 67{
69 struct ip_vs_scheduler *sched = svc->scheduler; 68 struct ip_vs_scheduler *cur_sched;
70 69
71 if (!sched) 70 cur_sched = rcu_dereference_protected(svc->scheduler, 1);
72 return 0; 71 /* This check proves that old 'sched' was installed */
72 if (!cur_sched)
73 return;
73 74
74 if (sched->done_service) { 75 if (sched->done_service)
75 if (sched->done_service(svc) != 0) { 76 sched->done_service(svc);
76 pr_err("%s(): done error\n", __func__); 77 /* svc->scheduler can not be set to NULL */
77 return -EINVAL;
78 }
79 }
80
81 svc->scheduler = NULL;
82 return 0;
83} 78}
84 79
85 80
@@ -92,7 +87,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
92 87
93 IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name); 88 IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);
94 89
95 spin_lock_bh(&ip_vs_sched_lock); 90 mutex_lock(&ip_vs_sched_mutex);
96 91
97 list_for_each_entry(sched, &ip_vs_schedulers, n_list) { 92 list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
98 /* 93 /*
@@ -106,14 +101,14 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
106 } 101 }
107 if (strcmp(sched_name, sched->name)==0) { 102 if (strcmp(sched_name, sched->name)==0) {
108 /* HIT */ 103 /* HIT */
109 spin_unlock_bh(&ip_vs_sched_lock); 104 mutex_unlock(&ip_vs_sched_mutex);
110 return sched; 105 return sched;
111 } 106 }
112 if (sched->module) 107 if (sched->module)
113 module_put(sched->module); 108 module_put(sched->module);
114 } 109 }
115 110
116 spin_unlock_bh(&ip_vs_sched_lock); 111 mutex_unlock(&ip_vs_sched_mutex);
117 return NULL; 112 return NULL;
118} 113}
119 114
@@ -153,21 +148,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
153 148
154void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) 149void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
155{ 150{
151 struct ip_vs_scheduler *sched;
152
153 sched = rcu_dereference(svc->scheduler);
156 if (svc->fwmark) { 154 if (svc->fwmark) {
157 IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", 155 IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
158 svc->scheduler->name, svc->fwmark, 156 sched->name, svc->fwmark, svc->fwmark, msg);
159 svc->fwmark, msg);
160#ifdef CONFIG_IP_VS_IPV6 157#ifdef CONFIG_IP_VS_IPV6
161 } else if (svc->af == AF_INET6) { 158 } else if (svc->af == AF_INET6) {
162 IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", 159 IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
163 svc->scheduler->name, 160 sched->name, ip_vs_proto_name(svc->protocol),
164 ip_vs_proto_name(svc->protocol),
165 &svc->addr.in6, ntohs(svc->port), msg); 161 &svc->addr.in6, ntohs(svc->port), msg);
166#endif 162#endif
167 } else { 163 } else {
168 IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", 164 IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
169 svc->scheduler->name, 165 sched->name, ip_vs_proto_name(svc->protocol),
170 ip_vs_proto_name(svc->protocol),
171 &svc->addr.ip, ntohs(svc->port), msg); 166 &svc->addr.ip, ntohs(svc->port), msg);
172 } 167 }
173} 168}
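
svc->scheduler thus becomes an RCU-managed pointer: bind publishes it with rcu_assign_pointer() and readers such as ip_vs_scheduler_err() fetch it with rcu_dereference(). A condensed sketch of the pairing, assuming the read side already runs under rcu_read_lock():

/* Publisher: the service is fully initialized first, so any reader
 * that observes the new pointer sees a consistent scheduler. */
static void publish_scheduler(struct ip_vs_service *svc,
			      struct ip_vs_scheduler *sched)
{
	rcu_assign_pointer(svc->scheduler, sched);
}

/* Reader: the barrier pairing with rcu_assign_pointer() guarantees
 * sched's fields are visible before the pointer is. */
static const char *scheduler_name(struct ip_vs_service *svc)
{
	struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);

	return sched ? sched->name : "none";
}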
@@ -192,10 +187,10 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
192 /* increase the module use count */ 187 /* increase the module use count */
193 ip_vs_use_count_inc(); 188 ip_vs_use_count_inc();
194 189
195 spin_lock_bh(&ip_vs_sched_lock); 190 mutex_lock(&ip_vs_sched_mutex);
196 191
197 if (!list_empty(&scheduler->n_list)) { 192 if (!list_empty(&scheduler->n_list)) {
198 spin_unlock_bh(&ip_vs_sched_lock); 193 mutex_unlock(&ip_vs_sched_mutex);
199 ip_vs_use_count_dec(); 194 ip_vs_use_count_dec();
200 pr_err("%s(): [%s] scheduler already linked\n", 195 pr_err("%s(): [%s] scheduler already linked\n",
201 __func__, scheduler->name); 196 __func__, scheduler->name);
@@ -208,7 +203,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
208 */ 203 */
209 list_for_each_entry(sched, &ip_vs_schedulers, n_list) { 204 list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
210 if (strcmp(scheduler->name, sched->name) == 0) { 205 if (strcmp(scheduler->name, sched->name) == 0) {
211 spin_unlock_bh(&ip_vs_sched_lock); 206 mutex_unlock(&ip_vs_sched_mutex);
212 ip_vs_use_count_dec(); 207 ip_vs_use_count_dec();
213 pr_err("%s(): [%s] scheduler already existed " 208 pr_err("%s(): [%s] scheduler already existed "
214 "in the system\n", __func__, scheduler->name); 209 "in the system\n", __func__, scheduler->name);
@@ -219,7 +214,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
219 * Add it into the d-linked scheduler list 214 * Add it into the d-linked scheduler list
220 */ 215 */
221 list_add(&scheduler->n_list, &ip_vs_schedulers); 216 list_add(&scheduler->n_list, &ip_vs_schedulers);
222 spin_unlock_bh(&ip_vs_sched_lock); 217 mutex_unlock(&ip_vs_sched_mutex);
223 218
224 pr_info("[%s] scheduler registered.\n", scheduler->name); 219 pr_info("[%s] scheduler registered.\n", scheduler->name);
225 220
@@ -237,9 +232,9 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
237 return -EINVAL; 232 return -EINVAL;
238 } 233 }
239 234
240 spin_lock_bh(&ip_vs_sched_lock); 235 mutex_lock(&ip_vs_sched_mutex);
241 if (list_empty(&scheduler->n_list)) { 236 if (list_empty(&scheduler->n_list)) {
242 spin_unlock_bh(&ip_vs_sched_lock); 237 mutex_unlock(&ip_vs_sched_mutex);
243 pr_err("%s(): [%s] scheduler is not in the list. failed\n", 238 pr_err("%s(): [%s] scheduler is not in the list. failed\n",
244 __func__, scheduler->name); 239 __func__, scheduler->name);
245 return -EINVAL; 240 return -EINVAL;
@@ -249,7 +244,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
249 * Remove it from the d-linked scheduler list 244 * Remove it from the d-linked scheduler list
250 */ 245 */
251 list_del(&scheduler->n_list); 246 list_del(&scheduler->n_list);
252 spin_unlock_bh(&ip_vs_sched_lock); 247 mutex_unlock(&ip_vs_sched_mutex);
253 248
254 /* decrease the module use count */ 249 /* decrease the module use count */
255 ip_vs_use_count_dec(); 250 ip_vs_use_count_dec();
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index 89ead246ed3d..f3205925359a 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -79,7 +79,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
79 * new connections. 79 * new connections.
80 */ 80 */
81 81
82 list_for_each_entry(dest, &svc->destinations, n_list) { 82 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
83 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && 83 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
84 atomic_read(&dest->weight) > 0) { 84 atomic_read(&dest->weight) > 0) {
85 least = dest; 85 least = dest;
@@ -94,7 +94,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
94 * Find the destination with the least load. 94 * Find the destination with the least load.
95 */ 95 */
96 nextstage: 96 nextstage:
97 list_for_each_entry_continue(dest, &svc->destinations, n_list) { 97 list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
98 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 98 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
99 continue; 99 continue;
100 doh = ip_vs_sed_dest_overhead(dest); 100 doh = ip_vs_sed_dest_overhead(dest);
@@ -134,6 +134,7 @@ static int __init ip_vs_sed_init(void)
134static void __exit ip_vs_sed_cleanup(void) 134static void __exit ip_vs_sed_cleanup(void)
135{ 135{
136 unregister_ip_vs_scheduler(&ip_vs_sed_scheduler); 136 unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
137 synchronize_rcu();
137} 138}
138 139
139module_init(ip_vs_sed_init); 140module_init(ip_vs_sed_init);
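
The sed (and, below, wlc) changes are pure read-side conversions: the destination walk becomes an _rcu iteration, which presumes ->schedule() is now invoked inside an RCU read-side critical section. A hedged sketch of the implied call-site contract (hypothetical shape, not the literal ip_vs_schedule() code):

static void schedule_under_rcu(struct ip_vs_service *svc,
			       const struct sk_buff *skb)
{
	struct ip_vs_scheduler *sched;
	struct ip_vs_dest *dest;

	rcu_read_lock();
	sched = rcu_dereference(svc->scheduler);
	dest = sched ? sched->schedule(svc, skb) : NULL;
	if (dest) {
		/* the destination must be used (or pinned) while the
		 * RCU section is still open
		 */
	}
	rcu_read_unlock();
}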
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index e33126994628..0df269d7c99f 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -53,7 +53,7 @@
53 * IPVS SH bucket 53 * IPVS SH bucket
54 */ 54 */
55struct ip_vs_sh_bucket { 55struct ip_vs_sh_bucket {
56 struct ip_vs_dest *dest; /* real server (cache) */ 56 struct ip_vs_dest __rcu *dest; /* real server (cache) */
57}; 57};
58 58
59/* 59/*
@@ -66,6 +66,10 @@ struct ip_vs_sh_bucket {
66#define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS) 66#define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS)
67#define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1) 67#define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1)
68 68
69struct ip_vs_sh_state {
70 struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE];
71 struct rcu_head rcu_head;
72};
69 73
70/* 74/*
71 * Returns hash value for IPVS SH entry 75 * Returns hash value for IPVS SH entry
@@ -87,10 +91,9 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
87 * Get ip_vs_dest associated with supplied parameters. 91 * Get ip_vs_dest associated with supplied parameters.
88 */ 92 */
89static inline struct ip_vs_dest * 93static inline struct ip_vs_dest *
90ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl, 94ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr)
91 const union nf_inet_addr *addr)
92{ 95{
93 return (tbl[ip_vs_sh_hashkey(af, addr)]).dest; 96 return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest);
94} 97}
95 98
96 99
@@ -98,27 +101,32 @@ ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl,
98 * Assign all the hash buckets of the specified table with the service. 101 * Assign all the hash buckets of the specified table with the service.
99 */ 102 */
100static int 103static int
101ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) 104ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc)
102{ 105{
103 int i; 106 int i;
104 struct ip_vs_sh_bucket *b; 107 struct ip_vs_sh_bucket *b;
105 struct list_head *p; 108 struct list_head *p;
106 struct ip_vs_dest *dest; 109 struct ip_vs_dest *dest;
107 int d_count; 110 int d_count;
111 bool empty;
108 112
109 b = tbl; 113 b = &s->buckets[0];
110 p = &svc->destinations; 114 p = &svc->destinations;
115 empty = list_empty(p);
111 d_count = 0; 116 d_count = 0;
112 for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { 117 for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
113 if (list_empty(p)) { 118 dest = rcu_dereference_protected(b->dest, 1);
114 b->dest = NULL; 119 if (dest)
115 } else { 120 ip_vs_dest_put(dest);
121 if (empty)
122 RCU_INIT_POINTER(b->dest, NULL);
123 else {
116 if (p == &svc->destinations) 124 if (p == &svc->destinations)
117 p = p->next; 125 p = p->next;
118 126
119 dest = list_entry(p, struct ip_vs_dest, n_list); 127 dest = list_entry(p, struct ip_vs_dest, n_list);
120 atomic_inc(&dest->refcnt); 128 ip_vs_dest_hold(dest);
121 b->dest = dest; 129 RCU_INIT_POINTER(b->dest, dest);
122 130
123 IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n", 131 IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
124 i, IP_VS_DBG_ADDR(svc->af, &dest->addr), 132 i, IP_VS_DBG_ADDR(svc->af, &dest->addr),
@@ -140,16 +148,18 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
140/* 148/*
141 * Flush all the hash buckets of the specified table. 149 * Flush all the hash buckets of the specified table.
142 */ 150 */
143static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl) 151static void ip_vs_sh_flush(struct ip_vs_sh_state *s)
144{ 152{
145 int i; 153 int i;
146 struct ip_vs_sh_bucket *b; 154 struct ip_vs_sh_bucket *b;
155 struct ip_vs_dest *dest;
147 156
148 b = tbl; 157 b = &s->buckets[0];
149 for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { 158 for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
150 if (b->dest) { 159 dest = rcu_dereference_protected(b->dest, 1);
151 atomic_dec(&b->dest->refcnt); 160 if (dest) {
152 b->dest = NULL; 161 ip_vs_dest_put(dest);
162 RCU_INIT_POINTER(b->dest, NULL);
153 } 163 }
154 b++; 164 b++;
155 } 165 }
@@ -158,51 +168,46 @@ static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
158 168
159static int ip_vs_sh_init_svc(struct ip_vs_service *svc) 169static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
160{ 170{
161 struct ip_vs_sh_bucket *tbl; 171 struct ip_vs_sh_state *s;
162 172
163 /* allocate the SH table for this service */ 173 /* allocate the SH table for this service */
164 tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE, 174 s = kzalloc(sizeof(struct ip_vs_sh_state), GFP_KERNEL);
165 GFP_KERNEL); 175 if (s == NULL)
166 if (tbl == NULL)
167 return -ENOMEM; 176 return -ENOMEM;
168 177
169 svc->sched_data = tbl; 178 svc->sched_data = s;
170 IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for " 179 IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
171 "current service\n", 180 "current service\n",
172 sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); 181 sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
173 182
174 /* assign the hash buckets with the updated service */ 183 /* assign the hash buckets with current dests */
175 ip_vs_sh_assign(tbl, svc); 184 ip_vs_sh_reassign(s, svc);
176 185
177 return 0; 186 return 0;
178} 187}
179 188
180 189
181static int ip_vs_sh_done_svc(struct ip_vs_service *svc) 190static void ip_vs_sh_done_svc(struct ip_vs_service *svc)
182{ 191{
183 struct ip_vs_sh_bucket *tbl = svc->sched_data; 192 struct ip_vs_sh_state *s = svc->sched_data;
184 193
185 /* got to clean up hash buckets here */ 194 /* got to clean up hash buckets here */
186 ip_vs_sh_flush(tbl); 195 ip_vs_sh_flush(s);
187 196
188 /* release the table itself */ 197 /* release the table itself */
189 kfree(svc->sched_data); 198 kfree_rcu(s, rcu_head);
190 IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n", 199 IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
191 sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); 200 sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
192
193 return 0;
194} 201}
195 202
196 203
197static int ip_vs_sh_update_svc(struct ip_vs_service *svc) 204static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
205 struct ip_vs_dest *dest)
198{ 206{
199 struct ip_vs_sh_bucket *tbl = svc->sched_data; 207 struct ip_vs_sh_state *s = svc->sched_data;
200
201 /* got to clean up hash buckets here */
202 ip_vs_sh_flush(tbl);
203 208
204 /* assign the hash buckets with the updated service */ 209 /* assign the hash buckets with the updated service */
205 ip_vs_sh_assign(tbl, svc); 210 ip_vs_sh_reassign(s, svc);
206 211
207 return 0; 212 return 0;
208} 213}
@@ -225,15 +230,15 @@ static struct ip_vs_dest *
225ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 230ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
226{ 231{
227 struct ip_vs_dest *dest; 232 struct ip_vs_dest *dest;
228 struct ip_vs_sh_bucket *tbl; 233 struct ip_vs_sh_state *s;
229 struct ip_vs_iphdr iph; 234 struct ip_vs_iphdr iph;
230 235
231 ip_vs_fill_iph_addr_only(svc->af, skb, &iph); 236 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
232 237
233 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); 238 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
234 239
235 tbl = (struct ip_vs_sh_bucket *)svc->sched_data; 240 s = (struct ip_vs_sh_state *) svc->sched_data;
236 dest = ip_vs_sh_get(svc->af, tbl, &iph.saddr); 241 dest = ip_vs_sh_get(svc->af, s, &iph.saddr);
237 if (!dest 242 if (!dest
238 || !(dest->flags & IP_VS_DEST_F_AVAILABLE) 243 || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
239 || atomic_read(&dest->weight) <= 0 244 || atomic_read(&dest->weight) <= 0
@@ -262,7 +267,9 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler =
262 .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), 267 .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
263 .init_service = ip_vs_sh_init_svc, 268 .init_service = ip_vs_sh_init_svc,
264 .done_service = ip_vs_sh_done_svc, 269 .done_service = ip_vs_sh_done_svc,
265 .update_service = ip_vs_sh_update_svc, 270 .add_dest = ip_vs_sh_dest_changed,
271 .del_dest = ip_vs_sh_dest_changed,
272 .upd_dest = ip_vs_sh_dest_changed,
266 .schedule = ip_vs_sh_schedule, 273 .schedule = ip_vs_sh_schedule,
267}; 274};
268 275
@@ -276,6 +283,7 @@ static int __init ip_vs_sh_init(void)
276static void __exit ip_vs_sh_cleanup(void) 283static void __exit ip_vs_sh_cleanup(void)
277{ 284{
278 unregister_ip_vs_scheduler(&ip_vs_sh_scheduler); 285 unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
286 synchronize_rcu();
279} 287}
280 288
281 289
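
The SH table now follows the standard pattern for an RCU-managed pointer array: readers dereference bucket slots under RCU, writers update slots with a destination refcount pinning each cached pointer, and the table itself goes away via kfree_rcu(). A condensed sketch of one slot update, assuming the caller serializes writers as ip_vs_sh_reassign() callers do:

static void sh_bucket_set(struct ip_vs_sh_bucket *b, struct ip_vs_dest *new)
{
	struct ip_vs_dest *old;

	old = rcu_dereference_protected(b->dest, 1);
	if (new)
		ip_vs_dest_hold(new);	/* pin for RCU readers */
	rcu_assign_pointer(b->dest, new);
	if (old)
		ip_vs_dest_put(old);	/* drop the old pin; actual freeing
					 * of dests is itself RCU-deferred
					 * in this series
					 */
}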
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 44fd10c539ac..8e57077e5540 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -531,9 +531,9 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
531 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) 531 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
532 return; 532 return;
533 533
534 spin_lock(&ipvs->sync_buff_lock); 534 spin_lock_bh(&ipvs->sync_buff_lock);
535 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { 535 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
536 spin_unlock(&ipvs->sync_buff_lock); 536 spin_unlock_bh(&ipvs->sync_buff_lock);
537 return; 537 return;
538 } 538 }
539 539
@@ -552,7 +552,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
552 if (!buff) { 552 if (!buff) {
553 buff = ip_vs_sync_buff_create_v0(ipvs); 553 buff = ip_vs_sync_buff_create_v0(ipvs);
554 if (!buff) { 554 if (!buff) {
555 spin_unlock(&ipvs->sync_buff_lock); 555 spin_unlock_bh(&ipvs->sync_buff_lock);
556 pr_err("ip_vs_sync_buff_create failed.\n"); 556 pr_err("ip_vs_sync_buff_create failed.\n");
557 return; 557 return;
558 } 558 }
@@ -590,7 +590,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
590 sb_queue_tail(ipvs, ms); 590 sb_queue_tail(ipvs, ms);
591 ms->sync_buff = NULL; 591 ms->sync_buff = NULL;
592 } 592 }
593 spin_unlock(&ipvs->sync_buff_lock); 593 spin_unlock_bh(&ipvs->sync_buff_lock);
594 594
595 /* synchronize its controller if it has */ 595 /* synchronize its controller if it has */
596 cp = cp->control; 596 cp = cp->control;
@@ -641,9 +641,9 @@ sloop:
641 pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); 641 pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
642 } 642 }
643 643
644 spin_lock(&ipvs->sync_buff_lock); 644 spin_lock_bh(&ipvs->sync_buff_lock);
645 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { 645 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
646 spin_unlock(&ipvs->sync_buff_lock); 646 spin_unlock_bh(&ipvs->sync_buff_lock);
647 return; 647 return;
648 } 648 }
649 649
@@ -683,7 +683,7 @@ sloop:
683 if (!buff) { 683 if (!buff) {
684 buff = ip_vs_sync_buff_create(ipvs); 684 buff = ip_vs_sync_buff_create(ipvs);
685 if (!buff) { 685 if (!buff) {
686 spin_unlock(&ipvs->sync_buff_lock); 686 spin_unlock_bh(&ipvs->sync_buff_lock);
687 pr_err("ip_vs_sync_buff_create failed.\n"); 687 pr_err("ip_vs_sync_buff_create failed.\n");
688 return; 688 return;
689 } 689 }
@@ -750,7 +750,7 @@ sloop:
750 } 750 }
751 } 751 }
752 752
753 spin_unlock(&ipvs->sync_buff_lock); 753 spin_unlock_bh(&ipvs->sync_buff_lock);
754 754
755control: 755control:
756 /* synchronize its controller if it has */ 756 /* synchronize its controller if it has */
@@ -843,7 +843,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
843 kfree(param->pe_data); 843 kfree(param->pe_data);
844 844
845 dest = cp->dest; 845 dest = cp->dest;
846 spin_lock(&cp->lock); 846 spin_lock_bh(&cp->lock);
847 if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE && 847 if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
848 !(flags & IP_VS_CONN_F_TEMPLATE) && dest) { 848 !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
849 if (flags & IP_VS_CONN_F_INACTIVE) { 849 if (flags & IP_VS_CONN_F_INACTIVE) {
@@ -857,24 +857,21 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
857 flags &= IP_VS_CONN_F_BACKUP_UPD_MASK; 857 flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
858 flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK; 858 flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
859 cp->flags = flags; 859 cp->flags = flags;
860 spin_unlock(&cp->lock); 860 spin_unlock_bh(&cp->lock);
861 if (!dest) { 861 if (!dest)
862 dest = ip_vs_try_bind_dest(cp); 862 ip_vs_try_bind_dest(cp);
863 if (dest)
864 atomic_dec(&dest->refcnt);
865 }
866 } else { 863 } else {
867 /* 864 /*
868 * Find the appropriate destination for the connection. 865 * Find the appropriate destination for the connection.
869 * If it is not found the connection will remain unbound 866 * If it is not found the connection will remain unbound
870 * but still handled. 867 * but still handled.
871 */ 868 */
869 rcu_read_lock();
872 dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, 870 dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
873 param->vport, protocol, fwmark, flags); 871 param->vport, protocol, fwmark, flags);
874 872
875 cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); 873 cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
876 if (dest) 874 rcu_read_unlock();
877 atomic_dec(&dest->refcnt);
878 if (!cp) { 875 if (!cp) {
879 if (param->pe_data) 876 if (param->pe_data)
880 kfree(param->pe_data); 877 kfree(param->pe_data);
@@ -1692,11 +1689,7 @@ static int sync_thread_backup(void *data)
1692 break; 1689 break;
1693 } 1690 }
1694 1691
1695 /* disable bottom half, because it accesses the data
1696 shared by softirq while getting/creating conns */
1697 local_bh_disable();
1698 ip_vs_process_message(tinfo->net, tinfo->buf, len); 1692 ip_vs_process_message(tinfo->net, tinfo->buf, len);
1699 local_bh_enable();
1700 } 1693 }
1701 } 1694 }
1702 1695
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index bc1bfc48a17f..c60a81c4ce9a 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -51,7 +51,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
51 * new connections. 51 * new connections.
52 */ 52 */
53 53
54 list_for_each_entry(dest, &svc->destinations, n_list) { 54 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
55 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && 55 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
56 atomic_read(&dest->weight) > 0) { 56 atomic_read(&dest->weight) > 0) {
57 least = dest; 57 least = dest;
@@ -66,7 +66,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
66 * Find the destination with the least load. 66 * Find the destination with the least load.
67 */ 67 */
68 nextstage: 68 nextstage:
69 list_for_each_entry_continue(dest, &svc->destinations, n_list) { 69 list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
70 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 70 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
71 continue; 71 continue;
72 doh = ip_vs_dest_conn_overhead(dest); 72 doh = ip_vs_dest_conn_overhead(dest);
@@ -106,6 +106,7 @@ static int __init ip_vs_wlc_init(void)
106static void __exit ip_vs_wlc_cleanup(void) 106static void __exit ip_vs_wlc_cleanup(void)
107{ 107{
108 unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler); 108 unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler);
109 synchronize_rcu();
109} 110}
110 111
111module_init(ip_vs_wlc_init); 112module_init(ip_vs_wlc_init);
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 231be7dd547a..0e68555bceb9 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -29,14 +29,45 @@
29 29
30#include <net/ip_vs.h> 30#include <net/ip_vs.h>
31 31
32/* The WRR algorithm depends on some calculations:
33 * - mw: maximum weight
34 * - di: weight step, greatest common divisor from all weights
35 * - cw: current required weight
36 * As a result, all weights are in the [di..mw] range with a step=di.
37 *
38 * First, we start with cw = mw and select dests with weight >= cw.
39 * Then cw is reduced by di and all dests are checked again.
40 * The last pass should be with cw = di. We have mw/di passes in total:
41 *
42 * pass 1: cw = max weight
43 * pass 2: cw = max weight - di
44 * pass 3: cw = max weight - 2 * di
45 * ...
46 * last pass: cw = di
47 *
48 * Weights are supposed to be >= di, but we run in parallel with
49 * weight changes, so it is possible for some dest weight to drop
50 * below di, which is bad if it is the only available dest.
51 *
52 * So we modify how mw is calculated: it is now reduced by (di - 1),
53 * so that the last cw is 1, to catch such dests with weight below di:
54 * pass 1: cw = max weight - (di - 1)
55 * pass 2: cw = max weight - di - (di - 1)
56 * pass 3: cw = max weight - 2 * di - (di - 1)
57 * ...
58 * last pass: cw = 1
59 *
60 */
61
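
A tiny stand-alone illustration (not kernel code) of the pass sequence for a concrete case: weights {4, 6} give di = gcd(4, 6) = 2 and mw = 6 - (di - 1) = 5, so the scheduler sweeps cw = 5, 3, 1:

#include <stdio.h>

int main(void)
{
	int di = 2;			/* gcd of weights {4, 6} */
	int mw = 6 - (di - 1);		/* max weight 6, reduced to 5 */
	int cw;

	/* pass 1: cw = 5 -> only the weight-6 dest qualifies
	 * pass 2: cw = 3 -> both dests qualify
	 * pass 3: cw = 1 -> both qualify, and any dest whose weight
	 *                   raced below di is still caught
	 */
	for (cw = mw; cw > 0; cw -= di)
		printf("pass: cw = %d\n", cw);
	return 0;
}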
32/* 62/*
33 * current destination pointer for weighted round-robin scheduling 63 * current destination pointer for weighted round-robin scheduling
34 */ 64 */
35struct ip_vs_wrr_mark { 65struct ip_vs_wrr_mark {
36 struct list_head *cl; /* current list head */ 66 struct ip_vs_dest *cl; /* current dest or head */
37 int cw; /* current weight */ 67 int cw; /* current weight */
38 int mw; /* maximum weight */ 68 int mw; /* maximum weight */
39 int di; /* decreasing interval */ 69 int di; /* decreasing interval */
70 struct rcu_head rcu_head;
40}; 71};
41 72
42 73
@@ -88,36 +119,41 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
88 if (mark == NULL) 119 if (mark == NULL)
89 return -ENOMEM; 120 return -ENOMEM;
90 121
91 mark->cl = &svc->destinations; 122 mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list);
92 mark->cw = 0;
93 mark->mw = ip_vs_wrr_max_weight(svc);
94 mark->di = ip_vs_wrr_gcd_weight(svc); 123 mark->di = ip_vs_wrr_gcd_weight(svc);
124 mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1);
125 mark->cw = mark->mw;
95 svc->sched_data = mark; 126 svc->sched_data = mark;
96 127
97 return 0; 128 return 0;
98} 129}
99 130
100 131
101static int ip_vs_wrr_done_svc(struct ip_vs_service *svc) 132static void ip_vs_wrr_done_svc(struct ip_vs_service *svc)
102{ 133{
134 struct ip_vs_wrr_mark *mark = svc->sched_data;
135
103 /* 136 /*
104 * Release the mark variable 137 * Release the mark variable
105 */ 138 */
106 kfree(svc->sched_data); 139 kfree_rcu(mark, rcu_head);
107
108 return 0;
109} 140}
110 141
111 142
112static int ip_vs_wrr_update_svc(struct ip_vs_service *svc) 143static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc,
144 struct ip_vs_dest *dest)
113{ 145{
114 struct ip_vs_wrr_mark *mark = svc->sched_data; 146 struct ip_vs_wrr_mark *mark = svc->sched_data;
115 147
116 mark->cl = &svc->destinations; 148 spin_lock_bh(&svc->sched_lock);
117 mark->mw = ip_vs_wrr_max_weight(svc); 149 mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list);
118 mark->di = ip_vs_wrr_gcd_weight(svc); 150 mark->di = ip_vs_wrr_gcd_weight(svc);
119 if (mark->cw > mark->mw) 151 mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1);
120 mark->cw = 0; 152 if (mark->cw > mark->mw || !mark->cw)
153 mark->cw = mark->mw;
154 else if (mark->di > 1)
155 mark->cw = (mark->cw / mark->di) * mark->di + 1;
156 spin_unlock_bh(&svc->sched_lock);
121 return 0; 157 return 0;
122} 158}
123 159
@@ -128,80 +164,79 @@ static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
128static struct ip_vs_dest * 164static struct ip_vs_dest *
129ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 165ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
130{ 166{
131 struct ip_vs_dest *dest; 167 struct ip_vs_dest *dest, *last, *stop = NULL;
132 struct ip_vs_wrr_mark *mark = svc->sched_data; 168 struct ip_vs_wrr_mark *mark = svc->sched_data;
133 struct list_head *p; 169 bool last_pass = false, restarted = false;
134 170
135 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 171 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
136 172
137 /* 173 spin_lock_bh(&svc->sched_lock);
138 * This loop will always terminate, because mark->cw in (0, max_weight] 174 dest = mark->cl;
139 * and at least one server has its weight equal to max_weight. 175 /* No available dests? */
140 */ 176 if (mark->mw == 0)
141 write_lock(&svc->sched_lock); 177 goto err_noavail;
142 p = mark->cl; 178 last = dest;
179 /* Stop only after all dests were checked for weight >= 1 (last pass) */
143 while (1) { 180 while (1) {
144 if (mark->cl == &svc->destinations) { 181 list_for_each_entry_continue_rcu(dest,
145 /* it is at the head of the destination list */ 182 &svc->destinations,
146 183 n_list) {
147 if (mark->cl == mark->cl->next) {
148 /* no dest entry */
149 ip_vs_scheduler_err(svc,
150 "no destination available: "
151 "no destinations present");
152 dest = NULL;
153 goto out;
154 }
155
156 mark->cl = svc->destinations.next;
157 mark->cw -= mark->di;
158 if (mark->cw <= 0) {
159 mark->cw = mark->mw;
160 /*
161 * Still zero, which means no available servers.
162 */
163 if (mark->cw == 0) {
164 mark->cl = &svc->destinations;
165 ip_vs_scheduler_err(svc,
166 "no destination available");
167 dest = NULL;
168 goto out;
169 }
170 }
171 } else
172 mark->cl = mark->cl->next;
173
174 if (mark->cl != &svc->destinations) {
175 /* not at the head of the list */
176 dest = list_entry(mark->cl, struct ip_vs_dest, n_list);
177 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && 184 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
178 atomic_read(&dest->weight) >= mark->cw) { 185 atomic_read(&dest->weight) >= mark->cw)
179 /* got it */ 186 goto found;
180 break; 187 if (dest == stop)
181 } 188 goto err_over;
182 } 189 }
183 190 mark->cw -= mark->di;
184 if (mark->cl == p && mark->cw == mark->di) { 191 if (mark->cw <= 0) {
185 /* back to the start, and no dest is found. 192 mark->cw = mark->mw;
186 It is only possible when all dests are OVERLOADED */ 193 /* Stop if we tried last pass from first dest:
187 dest = NULL; 194 * 1. last_pass: we started checks when cw > di but
188 ip_vs_scheduler_err(svc, 195 * then all dests were checked for w >= 1
189 "no destination available: " 196 * 2. last was head: the first and only traversal
190 "all destinations are overloaded"); 197 * was for weight >= 1, for all dests.
191 goto out; 198 */
199 if (last_pass ||
200 &last->n_list == &svc->destinations)
201 goto err_over;
202 restarted = true;
203 }
204 last_pass = mark->cw <= mark->di;
205 if (last_pass && restarted &&
206 &last->n_list != &svc->destinations) {
207 /* First traversal was for w >= 1 but only
208 * for dests after 'last', now do the same
209 * for all dests up to 'last'.
210 */
211 stop = last;
192 } 212 }
193 } 213 }
194 214
215found:
195 IP_VS_DBG_BUF(6, "WRR: server %s:%u " 216 IP_VS_DBG_BUF(6, "WRR: server %s:%u "
196 "activeconns %d refcnt %d weight %d\n", 217 "activeconns %d refcnt %d weight %d\n",
197 IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), 218 IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
198 atomic_read(&dest->activeconns), 219 atomic_read(&dest->activeconns),
199 atomic_read(&dest->refcnt), 220 atomic_read(&dest->refcnt),
200 atomic_read(&dest->weight)); 221 atomic_read(&dest->weight));
222 mark->cl = dest;
201 223
202 out: 224 out:
203 write_unlock(&svc->sched_lock); 225 spin_unlock_bh(&svc->sched_lock);
204 return dest; 226 return dest;
227
228err_noavail:
229 mark->cl = dest;
230 dest = NULL;
231 ip_vs_scheduler_err(svc, "no destination available");
232 goto out;
233
234err_over:
235 mark->cl = dest;
236 dest = NULL;
237 ip_vs_scheduler_err(svc, "no destination available: "
238 "all destinations are overloaded");
239 goto out;
205} 240}
206 241
207 242
@@ -212,7 +247,9 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
212 .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), 247 .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
213 .init_service = ip_vs_wrr_init_svc, 248 .init_service = ip_vs_wrr_init_svc,
214 .done_service = ip_vs_wrr_done_svc, 249 .done_service = ip_vs_wrr_done_svc,
215 .update_service = ip_vs_wrr_update_svc, 250 .add_dest = ip_vs_wrr_dest_changed,
251 .del_dest = ip_vs_wrr_dest_changed,
252 .upd_dest = ip_vs_wrr_dest_changed,
216 .schedule = ip_vs_wrr_schedule, 253 .schedule = ip_vs_wrr_schedule,
217}; 254};
218 255
@@ -224,6 +261,7 @@ static int __init ip_vs_wrr_init(void)
224static void __exit ip_vs_wrr_cleanup(void) 261static void __exit ip_vs_wrr_cleanup(void)
225{ 262{
226 unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler); 263 unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler);
264 synchronize_rcu();
227} 265}
228 266
229module_init(ip_vs_wrr_init); 267module_init(ip_vs_wrr_init);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index ee6b7a9f1ec2..b75ff6429a04 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -17,6 +17,8 @@
17 * - not all connections have destination server, for example, 17 * - not all connections have destination server, for example,
18 * connections in backup server when fwmark is used 18 * connections in backup server when fwmark is used
19 * - bypass connections use daddr from packet 19 * - bypass connections use daddr from packet
20 * - we can use dst without a ref while sending in an RCU section; we
21 * take a ref when returning NF_ACCEPT for a NAT-ed packet via loopback
20 * LOCAL_OUT rules: 22 * LOCAL_OUT rules:
21 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) 23 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
22 * - skb->pkt_type is not set yet 24 * - skb->pkt_type is not set yet
@@ -51,39 +53,54 @@ enum {
51 */ 53 */
52 IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */ 54 IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */
53 IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */ 55 IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */
56 IP_VS_RT_MODE_TUNNEL = 32,/* Tunnel mode */
54}; 57};
55 58
59static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void)
60{
61 return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC);
62}
63
64static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst)
65{
66 kfree(dest_dst);
67}
68
56/* 69/*
57 * Destination cache to speed up outgoing route lookup 70 * Destination cache to speed up outgoing route lookup
58 */ 71 */
59static inline void 72static inline void
60__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst, 73__ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst,
61 u32 dst_cookie) 74 struct dst_entry *dst, u32 dst_cookie)
62{ 75{
63 struct dst_entry *old_dst; 76 struct ip_vs_dest_dst *old;
77
78 old = rcu_dereference_protected(dest->dest_dst,
79 lockdep_is_held(&dest->dst_lock));
64 80
65 old_dst = dest->dst_cache; 81 if (dest_dst) {
66 dest->dst_cache = dst; 82 dest_dst->dst_cache = dst;
67 dest->dst_rtos = rtos; 83 dest_dst->dst_cookie = dst_cookie;
68 dest->dst_cookie = dst_cookie; 84 }
69 dst_release(old_dst); 85 rcu_assign_pointer(dest->dest_dst, dest_dst);
86
87 if (old)
88 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
70} 89}
71 90
72static inline struct dst_entry * 91static inline struct ip_vs_dest_dst *
73__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) 92__ip_vs_dst_check(struct ip_vs_dest *dest)
74{ 93{
75 struct dst_entry *dst = dest->dst_cache; 94 struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst);
95 struct dst_entry *dst;
76 96
77 if (!dst) 97 if (!dest_dst)
78 return NULL; 98 return NULL;
79 if ((dst->obsolete || rtos != dest->dst_rtos) && 99 dst = dest_dst->dst_cache;
80 dst->ops->check(dst, dest->dst_cookie) == NULL) { 100 if (dst->obsolete &&
81 dest->dst_cache = NULL; 101 dst->ops->check(dst, dest_dst->dst_cookie) == NULL)
82 dst_release(dst);
83 return NULL; 102 return NULL;
84 } 103 return dest_dst;
85 dst_hold(dst);
86 return dst;
87} 104}
88 105
89static inline bool 106static inline bool
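
__ip_vs_dst_set() and __ip_vs_dst_check() together implement a classic RCU-replaced cache object. A condensed sketch of the update step, assuming only what the hunk above shows (dest->dest_dst is __rcu, dest->dst_lock serializes updaters, ip_vs_dest_dst_rcu_free() is the RCU callback the patch references):

static void dst_cache_replace(struct ip_vs_dest *dest,
			      struct ip_vs_dest_dst *new)
{
	struct ip_vs_dest_dst *old;

	spin_lock_bh(&dest->dst_lock);
	old = rcu_dereference_protected(dest->dest_dst,
					lockdep_is_held(&dest->dst_lock));
	rcu_assign_pointer(dest->dest_dst, new);
	spin_unlock_bh(&dest->dst_lock);
	/* Readers inside rcu_read_lock() may still hold 'old', so its
	 * memory (and the dst it pins) is released only after a grace
	 * period.
	 */
	if (old)
		call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
}

This grace period is also why the transmit path can use the cached dst without taking a reference, as the new header comment notes.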
@@ -104,7 +121,7 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
104 121
105/* Get route to daddr, update *saddr, optionally bind route to saddr */ 122/* Get route to daddr, update *saddr, optionally bind route to saddr */
106static struct rtable *do_output_route4(struct net *net, __be32 daddr, 123static struct rtable *do_output_route4(struct net *net, __be32 daddr,
107 u32 rtos, int rt_mode, __be32 *saddr) 124 int rt_mode, __be32 *saddr)
108{ 125{
109 struct flowi4 fl4; 126 struct flowi4 fl4;
110 struct rtable *rt; 127 struct rtable *rt;
@@ -113,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr,
113 memset(&fl4, 0, sizeof(fl4)); 130 memset(&fl4, 0, sizeof(fl4));
114 fl4.daddr = daddr; 131 fl4.daddr = daddr;
115 fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; 132 fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
116 fl4.flowi4_tos = rtos;
117 fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? 133 fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
118 FLOWI_FLAG_KNOWN_NH : 0; 134 FLOWI_FLAG_KNOWN_NH : 0;
119 135
@@ -124,7 +140,7 @@ retry:
124 if (PTR_ERR(rt) == -EINVAL && *saddr && 140 if (PTR_ERR(rt) == -EINVAL && *saddr &&
125 rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { 141 rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
126 *saddr = 0; 142 *saddr = 0;
127 flowi4_update_output(&fl4, 0, rtos, daddr, 0); 143 flowi4_update_output(&fl4, 0, 0, daddr, 0);
128 goto retry; 144 goto retry;
129 } 145 }
130 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); 146 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
@@ -132,7 +148,7 @@ retry:
132 } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { 148 } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
133 ip_rt_put(rt); 149 ip_rt_put(rt);
134 *saddr = fl4.saddr; 150 *saddr = fl4.saddr;
135 flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr); 151 flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr);
136 loop++; 152 loop++;
137 goto retry; 153 goto retry;
138 } 154 }
@@ -141,113 +157,140 @@ retry:
141} 157}
142 158
143/* Get route to destination or remote server */ 159/* Get route to destination or remote server */
144static struct rtable * 160static int
145__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, 161__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
146 __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr) 162 __be32 daddr, int rt_mode, __be32 *ret_saddr)
147{ 163{
148 struct net *net = dev_net(skb_dst(skb)->dev); 164 struct net *net = dev_net(skb_dst(skb)->dev);
165 struct netns_ipvs *ipvs = net_ipvs(net);
166 struct ip_vs_dest_dst *dest_dst;
149 struct rtable *rt; /* Route to the other host */ 167 struct rtable *rt; /* Route to the other host */
150 struct rtable *ort; /* Original route */ 168 struct rtable *ort; /* Original route */
151 int local; 169 struct iphdr *iph;
170 __be16 df;
171 int mtu;
172 int local, noref = 1;
152 173
153 if (dest) { 174 if (dest) {
154 spin_lock(&dest->dst_lock); 175 dest_dst = __ip_vs_dst_check(dest);
155 if (!(rt = (struct rtable *) 176 if (likely(dest_dst))
156 __ip_vs_dst_check(dest, rtos))) { 177 rt = (struct rtable *) dest_dst->dst_cache;
157 rt = do_output_route4(net, dest->addr.ip, rtos, 178 else {
158 rt_mode, &dest->dst_saddr.ip); 179 dest_dst = ip_vs_dest_dst_alloc();
180 spin_lock_bh(&dest->dst_lock);
181 if (!dest_dst) {
182 __ip_vs_dst_set(dest, NULL, NULL, 0);
183 spin_unlock_bh(&dest->dst_lock);
184 goto err_unreach;
185 }
186 rt = do_output_route4(net, dest->addr.ip, rt_mode,
187 &dest_dst->dst_saddr.ip);
159 if (!rt) { 188 if (!rt) {
160 spin_unlock(&dest->dst_lock); 189 __ip_vs_dst_set(dest, NULL, NULL, 0);
161 return NULL; 190 spin_unlock_bh(&dest->dst_lock);
191 ip_vs_dest_dst_free(dest_dst);
192 goto err_unreach;
162 } 193 }
163 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); 194 __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0);
164 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " 195 spin_unlock_bh(&dest->dst_lock);
165 "rtos=%X\n", 196 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
166 &dest->addr.ip, &dest->dst_saddr.ip, 197 &dest->addr.ip, &dest_dst->dst_saddr.ip,
167 atomic_read(&rt->dst.__refcnt), rtos); 198 atomic_read(&rt->dst.__refcnt));
168 } 199 }
169 daddr = dest->addr.ip; 200 daddr = dest->addr.ip;
170 if (ret_saddr) 201 if (ret_saddr)
171 *ret_saddr = dest->dst_saddr.ip; 202 *ret_saddr = dest_dst->dst_saddr.ip;
172 spin_unlock(&dest->dst_lock);
173 } else { 203 } else {
174 __be32 saddr = htonl(INADDR_ANY); 204 __be32 saddr = htonl(INADDR_ANY);
175 205
206 noref = 0;
207
176 /* For such unconfigured boxes avoid many route lookups 208 /* For such unconfigured boxes avoid many route lookups
177 * for performance reasons because we do not remember saddr 209 * for performance reasons because we do not remember saddr
178 */ 210 */
179 rt_mode &= ~IP_VS_RT_MODE_CONNECT; 211 rt_mode &= ~IP_VS_RT_MODE_CONNECT;
180 rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr); 212 rt = do_output_route4(net, daddr, rt_mode, &saddr);
181 if (!rt) 213 if (!rt)
182 return NULL; 214 goto err_unreach;
183 if (ret_saddr) 215 if (ret_saddr)
184 *ret_saddr = saddr; 216 *ret_saddr = saddr;
185 } 217 }
186 218
187 local = rt->rt_flags & RTCF_LOCAL; 219 local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0;
188 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & 220 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
189 rt_mode)) { 221 rt_mode)) {
190 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", 222 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
191 (rt->rt_flags & RTCF_LOCAL) ? 223 (rt->rt_flags & RTCF_LOCAL) ?
192 "local":"non-local", &daddr); 224 "local":"non-local", &daddr);
193 ip_rt_put(rt); 225 goto err_put;
194 return NULL;
195 }
196 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
197 !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
198 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
199 "requires NAT method, dest: %pI4\n",
200 &ip_hdr(skb)->daddr, &daddr);
201 ip_rt_put(rt);
202 return NULL;
203 } 226 }
204 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) { 227 iph = ip_hdr(skb);
205 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 " 228 if (likely(!local)) {
206 "to non-local address, dest: %pI4\n", 229 if (unlikely(ipv4_is_loopback(iph->saddr))) {
207 &ip_hdr(skb)->saddr, &daddr); 230 IP_VS_DBG_RL("Stopping traffic from loopback address "
208 ip_rt_put(rt); 231 "%pI4 to non-local address, dest: %pI4\n",
209 return NULL; 232 &iph->saddr, &daddr);
233 goto err_put;
234 }
235 } else {
236 ort = skb_rtable(skb);
237 if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
238 !(ort->rt_flags & RTCF_LOCAL)) {
239 IP_VS_DBG_RL("Redirect from non-local address %pI4 to "
240 "local requires NAT method, dest: %pI4\n",
241 &iph->daddr, &daddr);
242 goto err_put;
243 }
244 /* skb to local stack, preserve old route */
245 if (!noref)
246 ip_rt_put(rt);
247 return local;
210 } 248 }
211 249
212 return rt; 250 if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
213} 251 mtu = dst_mtu(&rt->dst);
214 252 df = iph->frag_off & htons(IP_DF);
215/* Reroute packet to local IPv4 stack after DNAT */
216static int
217__ip_vs_reroute_locally(struct sk_buff *skb)
218{
219 struct rtable *rt = skb_rtable(skb);
220 struct net_device *dev = rt->dst.dev;
221 struct net *net = dev_net(dev);
222 struct iphdr *iph = ip_hdr(skb);
223
224 if (rt_is_input_route(rt)) {
225 unsigned long orefdst = skb->_skb_refdst;
226
227 if (ip_route_input(skb, iph->daddr, iph->saddr,
228 iph->tos, skb->dev))
229 return 0;
230 refdst_drop(orefdst);
231 } else { 253 } else {
232 struct flowi4 fl4 = { 254 struct sock *sk = skb->sk;
233 .daddr = iph->daddr, 255
234 .saddr = iph->saddr, 256 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
235 .flowi4_tos = RT_TOS(iph->tos), 257 if (mtu < 68) {
236 .flowi4_mark = skb->mark, 258 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
237 }; 259 goto err_put;
238
239 rt = ip_route_output_key(net, &fl4);
240 if (IS_ERR(rt))
241 return 0;
242 if (!(rt->rt_flags & RTCF_LOCAL)) {
243 ip_rt_put(rt);
244 return 0;
245 } 260 }
246 /* Drop old route. */ 261 ort = skb_rtable(skb);
247 skb_dst_drop(skb); 262 if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
248 skb_dst_set(skb, &rt->dst); 263 ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
264 /* MTU check allowed? */
265 df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0;
249 } 266 }
250 return 1; 267
268 /* MTU checking */
269 if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) {
270 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
271 IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr);
272 goto err_put;
273 }
274
275 skb_dst_drop(skb);
276 if (noref) {
277 if (!local)
278 skb_dst_set_noref_force(skb, &rt->dst);
279 else
280 skb_dst_set(skb, dst_clone(&rt->dst));
281 } else
282 skb_dst_set(skb, &rt->dst);
283
284 return local;
285
286err_put:
287 if (!noref)
288 ip_rt_put(rt);
289 return -1;
290
291err_unreach:
292 dst_link_failure(skb);
293 return -1;
251} 294}
252 295
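The rewritten __ip_vs_get_out_rt() no longer returns a route at all: it attaches the route to the skb itself (noref when it came from the RCU-protected cache), folds in the MTU check, and reports -1/0/1 for unreachable/non-local/local, having already called dst_link_failure() on the error path. A sketch of the resulting calling convention, modelled on the transmitters later in this patch:

	rcu_read_lock();
	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				   IP_VS_RT_MODE_LOCAL |
				   IP_VS_RT_MODE_NON_LOCAL, NULL);
	if (local < 0)
		goto tx_error;		/* route or MTU failure, skb still ours */
	if (local) {
		rcu_read_unlock();	/* hand the packet to the local stack */
		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
	}
	/* non-local: skb_dst(skb) already points at the output route */
	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
	rcu_read_unlock();
	return NF_STOLEN;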
253#ifdef CONFIG_IP_VS_IPV6 296#ifdef CONFIG_IP_VS_IPV6
@@ -294,44 +337,57 @@ out_err:
294/* 337/*
295 * Get route to destination or remote server 338 * Get route to destination or remote server
296 */ 339 */
297static struct rt6_info * 340static int
298__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, 341__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
299 struct in6_addr *daddr, struct in6_addr *ret_saddr, 342 struct in6_addr *daddr, struct in6_addr *ret_saddr,
300 int do_xfrm, int rt_mode) 343 struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
301{ 344{
302 struct net *net = dev_net(skb_dst(skb)->dev); 345 struct net *net = dev_net(skb_dst(skb)->dev);
346 struct ip_vs_dest_dst *dest_dst;
303 struct rt6_info *rt; /* Route to the other host */ 347 struct rt6_info *rt; /* Route to the other host */
304 struct rt6_info *ort; /* Original route */ 348 struct rt6_info *ort; /* Original route */
305 struct dst_entry *dst; 349 struct dst_entry *dst;
306 int local; 350 int mtu;
351 int local, noref = 1;
307 352
308 if (dest) { 353 if (dest) {
309 spin_lock(&dest->dst_lock); 354 dest_dst = __ip_vs_dst_check(dest);
310 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0); 355 if (likely(dest_dst))
311 if (!rt) { 356 rt = (struct rt6_info *) dest_dst->dst_cache;
357 else {
312 u32 cookie; 358 u32 cookie;
313 359
360 dest_dst = ip_vs_dest_dst_alloc();
361 spin_lock_bh(&dest->dst_lock);
362 if (!dest_dst) {
363 __ip_vs_dst_set(dest, NULL, NULL, 0);
364 spin_unlock_bh(&dest->dst_lock);
365 goto err_unreach;
366 }
314 dst = __ip_vs_route_output_v6(net, &dest->addr.in6, 367 dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
315 &dest->dst_saddr.in6, 368 &dest_dst->dst_saddr.in6,
316 do_xfrm); 369 do_xfrm);
317 if (!dst) { 370 if (!dst) {
318 spin_unlock(&dest->dst_lock); 371 __ip_vs_dst_set(dest, NULL, NULL, 0);
319 return NULL; 372 spin_unlock_bh(&dest->dst_lock);
373 ip_vs_dest_dst_free(dest_dst);
374 goto err_unreach;
320 } 375 }
321 rt = (struct rt6_info *) dst; 376 rt = (struct rt6_info *) dst;
322 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; 377 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
323 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie); 378 __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
379 spin_unlock_bh(&dest->dst_lock);
324 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", 380 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
325 &dest->addr.in6, &dest->dst_saddr.in6, 381 &dest->addr.in6, &dest_dst->dst_saddr.in6,
326 atomic_read(&rt->dst.__refcnt)); 382 atomic_read(&rt->dst.__refcnt));
327 } 383 }
328 if (ret_saddr) 384 if (ret_saddr)
329 *ret_saddr = dest->dst_saddr.in6; 385 *ret_saddr = dest_dst->dst_saddr.in6;
330 spin_unlock(&dest->dst_lock);
331 } else { 386 } else {
387 noref = 0;
332 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); 388 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
333 if (!dst) 389 if (!dst)
334 return NULL; 390 goto err_unreach;
335 rt = (struct rt6_info *) dst; 391 rt = (struct rt6_info *) dst;
336 } 392 }
337 393
@@ -340,86 +396,137 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
340 rt_mode)) { 396 rt_mode)) {
341 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n", 397 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
342 local ? "local":"non-local", daddr); 398 local ? "local":"non-local", daddr);
343 dst_release(&rt->dst); 399 goto err_put;
344 return NULL;
345 } 400 }
346 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && 401 if (likely(!local)) {
347 !((ort = (struct rt6_info *) skb_dst(skb)) && 402 if (unlikely((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
348 __ip_vs_is_local_route6(ort))) { 403 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
349 IP_VS_DBG_RL("Redirect from non-local address %pI6c to local " 404 IPV6_ADDR_LOOPBACK)) {
350 "requires NAT method, dest: %pI6c\n", 405 IP_VS_DBG_RL("Stopping traffic from loopback address "
351 &ipv6_hdr(skb)->daddr, daddr); 406 "%pI6c to non-local address, "
352 dst_release(&rt->dst); 407 "dest: %pI6c\n",
353 return NULL; 408 &ipv6_hdr(skb)->saddr, daddr);
409 goto err_put;
410 }
411 } else {
412 ort = (struct rt6_info *) skb_dst(skb);
413 if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
414 !__ip_vs_is_local_route6(ort)) {
415 IP_VS_DBG_RL("Redirect from non-local address %pI6c "
416 "to local requires NAT method, "
417 "dest: %pI6c\n",
418 &ipv6_hdr(skb)->daddr, daddr);
419 goto err_put;
420 }
421 /* skb to local stack, preserve old route */
422 if (!noref)
423 dst_release(&rt->dst);
424 return local;
354 } 425 }
355 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 426
356 ipv6_addr_type(&ipv6_hdr(skb)->saddr) & 427 /* MTU checking */
357 IPV6_ADDR_LOOPBACK)) { 428 if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
358 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c " 429 mtu = dst_mtu(&rt->dst);
359 "to non-local address, dest: %pI6c\n", 430 else {
360 &ipv6_hdr(skb)->saddr, daddr); 431 struct sock *sk = skb->sk;
361 dst_release(&rt->dst); 432
362 return NULL; 433 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
434 if (mtu < IPV6_MIN_MTU) {
435 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
436 IPV6_MIN_MTU);
437 goto err_put;
438 }
439 ort = (struct rt6_info *) skb_dst(skb);
440 if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
441 ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
363 } 442 }
364 443
365 return rt; 444 if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
445 if (!skb->dev)
446 skb->dev = net->loopback_dev;
447 /* only send ICMP too big on first fragment */
448 if (!ipvsh->fragoffs)
449 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
450 IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr);
451 goto err_put;
452 }
453
454 skb_dst_drop(skb);
455 if (noref) {
456 if (!local)
457 skb_dst_set_noref_force(skb, &rt->dst);
458 else
459 skb_dst_set(skb, dst_clone(&rt->dst));
460 } else
461 skb_dst_set(skb, &rt->dst);
462
463 return local;
464
465err_put:
466 if (!noref)
467 dst_release(&rt->dst);
468 return -1;
469
470err_unreach:
471 dst_link_failure(skb);
472 return -1;
366} 473}
367#endif 474#endif
368 475
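The IPv6 side validates its cached route with a cookie instead of a flag: the fib6 node's serial number is captured when the route is stored, and the IPv6 ops->check implementation returns NULL once the routing tree's serial has moved on, so any route change invalidates the cache on the next __ip_vs_dst_check(). The two lines that establish the cookie, pulled out of the hunk above:

	u32 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;

	__ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
	/* later: dst->ops->check(dst, cookie) fails once fn_sernum changes */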
369 476
370/* 477/* return NF_ACCEPT to allow forwarding or other NF_xxx on error */
371 * Release dest->dst_cache before a dest is removed 478static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
372 */ 479 struct ip_vs_conn *cp)
373void
374ip_vs_dst_reset(struct ip_vs_dest *dest)
375{ 480{
376 struct dst_entry *old_dst; 481 int ret = NF_ACCEPT;
482
483 skb->ipvs_property = 1;
484 if (unlikely(cp->flags & IP_VS_CONN_F_NFCT))
485 ret = ip_vs_confirm_conntrack(skb);
486 if (ret == NF_ACCEPT) {
487 nf_reset(skb);
488 skb_forward_csum(skb);
489 }
490 return ret;
491}
492
493/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
494static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
495 struct ip_vs_conn *cp, int local)
496{
497 int ret = NF_STOLEN;
377 498
378 old_dst = dest->dst_cache; 499 skb->ipvs_property = 1;
379 dest->dst_cache = NULL; 500 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
380 dst_release(old_dst); 501 ip_vs_notrack(skb);
381 dest->dst_saddr.ip = 0; 502 else
503 ip_vs_update_conntrack(skb, cp, 1);
504 if (!local) {
505 skb_forward_csum(skb);
506 NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
507 dst_output);
508 } else
509 ret = NF_ACCEPT;
510 return ret;
382} 511}
383 512
384#define IP_VS_XMIT_TUNNEL(skb, cp) \ 513/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
385({ \ 514static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
386 int __ret = NF_ACCEPT; \ 515 struct ip_vs_conn *cp, int local)
387 \ 516{
388 (skb)->ipvs_property = 1; \ 517 int ret = NF_STOLEN;
389 if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \ 518
390 __ret = ip_vs_confirm_conntrack(skb); \ 519 skb->ipvs_property = 1;
391 if (__ret == NF_ACCEPT) { \ 520 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
392 nf_reset(skb); \ 521 ip_vs_notrack(skb);
393 skb_forward_csum(skb); \ 522 if (!local) {
394 } \ 523 skb_forward_csum(skb);
395 __ret; \ 524 NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
396}) 525 dst_output);
397 526 } else
398#define IP_VS_XMIT_NAT(pf, skb, cp, local) \ 527 ret = NF_ACCEPT;
399do { \ 528 return ret;
400 (skb)->ipvs_property = 1; \ 529}
401 if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
402 ip_vs_notrack(skb); \
403 else \
404 ip_vs_update_conntrack(skb, cp, 1); \
405 if (local) \
406 return NF_ACCEPT; \
407 skb_forward_csum(skb); \
408 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
409 skb_dst(skb)->dev, dst_output); \
410} while (0)
411
412#define IP_VS_XMIT(pf, skb, cp, local) \
413do { \
414 (skb)->ipvs_property = 1; \
415 if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
416 ip_vs_notrack(skb); \
417 if (local) \
418 return NF_ACCEPT; \
419 skb_forward_csum(skb); \
420 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
421 skb_dst(skb)->dev, dst_output); \
422} while (0)
423 530
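Turning the IP_VS_XMIT* macros into the inline functions above is more than cosmetics: the old macros contained a bare `return NF_ACCEPT` that returned from the caller's frame, hiding control flow; the new helpers hand the verdict back as a value. Usage after the conversion:

	/* the caller now sees and propagates the verdict explicitly */
	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
	rcu_read_unlock();
	return rc;		/* NF_STOLEN if sent, NF_ACCEPT if local */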
424 531
425/* 532/*
@@ -430,7 +537,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
430 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 537 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
431{ 538{
432 /* we do not touch skb and do not need pskb ptr */ 539 /* we do not touch skb and do not need pskb ptr */
433 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); 540 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
434} 541}
435 542
436 543
@@ -443,52 +550,29 @@ int
443ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 550ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
444 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 551 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
445{ 552{
446 struct rtable *rt; /* Route to the other host */
447 struct iphdr *iph = ip_hdr(skb); 553 struct iphdr *iph = ip_hdr(skb);
448 int mtu;
449 554
450 EnterFunction(10); 555 EnterFunction(10);
451 556
452 if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos), 557 rcu_read_lock();
453 IP_VS_RT_MODE_NON_LOCAL, NULL))) 558 if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL,
454 goto tx_error_icmp; 559 NULL) < 0)
455
456 /* MTU checking */
457 mtu = dst_mtu(&rt->dst);
458 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
459 !skb_is_gso(skb)) {
460 ip_rt_put(rt);
461 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
462 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
463 goto tx_error; 560 goto tx_error;
464 }
465 561
466 /* 562 ip_send_check(iph);
467 * Call ip_send_check because we are not sure it is called
468 * after ip_defrag. Is copy-on-write needed?
469 */
470 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
471 ip_rt_put(rt);
472 return NF_STOLEN;
473 }
474 ip_send_check(ip_hdr(skb));
475
476 /* drop old route */
477 skb_dst_drop(skb);
478 skb_dst_set(skb, &rt->dst);
479 563
480 /* Another hack: avoid icmp_send in ip_fragment */ 564 /* Another hack: avoid icmp_send in ip_fragment */
481 skb->local_df = 1; 565 skb->local_df = 1;
482 566
483 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); 567 ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
568 rcu_read_unlock();
484 569
485 LeaveFunction(10); 570 LeaveFunction(10);
486 return NF_STOLEN; 571 return NF_STOLEN;
487 572
488 tx_error_icmp:
489 dst_link_failure(skb);
490 tx_error: 573 tx_error:
491 kfree_skb(skb); 574 kfree_skb(skb);
575 rcu_read_unlock();
492 LeaveFunction(10); 576 LeaveFunction(10);
493 return NF_STOLEN; 577 return NF_STOLEN;
494} 578}
@@ -496,60 +580,27 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
496#ifdef CONFIG_IP_VS_IPV6 580#ifdef CONFIG_IP_VS_IPV6
497int 581int
498ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 582ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
499 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) 583 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
500{ 584{
501 struct rt6_info *rt; /* Route to the other host */
502 int mtu;
503
504 EnterFunction(10); 585 EnterFunction(10);
505 586
506 rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0, 587 rcu_read_lock();
507 IP_VS_RT_MODE_NON_LOCAL); 588 if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL,
508 if (!rt) 589 ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
509 goto tx_error_icmp;
510
511 /* MTU checking */
512 mtu = dst_mtu(&rt->dst);
513 if (__mtu_check_toobig_v6(skb, mtu)) {
514 if (!skb->dev) {
515 struct net *net = dev_net(skb_dst(skb)->dev);
516
517 skb->dev = net->loopback_dev;
518 }
519 /* only send ICMP too big on first fragment */
520 if (!iph->fragoffs)
521 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
522 dst_release(&rt->dst);
523 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
524 goto tx_error; 590 goto tx_error;
525 }
526
527 /*
528 * Call ip_send_check because we are not sure it is called
529 * after ip_defrag. Is copy-on-write needed?
530 */
531 skb = skb_share_check(skb, GFP_ATOMIC);
532 if (unlikely(skb == NULL)) {
533 dst_release(&rt->dst);
534 return NF_STOLEN;
535 }
536
537 /* drop old route */
538 skb_dst_drop(skb);
539 skb_dst_set(skb, &rt->dst);
540 591
541 /* Another hack: avoid icmp_send in ip_fragment */ 592 /* Another hack: avoid icmp_send in ip_fragment */
542 skb->local_df = 1; 593 skb->local_df = 1;
543 594
544 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); 595 ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
596 rcu_read_unlock();
545 597
546 LeaveFunction(10); 598 LeaveFunction(10);
547 return NF_STOLEN; 599 return NF_STOLEN;
548 600
549 tx_error_icmp:
550 dst_link_failure(skb);
551 tx_error: 601 tx_error:
552 kfree_skb(skb); 602 kfree_skb(skb);
603 rcu_read_unlock();
553 LeaveFunction(10); 604 LeaveFunction(10);
554 return NF_STOLEN; 605 return NF_STOLEN;
555} 606}
@@ -564,29 +615,30 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
564 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 615 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
565{ 616{
566 struct rtable *rt; /* Route to the other host */ 617 struct rtable *rt; /* Route to the other host */
567 int mtu; 618 int local, rc, was_input;
568 struct iphdr *iph = ip_hdr(skb);
569 int local;
570 619
571 EnterFunction(10); 620 EnterFunction(10);
572 621
622 rcu_read_lock();
573 /* check if it is a connection of no-client-port */ 623 /* check if it is a connection of no-client-port */
574 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { 624 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
575 __be16 _pt, *p; 625 __be16 _pt, *p;
576 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); 626
627 p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
577 if (p == NULL) 628 if (p == NULL)
578 goto tx_error; 629 goto tx_error;
579 ip_vs_conn_fill_cport(cp, *p); 630 ip_vs_conn_fill_cport(cp, *p);
580 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 631 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
581 } 632 }
582 633
583 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 634 was_input = rt_is_input_route(skb_rtable(skb));
584 RT_TOS(iph->tos), 635 local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
585 IP_VS_RT_MODE_LOCAL | 636 IP_VS_RT_MODE_LOCAL |
586 IP_VS_RT_MODE_NON_LOCAL | 637 IP_VS_RT_MODE_NON_LOCAL |
587 IP_VS_RT_MODE_RDR, NULL))) 638 IP_VS_RT_MODE_RDR, NULL);
588 goto tx_error_icmp; 639 if (local < 0)
589 local = rt->rt_flags & RTCF_LOCAL; 640 goto tx_error;
641 rt = skb_rtable(skb);
590 /* 642 /*
591 * Avoid duplicate tuple in reply direction for NAT traffic 643 * Avoid duplicate tuple in reply direction for NAT traffic
592 * to local address when connection is sync-ed 644 * to local address when connection is sync-ed
@@ -600,57 +652,31 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
600 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, 652 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
601 "ip_vs_nat_xmit(): " 653 "ip_vs_nat_xmit(): "
602 "stopping DNAT to local address"); 654 "stopping DNAT to local address");
603 goto tx_error_put; 655 goto tx_error;
604 } 656 }
605 } 657 }
606#endif 658#endif
607 659
608 /* From world but DNAT to loopback address? */ 660 /* From world but DNAT to loopback address? */
609 if (local && ipv4_is_loopback(cp->daddr.ip) && 661 if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
610 rt_is_input_route(skb_rtable(skb))) {
611 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " 662 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
612 "stopping DNAT to loopback address"); 663 "stopping DNAT to loopback address");
613 goto tx_error_put; 664 goto tx_error;
614 }
615
616 /* MTU checking */
617 mtu = dst_mtu(&rt->dst);
618 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
619 !skb_is_gso(skb)) {
620 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
621 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
622 "ip_vs_nat_xmit(): frag needed for");
623 goto tx_error_put;
624 } 665 }
625 666
626 /* copy-on-write the packet before mangling it */ 667 /* copy-on-write the packet before mangling it */
627 if (!skb_make_writable(skb, sizeof(struct iphdr))) 668 if (!skb_make_writable(skb, sizeof(struct iphdr)))
628 goto tx_error_put; 669 goto tx_error;
629 670
630 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 671 if (skb_cow(skb, rt->dst.dev->hard_header_len))
631 goto tx_error_put; 672 goto tx_error;
632 673
633 /* mangle the packet */ 674 /* mangle the packet */
634 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) 675 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
635 goto tx_error_put; 676 goto tx_error;
636 ip_hdr(skb)->daddr = cp->daddr.ip; 677 ip_hdr(skb)->daddr = cp->daddr.ip;
637 ip_send_check(ip_hdr(skb)); 678 ip_send_check(ip_hdr(skb));
638 679
639 if (!local) {
640 /* drop old route */
641 skb_dst_drop(skb);
642 skb_dst_set(skb, &rt->dst);
643 } else {
644 ip_rt_put(rt);
645 /*
646 * Some IPv4 replies get local address from routes,
647 * not from iph, so while we DNAT after routing
648 * we need this second input/output route.
649 */
650 if (!__ip_vs_reroute_locally(skb))
651 goto tx_error;
652 }
653
654 IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); 680 IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
655 681
656 /* FIXME: when application helper enlarges the packet and the length 682 /* FIXME: when application helper enlarges the packet and the length
@@ -660,49 +686,48 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
660 /* Another hack: avoid icmp_send in ip_fragment */ 686 /* Another hack: avoid icmp_send in ip_fragment */
661 skb->local_df = 1; 687 skb->local_df = 1;
662 688
663 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); 689 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
690 rcu_read_unlock();
664 691
665 LeaveFunction(10); 692 LeaveFunction(10);
666 return NF_STOLEN; 693 return rc;
667 694
668 tx_error_icmp:
669 dst_link_failure(skb);
670 tx_error: 695 tx_error:
671 kfree_skb(skb); 696 kfree_skb(skb);
697 rcu_read_unlock();
672 LeaveFunction(10); 698 LeaveFunction(10);
673 return NF_STOLEN; 699 return NF_STOLEN;
674 tx_error_put:
675 ip_rt_put(rt);
676 goto tx_error;
677} 700}
678 701
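Since __ip_vs_get_out_rt() now replaces the skb's dst as a side effect, ip_vs_nat_xmit() must sample rt_is_input_route() before the lookup; that is also what lets __ip_vs_reroute_locally() disappear, because packets destined for the local stack simply keep their original input route. The ordering constraint, condensed from the hunks above:

	was_input = rt_is_input_route(skb_rtable(skb)); /* route as received */
	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				   IP_VS_RT_MODE_LOCAL |
				   IP_VS_RT_MODE_NON_LOCAL |
				   IP_VS_RT_MODE_RDR, NULL);
	if (local < 0)
		goto tx_error;
	rt = skb_rtable(skb);			/* route after the lookup */
	/* "from world but DNAT to loopback" must test the old route */
	if (local && ipv4_is_loopback(cp->daddr.ip) && was_input)
		goto tx_error;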
679#ifdef CONFIG_IP_VS_IPV6 702#ifdef CONFIG_IP_VS_IPV6
680int 703int
681ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 704ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
682 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) 705 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
683{ 706{
684 struct rt6_info *rt; /* Route to the other host */ 707 struct rt6_info *rt; /* Route to the other host */
685 int mtu; 708 int local, rc;
686 int local;
687 709
688 EnterFunction(10); 710 EnterFunction(10);
689 711
712 rcu_read_lock();
690 /* check if it is a connection of no-client-port */ 713 /* check if it is a connection of no-client-port */
691 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) { 714 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {
692 __be16 _pt, *p; 715 __be16 _pt, *p;
693 p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt); 716 p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
694 if (p == NULL) 717 if (p == NULL)
695 goto tx_error; 718 goto tx_error;
696 ip_vs_conn_fill_cport(cp, *p); 719 ip_vs_conn_fill_cport(cp, *p);
697 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 720 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
698 } 721 }
699 722
700 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 723 local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
701 0, (IP_VS_RT_MODE_LOCAL | 724 ipvsh, 0,
702 IP_VS_RT_MODE_NON_LOCAL | 725 IP_VS_RT_MODE_LOCAL |
703 IP_VS_RT_MODE_RDR)))) 726 IP_VS_RT_MODE_NON_LOCAL |
704 goto tx_error_icmp; 727 IP_VS_RT_MODE_RDR);
705 local = __ip_vs_is_local_route6(rt); 728 if (local < 0)
729 goto tx_error;
730 rt = (struct rt6_info *) skb_dst(skb);
706 /* 731 /*
707 * Avoid duplicate tuple in reply direction for NAT traffic 732 * Avoid duplicate tuple in reply direction for NAT traffic
708 * to local address when connection is sync-ed 733 * to local address when connection is sync-ed
@@ -716,7 +741,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
716 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, 741 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
717 "ip_vs_nat_xmit_v6(): " 742 "ip_vs_nat_xmit_v6(): "
718 "stopping DNAT to local address"); 743 "stopping DNAT to local address");
719 goto tx_error_put; 744 goto tx_error;
720 } 745 }
721 } 746 }
722#endif 747#endif
@@ -727,46 +752,21 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
727 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, 752 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
728 "ip_vs_nat_xmit_v6(): " 753 "ip_vs_nat_xmit_v6(): "
729 "stopping DNAT to loopback address"); 754 "stopping DNAT to loopback address");
730 goto tx_error_put; 755 goto tx_error;
731 }
732
733 /* MTU checking */
734 mtu = dst_mtu(&rt->dst);
735 if (__mtu_check_toobig_v6(skb, mtu)) {
736 if (!skb->dev) {
737 struct net *net = dev_net(skb_dst(skb)->dev);
738
739 skb->dev = net->loopback_dev;
740 }
741 /* only send ICMP too big on first fragment */
742 if (!iph->fragoffs)
743 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
744 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
745 "ip_vs_nat_xmit_v6(): frag needed for");
746 goto tx_error_put;
747 } 756 }
748 757
749 /* copy-on-write the packet before mangling it */ 758 /* copy-on-write the packet before mangling it */
750 if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) 759 if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
751 goto tx_error_put; 760 goto tx_error;
752 761
753 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 762 if (skb_cow(skb, rt->dst.dev->hard_header_len))
754 goto tx_error_put; 763 goto tx_error;
755 764
756 /* mangle the packet */ 765 /* mangle the packet */
757 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph)) 766 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
758 goto tx_error; 767 goto tx_error;
759 ipv6_hdr(skb)->daddr = cp->daddr.in6; 768 ipv6_hdr(skb)->daddr = cp->daddr.in6;
760 769
761 if (!local || !skb->dev) {
762 /* drop the old route when skb is not shared */
763 skb_dst_drop(skb);
764 skb_dst_set(skb, &rt->dst);
765 } else {
766 /* destined to loopback, do we need to change route? */
767 dst_release(&rt->dst);
768 }
769
770 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); 770 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
771 771
772 /* FIXME: when application helper enlarges the packet and the length 772 /* FIXME: when application helper enlarges the packet and the length
@@ -776,20 +776,17 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
776 /* Another hack: avoid icmp_send in ip_fragment */ 776 /* Another hack: avoid icmp_send in ip_fragment */
777 skb->local_df = 1; 777 skb->local_df = 1;
778 778
779 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); 779 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
780 rcu_read_unlock();
780 781
781 LeaveFunction(10); 782 LeaveFunction(10);
782 return NF_STOLEN; 783 return rc;
783 784
784tx_error_icmp:
785 dst_link_failure(skb);
786tx_error: 785tx_error:
787 LeaveFunction(10); 786 LeaveFunction(10);
788 kfree_skb(skb); 787 kfree_skb(skb);
788 rcu_read_unlock();
789 return NF_STOLEN; 789 return NF_STOLEN;
790tx_error_put:
791 dst_release(&rt->dst);
792 goto tx_error;
793} 790}
794#endif 791#endif
795 792
@@ -826,56 +823,40 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
826 __be16 df; 823 __be16 df;
827 struct iphdr *iph; /* Our new IP header */ 824 struct iphdr *iph; /* Our new IP header */
828 unsigned int max_headroom; /* The extra header space needed */ 825 unsigned int max_headroom; /* The extra header space needed */
829 int mtu; 826 int ret, local;
830 int ret;
831 827
832 EnterFunction(10); 828 EnterFunction(10);
833 829
834 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 830 rcu_read_lock();
835 RT_TOS(tos), IP_VS_RT_MODE_LOCAL | 831 local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
836 IP_VS_RT_MODE_NON_LOCAL | 832 IP_VS_RT_MODE_LOCAL |
837 IP_VS_RT_MODE_CONNECT, 833 IP_VS_RT_MODE_NON_LOCAL |
838 &saddr))) 834 IP_VS_RT_MODE_CONNECT |
839 goto tx_error_icmp; 835 IP_VS_RT_MODE_TUNNEL, &saddr);
840 if (rt->rt_flags & RTCF_LOCAL) { 836 if (local < 0)
841 ip_rt_put(rt); 837 goto tx_error;
842 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); 838 if (local) {
839 rcu_read_unlock();
840 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
843 } 841 }
844 842
843 rt = skb_rtable(skb);
845 tdev = rt->dst.dev; 844 tdev = rt->dst.dev;
846 845
847 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
848 if (mtu < 68) {
849 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
850 goto tx_error_put;
851 }
852 if (rt_is_output_route(skb_rtable(skb)))
853 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
854
855 /* Copy DF, reset fragment offset and MF */ 846 /* Copy DF, reset fragment offset and MF */
856 df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0; 847 df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
857 848
858 if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
859 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
860 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
861 goto tx_error_put;
862 }
863
864 /* 849 /*
865 * Okay, now see if we can stuff it in the buffer as-is. 850 * Okay, now see if we can stuff it in the buffer as-is.
866 */ 851 */
867 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); 852 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
868 853
869 if (skb_headroom(skb) < max_headroom 854 if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
870 || skb_cloned(skb) || skb_shared(skb)) {
871 struct sk_buff *new_skb = 855 struct sk_buff *new_skb =
872 skb_realloc_headroom(skb, max_headroom); 856 skb_realloc_headroom(skb, max_headroom);
873 if (!new_skb) { 857
874 ip_rt_put(rt); 858 if (!new_skb)
875 kfree_skb(skb); 859 goto tx_error;
876 IP_VS_ERR_RL("%s(): no memory\n", __func__);
877 return NF_STOLEN;
878 }
879 consume_skb(skb); 860 consume_skb(skb);
880 skb = new_skb; 861 skb = new_skb;
881 old_iph = ip_hdr(skb); 862 old_iph = ip_hdr(skb);
@@ -890,10 +871,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
890 skb_reset_network_header(skb); 871 skb_reset_network_header(skb);
891 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 872 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
892 873
893 /* drop old route */
894 skb_dst_drop(skb);
895 skb_dst_set(skb, &rt->dst);
896
897 /* 874 /*
898 * Push down and install the IPIP header. 875 * Push down and install the IPIP header.
899 */ 876 */
@@ -911,25 +888,22 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
911 /* Another hack: avoid icmp_send in ip_fragment */ 888 /* Another hack: avoid icmp_send in ip_fragment */
912 skb->local_df = 1; 889 skb->local_df = 1;
913 890
914 ret = IP_VS_XMIT_TUNNEL(skb, cp); 891 ret = ip_vs_tunnel_xmit_prepare(skb, cp);
915 if (ret == NF_ACCEPT) 892 if (ret == NF_ACCEPT)
916 ip_local_out(skb); 893 ip_local_out(skb);
917 else if (ret == NF_DROP) 894 else if (ret == NF_DROP)
918 kfree_skb(skb); 895 kfree_skb(skb);
896 rcu_read_unlock();
919 897
920 LeaveFunction(10); 898 LeaveFunction(10);
921 899
922 return NF_STOLEN; 900 return NF_STOLEN;
923 901
924 tx_error_icmp:
925 dst_link_failure(skb);
926 tx_error: 902 tx_error:
927 kfree_skb(skb); 903 kfree_skb(skb);
904 rcu_read_unlock();
928 LeaveFunction(10); 905 LeaveFunction(10);
929 return NF_STOLEN; 906 return NF_STOLEN;
930tx_error_put:
931 ip_rt_put(rt);
932 goto tx_error;
933} 907}
934 908
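With MTU and DF policy folded into __ip_vs_get_out_rt() behind IP_VS_RT_MODE_TUNNEL, what remains here is the encapsulation itself. The headroom handling as it stands after the patch (dropping the skb_shared() test assumes IPVS owns the skb exclusively at this point; that rationale is inferred, not stated in the diff):

	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
	if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);

		if (!new_skb)
			goto tx_error;
		consume_skb(skb);	/* old skb freed as consumed, not dropped */
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}
	/* then the usual IPIP push: skb_push(skb, sizeof(struct iphdr)),
	 * skb_reset_network_header(skb), fill in the outer header.
	 */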
935#ifdef CONFIG_IP_VS_IPV6 909#ifdef CONFIG_IP_VS_IPV6
@@ -943,60 +917,37 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
943 struct ipv6hdr *old_iph = ipv6_hdr(skb); 917 struct ipv6hdr *old_iph = ipv6_hdr(skb);
944 struct ipv6hdr *iph; /* Our new IP header */ 918 struct ipv6hdr *iph; /* Our new IP header */
945 unsigned int max_headroom; /* The extra header space needed */ 919 unsigned int max_headroom; /* The extra header space needed */
946 int mtu; 920 int ret, local;
947 int ret;
948 921
949 EnterFunction(10); 922 EnterFunction(10);
950 923
951 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, 924 rcu_read_lock();
952 &saddr, 1, (IP_VS_RT_MODE_LOCAL | 925 local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
953 IP_VS_RT_MODE_NON_LOCAL)))) 926 &saddr, ipvsh, 1,
954 goto tx_error_icmp; 927 IP_VS_RT_MODE_LOCAL |
955 if (__ip_vs_is_local_route6(rt)) { 928 IP_VS_RT_MODE_NON_LOCAL |
956 dst_release(&rt->dst); 929 IP_VS_RT_MODE_TUNNEL);
957 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); 930 if (local < 0)
931 goto tx_error;
932 if (local) {
933 rcu_read_unlock();
934 return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
958 } 935 }
959 936
937 rt = (struct rt6_info *) skb_dst(skb);
960 tdev = rt->dst.dev; 938 tdev = rt->dst.dev;
961 939
962 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
963 if (mtu < IPV6_MIN_MTU) {
964 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
965 IPV6_MIN_MTU);
966 goto tx_error_put;
967 }
968 if (skb_dst(skb))
969 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
970
971 /* MTU checking: Notice that 'mtu' have been adjusted before hand */
972 if (__mtu_check_toobig_v6(skb, mtu)) {
973 if (!skb->dev) {
974 struct net *net = dev_net(skb_dst(skb)->dev);
975
976 skb->dev = net->loopback_dev;
977 }
978 /* only send ICMP too big on first fragment */
979 if (!ipvsh->fragoffs)
980 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
981 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
982 goto tx_error_put;
983 }
984
985 /* 940 /*
986 * Okay, now see if we can stuff it in the buffer as-is. 941 * Okay, now see if we can stuff it in the buffer as-is.
987 */ 942 */
988 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); 943 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
989 944
990 if (skb_headroom(skb) < max_headroom 945 if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
991 || skb_cloned(skb) || skb_shared(skb)) {
992 struct sk_buff *new_skb = 946 struct sk_buff *new_skb =
993 skb_realloc_headroom(skb, max_headroom); 947 skb_realloc_headroom(skb, max_headroom);
994 if (!new_skb) { 948
995 dst_release(&rt->dst); 949 if (!new_skb)
996 kfree_skb(skb); 950 goto tx_error;
997 IP_VS_ERR_RL("%s(): no memory\n", __func__);
998 return NF_STOLEN;
999 }
1000 consume_skb(skb); 951 consume_skb(skb);
1001 skb = new_skb; 952 skb = new_skb;
1002 old_iph = ipv6_hdr(skb); 953 old_iph = ipv6_hdr(skb);
@@ -1008,10 +959,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1008 skb_reset_network_header(skb); 959 skb_reset_network_header(skb);
1009 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 960 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1010 961
1011 /* drop old route */
1012 skb_dst_drop(skb);
1013 skb_dst_set(skb, &rt->dst);
1014
1015 /* 962 /*
1016 * Push down and install the IPIP header. 963 * Push down and install the IPIP header.
1017 */ 964 */
@@ -1029,25 +976,22 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1029 /* Another hack: avoid icmp_send in ip_fragment */ 976 /* Another hack: avoid icmp_send in ip_fragment */
1030 skb->local_df = 1; 977 skb->local_df = 1;
1031 978
1032 ret = IP_VS_XMIT_TUNNEL(skb, cp); 979 ret = ip_vs_tunnel_xmit_prepare(skb, cp);
1033 if (ret == NF_ACCEPT) 980 if (ret == NF_ACCEPT)
1034 ip6_local_out(skb); 981 ip6_local_out(skb);
1035 else if (ret == NF_DROP) 982 else if (ret == NF_DROP)
1036 kfree_skb(skb); 983 kfree_skb(skb);
984 rcu_read_unlock();
1037 985
1038 LeaveFunction(10); 986 LeaveFunction(10);
1039 987
1040 return NF_STOLEN; 988 return NF_STOLEN;
1041 989
1042tx_error_icmp:
1043 dst_link_failure(skb);
1044tx_error: 990tx_error:
1045 kfree_skb(skb); 991 kfree_skb(skb);
992 rcu_read_unlock();
1046 LeaveFunction(10); 993 LeaveFunction(10);
1047 return NF_STOLEN; 994 return NF_STOLEN;
1048tx_error_put:
1049 dst_release(&rt->dst);
1050 goto tx_error;
1051} 995}
1052#endif 996#endif
1053 997
@@ -1060,59 +1004,36 @@ int
1060ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1004ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1061 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 1005 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
1062{ 1006{
1063 struct rtable *rt; /* Route to the other host */ 1007 int local;
1064 struct iphdr *iph = ip_hdr(skb);
1065 int mtu;
1066 1008
1067 EnterFunction(10); 1009 EnterFunction(10);
1068 1010
1069 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 1011 rcu_read_lock();
1070 RT_TOS(iph->tos), 1012 local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1071 IP_VS_RT_MODE_LOCAL | 1013 IP_VS_RT_MODE_LOCAL |
1072 IP_VS_RT_MODE_NON_LOCAL | 1014 IP_VS_RT_MODE_NON_LOCAL |
1073 IP_VS_RT_MODE_KNOWN_NH, NULL))) 1015 IP_VS_RT_MODE_KNOWN_NH, NULL);
1074 goto tx_error_icmp; 1016 if (local < 0)
1075 if (rt->rt_flags & RTCF_LOCAL) {
1076 ip_rt_put(rt);
1077 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
1078 }
1079
1080 /* MTU checking */
1081 mtu = dst_mtu(&rt->dst);
1082 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
1083 !skb_is_gso(skb)) {
1084 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
1085 ip_rt_put(rt);
1086 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1087 goto tx_error; 1017 goto tx_error;
1018 if (local) {
1019 rcu_read_unlock();
1020 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
1088 } 1021 }
1089 1022
1090 /*
1091 * Call ip_send_check because we are not sure it is called
1092 * after ip_defrag. Is copy-on-write needed?
1093 */
1094 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
1095 ip_rt_put(rt);
1096 return NF_STOLEN;
1097 }
1098 ip_send_check(ip_hdr(skb)); 1023 ip_send_check(ip_hdr(skb));
1099 1024
1100 /* drop old route */
1101 skb_dst_drop(skb);
1102 skb_dst_set(skb, &rt->dst);
1103
1104 /* Another hack: avoid icmp_send in ip_fragment */ 1025 /* Another hack: avoid icmp_send in ip_fragment */
1105 skb->local_df = 1; 1026 skb->local_df = 1;
1106 1027
1107 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); 1028 ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
1029 rcu_read_unlock();
1108 1030
1109 LeaveFunction(10); 1031 LeaveFunction(10);
1110 return NF_STOLEN; 1032 return NF_STOLEN;
1111 1033
1112 tx_error_icmp:
1113 dst_link_failure(skb);
1114 tx_error: 1034 tx_error:
1115 kfree_skb(skb); 1035 kfree_skb(skb);
1036 rcu_read_unlock();
1116 LeaveFunction(10); 1037 LeaveFunction(10);
1117 return NF_STOLEN; 1038 return NF_STOLEN;
1118} 1039}
@@ -1120,64 +1041,36 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1120#ifdef CONFIG_IP_VS_IPV6 1041#ifdef CONFIG_IP_VS_IPV6
1121int 1042int
1122ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1043ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1123 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) 1044 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
1124{ 1045{
1125 struct rt6_info *rt; /* Route to the other host */ 1046 int local;
1126 int mtu;
1127 1047
1128 EnterFunction(10); 1048 EnterFunction(10);
1129 1049
1130 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 1050 rcu_read_lock();
1131 0, (IP_VS_RT_MODE_LOCAL | 1051 local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
1132 IP_VS_RT_MODE_NON_LOCAL)))) 1052 ipvsh, 0,
1133 goto tx_error_icmp; 1053 IP_VS_RT_MODE_LOCAL |
1134 if (__ip_vs_is_local_route6(rt)) { 1054 IP_VS_RT_MODE_NON_LOCAL);
1135 dst_release(&rt->dst); 1055 if (local < 0)
1136 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
1137 }
1138
1139 /* MTU checking */
1140 mtu = dst_mtu(&rt->dst);
1141 if (__mtu_check_toobig_v6(skb, mtu)) {
1142 if (!skb->dev) {
1143 struct net *net = dev_net(skb_dst(skb)->dev);
1144
1145 skb->dev = net->loopback_dev;
1146 }
1147 /* only send ICMP too big on first fragment */
1148 if (!iph->fragoffs)
1149 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1150 dst_release(&rt->dst);
1151 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1152 goto tx_error; 1056 goto tx_error;
1057 if (local) {
1058 rcu_read_unlock();
1059 return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
1153 } 1060 }
1154 1061
1155 /*
1156 * Call ip_send_check because we are not sure it is called
1157 * after ip_defrag. Is copy-on-write needed?
1158 */
1159 skb = skb_share_check(skb, GFP_ATOMIC);
1160 if (unlikely(skb == NULL)) {
1161 dst_release(&rt->dst);
1162 return NF_STOLEN;
1163 }
1164
1165 /* drop old route */
1166 skb_dst_drop(skb);
1167 skb_dst_set(skb, &rt->dst);
1168
1169 /* Another hack: avoid icmp_send in ip_fragment */ 1062 /* Another hack: avoid icmp_send in ip_fragment */
1170 skb->local_df = 1; 1063 skb->local_df = 1;
1171 1064
1172 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); 1065 ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
1066 rcu_read_unlock();
1173 1067
1174 LeaveFunction(10); 1068 LeaveFunction(10);
1175 return NF_STOLEN; 1069 return NF_STOLEN;
1176 1070
1177tx_error_icmp:
1178 dst_link_failure(skb);
1179tx_error: 1071tx_error:
1180 kfree_skb(skb); 1072 kfree_skb(skb);
1073 rcu_read_unlock();
1181 LeaveFunction(10); 1074 LeaveFunction(10);
1182 return NF_STOLEN; 1075 return NF_STOLEN;
1183} 1076}
@@ -1194,10 +1087,9 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1194 struct ip_vs_iphdr *iph) 1087 struct ip_vs_iphdr *iph)
1195{ 1088{
1196 struct rtable *rt; /* Route to the other host */ 1089 struct rtable *rt; /* Route to the other host */
1197 int mtu;
1198 int rc; 1090 int rc;
1199 int local; 1091 int local;
1200 int rt_mode; 1092 int rt_mode, was_input;
1201 1093
1202 EnterFunction(10); 1094 EnterFunction(10);
1203 1095
@@ -1217,16 +1109,17 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1217 /* 1109 /*
1218 * mangle and send the packet here (only for VS/NAT) 1110 * mangle and send the packet here (only for VS/NAT)
1219 */ 1111 */
1112 was_input = rt_is_input_route(skb_rtable(skb));
1220 1113
1221 /* LOCALNODE from FORWARD hook is not supported */ 1114 /* LOCALNODE from FORWARD hook is not supported */
1222 rt_mode = (hooknum != NF_INET_FORWARD) ? 1115 rt_mode = (hooknum != NF_INET_FORWARD) ?
1223 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1116 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1224 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1117 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1225 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 1118 rcu_read_lock();
1226 RT_TOS(ip_hdr(skb)->tos), 1119 local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL);
1227 rt_mode, NULL))) 1120 if (local < 0)
1228 goto tx_error_icmp; 1121 goto tx_error;
1229 local = rt->rt_flags & RTCF_LOCAL; 1122 rt = skb_rtable(skb);
1230 1123
1231 /* 1124 /*
1232 * Avoid duplicate tuple in reply direction for NAT traffic 1125 * Avoid duplicate tuple in reply direction for NAT traffic
@@ -1241,82 +1134,51 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1241 IP_VS_DBG(10, "%s(): " 1134 IP_VS_DBG(10, "%s(): "
1242 "stopping DNAT to local address %pI4\n", 1135 "stopping DNAT to local address %pI4\n",
1243 __func__, &cp->daddr.ip); 1136 __func__, &cp->daddr.ip);
1244 goto tx_error_put; 1137 goto tx_error;
1245 } 1138 }
1246 } 1139 }
1247#endif 1140#endif
1248 1141
1249 /* From world but DNAT to loopback address? */ 1142 /* From world but DNAT to loopback address? */
1250 if (local && ipv4_is_loopback(cp->daddr.ip) && 1143 if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
1251 rt_is_input_route(skb_rtable(skb))) {
1252 IP_VS_DBG(1, "%s(): " 1144 IP_VS_DBG(1, "%s(): "
1253 "stopping DNAT to loopback %pI4\n", 1145 "stopping DNAT to loopback %pI4\n",
1254 __func__, &cp->daddr.ip); 1146 __func__, &cp->daddr.ip);
1255 goto tx_error_put; 1147 goto tx_error;
1256 }
1257
1258 /* MTU checking */
1259 mtu = dst_mtu(&rt->dst);
1260 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
1261 !skb_is_gso(skb)) {
1262 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1263 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1264 goto tx_error_put;
1265 } 1148 }
1266 1149
1267 /* copy-on-write the packet before mangling it */ 1150 /* copy-on-write the packet before mangling it */
1268 if (!skb_make_writable(skb, offset)) 1151 if (!skb_make_writable(skb, offset))
1269 goto tx_error_put; 1152 goto tx_error;
1270 1153
1271 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1154 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1272 goto tx_error_put; 1155 goto tx_error;
1273 1156
1274 ip_vs_nat_icmp(skb, pp, cp, 0); 1157 ip_vs_nat_icmp(skb, pp, cp, 0);
1275 1158
1276 if (!local) {
1277 /* drop the old route when skb is not shared */
1278 skb_dst_drop(skb);
1279 skb_dst_set(skb, &rt->dst);
1280 } else {
1281 ip_rt_put(rt);
1282 /*
1283 * Some IPv4 replies get local address from routes,
1284 * not from iph, so while we DNAT after routing
1285 * we need this second input/output route.
1286 */
1287 if (!__ip_vs_reroute_locally(skb))
1288 goto tx_error;
1289 }
1290
1291 /* Another hack: avoid icmp_send in ip_fragment */ 1159 /* Another hack: avoid icmp_send in ip_fragment */
1292 skb->local_df = 1; 1160 skb->local_df = 1;
1293 1161
1294 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); 1162 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
1295 1163 rcu_read_unlock();
1296 rc = NF_STOLEN;
1297 goto out; 1164 goto out;
1298 1165
1299 tx_error_icmp:
1300 dst_link_failure(skb);
1301 tx_error: 1166 tx_error:
1302 dev_kfree_skb(skb); 1167 kfree_skb(skb);
1168 rcu_read_unlock();
1303 rc = NF_STOLEN; 1169 rc = NF_STOLEN;
1304 out: 1170 out:
1305 LeaveFunction(10); 1171 LeaveFunction(10);
1306 return rc; 1172 return rc;
1307 tx_error_put:
1308 ip_rt_put(rt);
1309 goto tx_error;
1310} 1173}
1311 1174
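One small but deliberate change in these error paths: dev_kfree_skb() (an alias for consume_skb() in kernels of this era, meaning "delivered successfully") becomes kfree_skb(), which records the packet as a drop visible to drop monitoring. The corrected pattern:

	tx_error:
		kfree_skb(skb);		/* a genuine drop, not a consume */
		rcu_read_unlock();
		rc = NF_STOLEN;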
1312#ifdef CONFIG_IP_VS_IPV6 1175#ifdef CONFIG_IP_VS_IPV6
1313int 1176int
1314ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1177ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1315 struct ip_vs_protocol *pp, int offset, unsigned int hooknum, 1178 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1316 struct ip_vs_iphdr *iph) 1179 struct ip_vs_iphdr *ipvsh)
1317{ 1180{
1318 struct rt6_info *rt; /* Route to the other host */ 1181 struct rt6_info *rt; /* Route to the other host */
1319 int mtu;
1320 int rc; 1182 int rc;
1321 int local; 1183 int local;
1322 int rt_mode; 1184 int rt_mode;
@@ -1328,7 +1190,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1328 translate address/port back */ 1190 translate address/port back */
1329 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1191 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1330 if (cp->packet_xmit) 1192 if (cp->packet_xmit)
1331 rc = cp->packet_xmit(skb, cp, pp, iph); 1193 rc = cp->packet_xmit(skb, cp, pp, ipvsh);
1332 else 1194 else
1333 rc = NF_ACCEPT; 1195 rc = NF_ACCEPT;
1334 /* do not touch skb anymore */ 1196 /* do not touch skb anymore */
@@ -1344,11 +1206,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1344 rt_mode = (hooknum != NF_INET_FORWARD) ? 1206 rt_mode = (hooknum != NF_INET_FORWARD) ?
1345 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1207 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1346 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1208 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1347 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 1209 rcu_read_lock();
1348 0, rt_mode))) 1210 local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
1349 goto tx_error_icmp; 1211 ipvsh, 0, rt_mode);
1350 1212 if (local < 0)
1351 local = __ip_vs_is_local_route6(rt); 1213 goto tx_error;
1214 rt = (struct rt6_info *) skb_dst(skb);
1352 /* 1215 /*
1353 * Avoid duplicate tuple in reply direction for NAT traffic 1216 * Avoid duplicate tuple in reply direction for NAT traffic
1354 * to local address when connection is sync-ed 1217 * to local address when connection is sync-ed
@@ -1362,7 +1225,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1362 IP_VS_DBG(10, "%s(): " 1225 IP_VS_DBG(10, "%s(): "
1363 "stopping DNAT to local address %pI6\n", 1226 "stopping DNAT to local address %pI6\n",
1364 __func__, &cp->daddr.in6); 1227 __func__, &cp->daddr.in6);
1365 goto tx_error_put; 1228 goto tx_error;
1366 } 1229 }
1367 } 1230 }
1368#endif 1231#endif
@@ -1373,60 +1236,31 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1373 IP_VS_DBG(1, "%s(): " 1236 IP_VS_DBG(1, "%s(): "
1374 "stopping DNAT to loopback %pI6\n", 1237 "stopping DNAT to loopback %pI6\n",
1375 __func__, &cp->daddr.in6); 1238 __func__, &cp->daddr.in6);
1376 goto tx_error_put; 1239 goto tx_error;
1377 }
1378
1379 /* MTU checking */
1380 mtu = dst_mtu(&rt->dst);
1381 if (__mtu_check_toobig_v6(skb, mtu)) {
1382 if (!skb->dev) {
1383 struct net *net = dev_net(skb_dst(skb)->dev);
1384
1385 skb->dev = net->loopback_dev;
1386 }
1387 /* only send ICMP too big on first fragment */
1388 if (!iph->fragoffs)
1389 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1390 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1391 goto tx_error_put;
1392 } 1240 }
1393 1241
1394 /* copy-on-write the packet before mangling it */ 1242 /* copy-on-write the packet before mangling it */
1395 if (!skb_make_writable(skb, offset)) 1243 if (!skb_make_writable(skb, offset))
1396 goto tx_error_put; 1244 goto tx_error;
1397 1245
1398 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1246 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1399 goto tx_error_put; 1247 goto tx_error;
1400 1248
1401 ip_vs_nat_icmp_v6(skb, pp, cp, 0); 1249 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1402 1250
1403 if (!local || !skb->dev) {
1404 /* drop the old route when skb is not shared */
1405 skb_dst_drop(skb);
1406 skb_dst_set(skb, &rt->dst);
1407 } else {
1408 /* destined to loopback, do we need to change route? */
1409 dst_release(&rt->dst);
1410 }
1411
1412 /* Another hack: avoid icmp_send in ip_fragment */ 1251 /* Another hack: avoid icmp_send in ip_fragment */
1413 skb->local_df = 1; 1252 skb->local_df = 1;
1414 1253
1415 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); 1254 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
1416 1255 rcu_read_unlock();
1417 rc = NF_STOLEN;
1418 goto out; 1256 goto out;
1419 1257
1420tx_error_icmp:
1421 dst_link_failure(skb);
1422tx_error: 1258tx_error:
1423 dev_kfree_skb(skb); 1259 kfree_skb(skb);
1260 rcu_read_unlock();
1424 rc = NF_STOLEN; 1261 rc = NF_STOLEN;
1425out: 1262out:
1426 LeaveFunction(10); 1263 LeaveFunction(10);
1427 return rc; 1264 return rc;
1428tx_error_put:
1429 dst_release(&rt->dst);
1430 goto tx_error;
1431} 1265}
1432#endif 1266#endif
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index dbdaa1149260..b8b95f4027ca 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -2,6 +2,7 @@
2 * 2 *
3 * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca> 3 * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
4 * based on HW's ip_conntrack_irc.c as well as other modules 4 * based on HW's ip_conntrack_irc.c as well as other modules
5 * (C) 2006 Patrick McHardy <kaber@trash.net>
5 * 6 *
6 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c8e001a9c45b..ebb81d64436c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -5,6 +5,7 @@
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -48,6 +49,7 @@
 #include <net/netfilter/nf_conntrack_labels.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_helper.h>
 
 #define NF_CONNTRACK_VERSION	"0.5.0"
 
@@ -1259,7 +1261,7 @@ void nf_ct_iterate_cleanup(struct net *net,
 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
 
 struct __nf_ct_flush_report {
-	u32 pid;
+	u32 portid;
 	int report;
 };
 
@@ -1274,7 +1276,7 @@ static int kill_report(struct nf_conn *i, void *data)
 
 	/* If we fail to deliver the event, death_by_timeout() will retry */
 	if (nf_conntrack_event_report(IPCT_DESTROY, i,
-				      fr->pid, fr->report) < 0)
+				      fr->portid, fr->report) < 0)
 		return 1;
 
 	/* Avoid the delivery of the destroy event in death_by_timeout(). */
@@ -1297,10 +1299,10 @@ void nf_ct_free_hashtable(void *hash, unsigned int size)
 }
 EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
 
-void nf_conntrack_flush_report(struct net *net, u32 pid, int report)
+void nf_conntrack_flush_report(struct net *net, u32 portid, int report)
 {
 	struct __nf_ct_flush_report fr = {
-		.pid	= pid,
+		.portid	= portid,
 		.report = report,
 	};
 	nf_ct_iterate_cleanup(net, kill_report, &fr);
@@ -1364,30 +1366,48 @@ void nf_conntrack_cleanup_end(void)
  */
 void nf_conntrack_cleanup_net(struct net *net)
 {
+	LIST_HEAD(single);
+
+	list_add(&net->exit_list, &single);
+	nf_conntrack_cleanup_net_list(&single);
+}
+
+void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
+{
+	int busy;
+	struct net *net;
+
 	/*
 	 * This makes sure all current packets have passed through
 	 *  netfilter framework.  Roll on, two-stage module
 	 *  delete...
 	 */
	synchronize_net();
- i_see_dead_people:
-	nf_ct_iterate_cleanup(net, kill_all, NULL);
-	nf_ct_release_dying_list(net);
-	if (atomic_read(&net->ct.count) != 0) {
+i_see_dead_people:
+	busy = 0;
+	list_for_each_entry(net, net_exit_list, exit_list) {
+		nf_ct_iterate_cleanup(net, kill_all, NULL);
+		nf_ct_release_dying_list(net);
+		if (atomic_read(&net->ct.count) != 0)
+			busy = 1;
+	}
+	if (busy) {
 		schedule();
 		goto i_see_dead_people;
 	}
 
-	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
-	nf_conntrack_proto_pernet_fini(net);
-	nf_conntrack_helper_pernet_fini(net);
-	nf_conntrack_ecache_pernet_fini(net);
-	nf_conntrack_tstamp_pernet_fini(net);
-	nf_conntrack_acct_pernet_fini(net);
-	nf_conntrack_expect_pernet_fini(net);
-	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
-	kfree(net->ct.slabname);
-	free_percpu(net->ct.stat);
+	list_for_each_entry(net, net_exit_list, exit_list) {
+		nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
+		nf_conntrack_proto_pernet_fini(net);
+		nf_conntrack_helper_pernet_fini(net);
+		nf_conntrack_ecache_pernet_fini(net);
+		nf_conntrack_tstamp_pernet_fini(net);
+		nf_conntrack_acct_pernet_fini(net);
+		nf_conntrack_expect_pernet_fini(net);
+		kmem_cache_destroy(net->ct.nf_conntrack_cachep);
+		kfree(net->ct.slabname);
+		free_percpu(net->ct.stat);
+	}
 }
 
 void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
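The pid to portid rename in the flush path follows the netlink convention that this u32 identifies a socket port, not a process. A minimal caller sketch under that API (demo_flush and its surrounding handler are assumptions, not part of this patch):

#include <net/netlink.h>
#include <net/netfilter/nf_conntrack.h>

/* Hypothetical ctnetlink-style handler fragment: hand the requester's
 * netlink port id and report flag through to the flush so each
 * IPCT_DESTROY event can be attributed to (and echoed back to) it. */
static int demo_flush(struct net *net, struct sk_buff *skb,
		      const struct nlmsghdr *nlh)
{
	nf_conntrack_flush_report(net, NETLINK_CB(skb).portid,
				  nlmsg_report(nlh));
	return 0;
}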
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index b5d2eb8bf0d5..1df176146567 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -1,8 +1,10 @@
 /* Event cache for netfilter. */
 
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+/*
+ * (C) 2005 Harald Welte <laforge@gnumonks.org>
+ * (C) 2005 Patrick McHardy <kaber@trash.net>
+ * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 8c10e3db3d9b..c63b618cd619 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -3,6 +3,7 @@
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ * (c) 2005-2012 Patrick McHardy <kaber@trash.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -40,7 +41,7 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
 
 /* nf_conntrack_expect helper functions */
 void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
-				u32 pid, int report)
+				u32 portid, int report)
 {
 	struct nf_conn_help *master_help = nfct_help(exp->master);
 	struct net *net = nf_ct_exp_net(exp);
@@ -54,7 +55,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
 	hlist_del(&exp->lnode);
 	master_help->expecting[exp->class]--;
 
-	nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
+	nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);
 	nf_ct_expect_put(exp);
 
 	NF_CT_STAT_INC(net, expect_delete);
@@ -412,7 +413,7 @@ out:
 }
 
 int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
-				u32 pid, int report)
+				u32 portid, int report)
 {
 	int ret;
 
@@ -425,7 +426,7 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
 	if (ret < 0)
 		goto out;
 	spin_unlock_bh(&nf_conntrack_lock);
-	nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
+	nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
 	return ret;
 out:
 	spin_unlock_bh(&nf_conntrack_lock);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 62fb8faedb80..6b217074237b 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -3,6 +3,7 @@
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 7df7b36d2e24..bdebd03bc8cd 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -2,6 +2,7 @@
  * H.323 connection tracking helper
  *
  * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
+ * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net>
  *
  * This source code is licensed under General Public License version 2.
  *
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 94b4b9853f60..974a2a4adefa 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -3,6 +3,7 @@
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -353,7 +354,7 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct,
 	/* rcu_read_lock()ed by nf_hook_slow */
 	helper = rcu_dereference(help->helper);
 
-	nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
+	nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
 		      "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf);
 
 	va_end(args);
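nf_log_packet() now takes the packet's namespace as its first argument, so the message reaches the logger bound in that netns rather than a global one. A hedged sketch of a caller on the new signature (the demo_* names and the hook context are assumptions):

#include <net/netfilter/nf_log.h>

/* Sketch only: net, hooknum, skb, in and out come from the hook;
 * the trailing NULL stands for "no struct nf_loginfo". */
static void demo_log_drop(struct net *net, unsigned int hooknum,
			  struct sk_buff *skb,
			  const struct net_device *in,
			  const struct net_device *out)
{
	nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out, NULL,
		      "demo: dropping packet ");
}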
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 70985c5d0ffa..0fd2976db7ee 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -1,6 +1,7 @@
 /* IRC extension for IP connection tracking, Version 1.21
  * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
  * based on RR's ip_conntrack_ftp.c
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 9904b15f600e..6d0f8a17c5b7 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2409,6 +2409,92 @@ out:
 	return skb->len;
 }
 
+static int
+ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nf_conntrack_expect *exp, *last;
+	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	struct nf_conn *ct = cb->data;
+	struct nf_conn_help *help = nfct_help(ct);
+	u_int8_t l3proto = nfmsg->nfgen_family;
+
+	if (cb->args[0])
+		return 0;
+
+	rcu_read_lock();
+	last = (struct nf_conntrack_expect *)cb->args[1];
+restart:
+	hlist_for_each_entry(exp, &help->expectations, lnode) {
+		if (l3proto && exp->tuple.src.l3num != l3proto)
+			continue;
+		if (cb->args[1]) {
+			if (exp != last)
+				continue;
+			cb->args[1] = 0;
+		}
+		if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).portid,
+					    cb->nlh->nlmsg_seq,
+					    IPCTNL_MSG_EXP_NEW,
+					    exp) < 0) {
+			if (!atomic_inc_not_zero(&exp->use))
+				continue;
+			cb->args[1] = (unsigned long)exp;
+			goto out;
+		}
+	}
+	if (cb->args[1]) {
+		cb->args[1] = 0;
+		goto restart;
+	}
+	cb->args[0] = 1;
+out:
+	rcu_read_unlock();
+	if (last)
+		nf_ct_expect_put(last);
+
+	return skb->len;
+}
+
+static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
+				 const struct nlmsghdr *nlh,
+				 const struct nlattr * const cda[])
+{
+	int err;
+	struct net *net = sock_net(ctnl);
+	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
+	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_tuple_hash *h;
+	struct nf_conn *ct;
+	u16 zone = 0;
+	struct netlink_dump_control c = {
+		.dump = ctnetlink_exp_ct_dump_table,
+		.done = ctnetlink_exp_done,
+	};
+
+	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
+	if (err < 0)
+		return err;
+
+	if (cda[CTA_EXPECT_ZONE]) {
+		err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+		if (err < 0)
+			return err;
+	}
+
+	h = nf_conntrack_find_get(net, zone, &tuple);
+	if (!h)
+		return -ENOENT;
+
+	ct = nf_ct_tuplehash_to_ctrack(h);
+	c.data = ct;
+
+	err = netlink_dump_start(ctnl, skb, nlh, &c);
+	nf_ct_put(ct);
+
+	return err;
+}
+
 static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
 	[CTA_EXPECT_MASTER]	= { .type = NLA_NESTED },
 	[CTA_EXPECT_TUPLE]	= { .type = NLA_NESTED },
@@ -2439,11 +2525,15 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	int err;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-		struct netlink_dump_control c = {
-			.dump = ctnetlink_exp_dump_table,
-			.done = ctnetlink_exp_done,
-		};
-		return netlink_dump_start(ctnl, skb, nlh, &c);
+		if (cda[CTA_EXPECT_MASTER])
+			return ctnetlink_dump_exp_ct(ctnl, skb, nlh, cda);
+		else {
+			struct netlink_dump_control c = {
+				.dump = ctnetlink_exp_dump_table,
+				.done = ctnetlink_exp_done,
+			};
+			return netlink_dump_start(ctnl, skb, nlh, &c);
+		}
 	}
 
 	err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
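The NLM_F_DUMP branch above dispatches on CTA_EXPECT_MASTER: when a master tuple is given, the dump is scoped to a single conntrack by stashing it in netlink_dump_control.data, and the callback keeps its cursor in cb->args[]. A sketch of that pattern, with the demo_* callbacks assumed:

#include <linux/netlink.h>
#include <net/netlink.h>

static int demo_dump_cb(struct sk_buff *skb, struct netlink_callback *cb);
static int demo_done(struct netlink_callback *cb);

/* Hypothetical helper: start a dump whose callback is scoped to one
 * object. The pointer shows up as cb->data inside demo_dump_cb() and
 * survives across skb refills of the same dump. */
static int demo_start_dump(struct sock *nlsk, struct sk_buff *skb,
			   const struct nlmsghdr *nlh, void *obj)
{
	struct netlink_dump_control c = {
		.dump = demo_dump_cb,
		.done = demo_done,
		.data = obj,
	};

	return netlink_dump_start(nlsk, skb, nlh, &c);
}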
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index e6678d2b624e..7bd03decd36c 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -11,6 +11,8 @@
  *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  *
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
  * Limitations:
  *	 - We blindly assume that control connections are always
  *	   established in PNS->PAC direction. This is a violation
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 58ab4050830c..0ab9636ac57e 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -3,6 +3,7 @@
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index ba65b2041eb4..a99b6c3427b0 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -456,7 +456,8 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 
 out_invalid:
 	if (LOG_INVALID(net, IPPROTO_DCCP))
-		nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg);
+		nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL,
+			      NULL, msg);
 	return false;
 }
 
@@ -542,13 +543,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 
 		spin_unlock_bh(&ct->lock);
 		if (LOG_INVALID(net, IPPROTO_DCCP))
-			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_dccp: invalid packet ignored ");
 		return NF_ACCEPT;
 	case CT_DCCP_INVALID:
 		spin_unlock_bh(&ct->lock);
 		if (LOG_INVALID(net, IPPROTO_DCCP))
-			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_dccp: invalid state transition ");
 		return -NF_ACCEPT;
 	}
@@ -613,7 +614,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl,
 
 out_invalid:
 	if (LOG_INVALID(net, IPPROTO_DCCP))
-		nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg);
+		nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, msg);
 	return -NF_ACCEPT;
 }
 
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 155ce9f8a0db..9d9c0dade602 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -21,6 +21,7 @@
  *
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  *
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index ec83536def9a..1314d33f6bcf 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -1,6 +1,9 @@
 /*
  * Connection tracking protocol helper module for SCTP.
  *
+ * Copyright (c) 2004 Kiran Kumar Immidi <immidi_kiran@yahoo.com>
+ * Copyright (c) 2004-2012 Patrick McHardy <kaber@trash.net>
+ *
  * SCTP is defined in RFC 2960. References to various sections in this code
  * are to this RFC.
  *
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 83876e9877f1..4d4d8f1d01fc 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1,5 +1,7 @@
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -720,7 +722,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
 	    tn->tcp_be_liberal)
 		res = true;
 	if (!res && LOG_INVALID(net, IPPROTO_TCP))
-		nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+		nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 		"nf_ct_tcp: %s ",
 		before(seq, sender->td_maxend + 1) ?
 		after(end, sender->td_end - receiver->td_maxwin - 1) ?
@@ -772,7 +774,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
 	if (th == NULL) {
 		if (LOG_INVALID(net, IPPROTO_TCP))
-			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 				"nf_ct_tcp: short packet ");
 		return -NF_ACCEPT;
 	}
@@ -780,7 +782,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
 	/* Not whole TCP header or malformed packet */
 	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
 		if (LOG_INVALID(net, IPPROTO_TCP))
-			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 				"nf_ct_tcp: truncated/malformed packet ");
 		return -NF_ACCEPT;
 	}
@@ -793,7 +795,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
 		if (LOG_INVALID(net, IPPROTO_TCP))
-			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 				  "nf_ct_tcp: bad TCP checksum ");
 		return -NF_ACCEPT;
 	}
@@ -802,7 +804,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
 	tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
 	if (!tcp_valid_flags[tcpflags]) {
 		if (LOG_INVALID(net, IPPROTO_TCP))
-			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 				  "nf_ct_tcp: invalid TCP flag combination ");
 		return -NF_ACCEPT;
 	}
@@ -949,7 +951,7 @@ static int tcp_packet(struct nf_conn *ct,
 		}
 		spin_unlock_bh(&ct->lock);
 		if (LOG_INVALID(net, IPPROTO_TCP))
-			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 				  "nf_ct_tcp: invalid packet ignored in "
 				  "state %s ", tcp_conntrack_names[old_state]);
 		return NF_ACCEPT;
@@ -959,7 +961,7 @@ static int tcp_packet(struct nf_conn *ct,
 			 dir, get_conntrack_index(th), old_state);
 		spin_unlock_bh(&ct->lock);
 		if (LOG_INVALID(net, IPPROTO_TCP))
-			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 				  "nf_ct_tcp: invalid state ");
 		return -NF_ACCEPT;
 	case TCP_CONNTRACK_CLOSE:
@@ -969,8 +971,8 @@ static int tcp_packet(struct nf_conn *ct,
 			/* Invalid RST  */
 			spin_unlock_bh(&ct->lock);
 			if (LOG_INVALID(net, IPPROTO_TCP))
-				nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
-					  "nf_ct_tcp: invalid RST ");
+				nf_log_packet(net, pf, 0, skb, NULL, NULL,
+					      NULL, "nf_ct_tcp: invalid RST ");
 			return -NF_ACCEPT;
 		}
 		if (index == TCP_RST_SET
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 59623cc56e8d..9d7721cbce4b 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -1,5 +1,6 @@
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -119,7 +120,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
 	hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
 	if (hdr == NULL) {
 		if (LOG_INVALID(net, IPPROTO_UDP))
-			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_udp: short packet ");
 		return -NF_ACCEPT;
 	}
@@ -127,7 +128,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
 	/* Truncated/malformed packets */
 	if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
 		if (LOG_INVALID(net, IPPROTO_UDP))
-			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 				"nf_ct_udp: truncated/malformed packet ");
 		return -NF_ACCEPT;
 	}
@@ -143,7 +144,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
 		if (LOG_INVALID(net, IPPROTO_UDP))
-			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 				"nf_ct_udp: bad UDP checksum ");
 		return -NF_ACCEPT;
 	}
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index ca969f6273f7..2750e6c69f82 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -131,7 +131,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
 	hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
 	if (hdr == NULL) {
 		if (LOG_INVALID(net, IPPROTO_UDPLITE))
-			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_udplite: short packet ");
 		return -NF_ACCEPT;
 	}
@@ -141,7 +141,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
 		cscov = udplen;
 	else if (cscov < sizeof(*hdr) || cscov > udplen) {
 		if (LOG_INVALID(net, IPPROTO_UDPLITE))
-			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 				"nf_ct_udplite: invalid checksum coverage ");
 		return -NF_ACCEPT;
 	}
@@ -149,7 +149,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
 	/* UDPLITE mandates checksums */
 	if (!hdr->check) {
 		if (LOG_INVALID(net, IPPROTO_UDPLITE))
-			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_udplite: checksum missing ");
 		return -NF_ACCEPT;
 	}
@@ -159,7 +159,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
 	    nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
 				pf)) {
 		if (LOG_INVALID(net, IPPROTO_UDPLITE))
-			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 				"nf_ct_udplite: bad UDPLite checksum ");
 		return -NF_ACCEPT;
 	}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index fedee3943661..bd700b4013c1 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1,5 +1,6 @@
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -545,16 +546,20 @@ out_init:
 	return ret;
 }
 
-static void nf_conntrack_pernet_exit(struct net *net)
+static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
 {
-	nf_conntrack_standalone_fini_sysctl(net);
-	nf_conntrack_standalone_fini_proc(net);
-	nf_conntrack_cleanup_net(net);
+	struct net *net;
+
+	list_for_each_entry(net, net_exit_list, exit_list) {
+		nf_conntrack_standalone_fini_sysctl(net);
+		nf_conntrack_standalone_fini_proc(net);
+	}
+	nf_conntrack_cleanup_net_list(net_exit_list);
 }
 
 static struct pernet_operations nf_conntrack_net_ops = {
 	.init = nf_conntrack_pernet_init,
-	.exit = nf_conntrack_pernet_exit,
+	.exit_batch = nf_conntrack_pernet_exit,
 };
 
 static int __init nf_conntrack_standalone_init(void)
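Switching nf_conntrack_net_ops from .exit to .exit_batch means the pernet core hands over all dying namespaces in one list, so the expensive synchronize_net() in nf_conntrack_cleanup_net_list() is paid once per batch instead of once per namespace. A minimal sketch of the mechanism (all demo_* names assumed, not from the patch):

#include <linux/list.h>
#include <net/net_namespace.h>

static void __net_exit demo_exit_batch(struct list_head *net_exit_list)
{
	struct net *net;

	/* One traversal covers every namespace that is going away. */
	list_for_each_entry(net, net_exit_list, exit_list) {
		/* per-namespace teardown for "net" goes here */
	}
}

static struct pernet_operations demo_net_ops = {
	.exit_batch = demo_exit_batch,
};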
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index e9936c830208..e68ab4fbd71f 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -1,5 +1,5 @@
 /* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
- *
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 9e312695c818..388656d5a9ec 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -16,7 +16,6 @@
 #define NF_LOG_PREFIXLEN		128
 #define NFLOGGER_NAME_LEN		64
 
-static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly;
 static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly;
 static DEFINE_MUTEX(nf_log_mutex);
 
@@ -32,13 +31,46 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)
 	return NULL;
 }
 
+void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
+{
+	const struct nf_logger *log;
+
+	if (pf == NFPROTO_UNSPEC)
+		return;
+
+	mutex_lock(&nf_log_mutex);
+	log = rcu_dereference_protected(net->nf.nf_loggers[pf],
+					lockdep_is_held(&nf_log_mutex));
+	if (log == NULL)
+		rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
+
+	mutex_unlock(&nf_log_mutex);
+}
+EXPORT_SYMBOL(nf_log_set);
+
+void nf_log_unset(struct net *net, const struct nf_logger *logger)
+{
+	int i;
+	const struct nf_logger *log;
+
+	mutex_lock(&nf_log_mutex);
+	for (i = 0; i < NFPROTO_NUMPROTO; i++) {
+		log = rcu_dereference_protected(net->nf.nf_loggers[i],
+						lockdep_is_held(&nf_log_mutex));
+		if (log == logger)
+			RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL);
+	}
+	mutex_unlock(&nf_log_mutex);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL(nf_log_unset);
+
 /* return EEXIST if the same logger is registered, 0 on success. */
 int nf_log_register(u_int8_t pf, struct nf_logger *logger)
 {
-	const struct nf_logger *llog;
 	int i;
 
-	if (pf >= ARRAY_SIZE(nf_loggers))
+	if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers))
 		return -EINVAL;
 
 	for (i = 0; i < ARRAY_SIZE(logger->list); i++)
@@ -52,10 +84,6 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
 	} else {
 		/* register at end of list to honor first register win */
 		list_add_tail(&logger->list[pf], &nf_loggers_l[pf]);
-		llog = rcu_dereference_protected(nf_loggers[pf],
-						 lockdep_is_held(&nf_log_mutex));
-		if (llog == NULL)
-			rcu_assign_pointer(nf_loggers[pf], logger);
 	}
 
 	mutex_unlock(&nf_log_mutex);
@@ -66,49 +94,43 @@ EXPORT_SYMBOL(nf_log_register);
 
 void nf_log_unregister(struct nf_logger *logger)
 {
-	const struct nf_logger *c_logger;
 	int i;
 
 	mutex_lock(&nf_log_mutex);
-	for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) {
-		c_logger = rcu_dereference_protected(nf_loggers[i],
-						     lockdep_is_held(&nf_log_mutex));
-		if (c_logger == logger)
-			RCU_INIT_POINTER(nf_loggers[i], NULL);
+	for (i = 0; i < NFPROTO_NUMPROTO; i++)
 		list_del(&logger->list[i]);
-	}
 	mutex_unlock(&nf_log_mutex);
-
-	synchronize_rcu();
 }
 EXPORT_SYMBOL(nf_log_unregister);
 
-int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger)
+int nf_log_bind_pf(struct net *net, u_int8_t pf,
+		   const struct nf_logger *logger)
 {
-	if (pf >= ARRAY_SIZE(nf_loggers))
+	if (pf >= ARRAY_SIZE(net->nf.nf_loggers))
 		return -EINVAL;
 	mutex_lock(&nf_log_mutex);
 	if (__find_logger(pf, logger->name) == NULL) {
 		mutex_unlock(&nf_log_mutex);
 		return -ENOENT;
 	}
-	rcu_assign_pointer(nf_loggers[pf], logger);
+	rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
 	mutex_unlock(&nf_log_mutex);
 	return 0;
 }
 EXPORT_SYMBOL(nf_log_bind_pf);
 
-void nf_log_unbind_pf(u_int8_t pf)
+void nf_log_unbind_pf(struct net *net, u_int8_t pf)
 {
-	if (pf >= ARRAY_SIZE(nf_loggers))
+	if (pf >= ARRAY_SIZE(net->nf.nf_loggers))
 		return;
 	mutex_lock(&nf_log_mutex);
-	RCU_INIT_POINTER(nf_loggers[pf], NULL);
+	RCU_INIT_POINTER(net->nf.nf_loggers[pf], NULL);
 	mutex_unlock(&nf_log_mutex);
 }
 EXPORT_SYMBOL(nf_log_unbind_pf);
 
-void nf_log_packet(u_int8_t pf,
+void nf_log_packet(struct net *net,
+		   u_int8_t pf,
 		   unsigned int hooknum,
 		   const struct sk_buff *skb,
 		   const struct net_device *in,
@@ -121,7 +143,7 @@ void nf_log_packet(u_int8_t pf,
 	const struct nf_logger *logger;
 
 	rcu_read_lock();
-	logger = rcu_dereference(nf_loggers[pf]);
+	logger = rcu_dereference(net->nf.nf_loggers[pf]);
 	if (logger) {
 		va_start(args, fmt);
 		vsnprintf(prefix, sizeof(prefix), fmt, args);
@@ -135,9 +157,11 @@ EXPORT_SYMBOL(nf_log_packet);
 #ifdef CONFIG_PROC_FS
 static void *seq_start(struct seq_file *seq, loff_t *pos)
 {
+	struct net *net = seq_file_net(seq);
+
 	mutex_lock(&nf_log_mutex);
 
-	if (*pos >= ARRAY_SIZE(nf_loggers))
+	if (*pos >= ARRAY_SIZE(net->nf.nf_loggers))
 		return NULL;
 
 	return pos;
@@ -145,9 +169,11 @@ static void *seq_start(struct seq_file *seq, loff_t *pos)
 
 static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
+	struct net *net = seq_file_net(s);
+
 	(*pos)++;
 
-	if (*pos >= ARRAY_SIZE(nf_loggers))
+	if (*pos >= ARRAY_SIZE(net->nf.nf_loggers))
 		return NULL;
 
 	return pos;
@@ -164,8 +190,9 @@ static int seq_show(struct seq_file *s, void *v)
 	const struct nf_logger *logger;
 	struct nf_logger *t;
 	int ret;
+	struct net *net = seq_file_net(s);
 
-	logger = rcu_dereference_protected(nf_loggers[*pos],
+	logger = rcu_dereference_protected(net->nf.nf_loggers[*pos],
 					   lockdep_is_held(&nf_log_mutex));
 
 	if (!logger)
@@ -199,7 +226,8 @@ static const struct seq_operations nflog_seq_ops = {
 
 static int nflog_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &nflog_seq_ops);
+	return seq_open_net(inode, file, &nflog_seq_ops,
+			    sizeof(struct seq_net_private));
 }
 
 static const struct file_operations nflog_file_ops = {
@@ -207,7 +235,7 @@ static const struct file_operations nflog_file_ops = {
 	.open	 = nflog_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_net,
 };
 
 
@@ -216,7 +244,6 @@ static const struct file_operations nflog_file_ops = {
 #ifdef CONFIG_SYSCTL
 static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
 static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
-static struct ctl_table_header *nf_log_dir_header;
 
 static int nf_log_proc_dostring(ctl_table *table, int write,
 			 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -226,6 +253,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
 	size_t size = *lenp;
 	int r = 0;
 	int tindex = (unsigned long)table->extra1;
+	struct net *net = current->nsproxy->net_ns;
 
 	if (write) {
 		if (size > sizeof(buf))
@@ -234,7 +262,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
 			return -EFAULT;
 
 		if (!strcmp(buf, "NONE")) {
-			nf_log_unbind_pf(tindex);
+			nf_log_unbind_pf(net, tindex);
 			return 0;
 		}
 		mutex_lock(&nf_log_mutex);
@@ -243,11 +271,11 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
 			mutex_unlock(&nf_log_mutex);
 			return -ENOENT;
 		}
-		rcu_assign_pointer(nf_loggers[tindex], logger);
+		rcu_assign_pointer(net->nf.nf_loggers[tindex], logger);
 		mutex_unlock(&nf_log_mutex);
 	} else {
 		mutex_lock(&nf_log_mutex);
-		logger = rcu_dereference_protected(nf_loggers[tindex],
+		logger = rcu_dereference_protected(net->nf.nf_loggers[tindex],
 						   lockdep_is_held(&nf_log_mutex));
 		if (!logger)
 			table->data = "NONE";
@@ -260,49 +288,111 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
 	return r;
 }
 
-static __init int netfilter_log_sysctl_init(void)
+static int netfilter_log_sysctl_init(struct net *net)
 {
 	int i;
+	struct ctl_table *table;
 
-	for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) {
-		snprintf(nf_log_sysctl_fnames[i-NFPROTO_UNSPEC], 3, "%d", i);
-		nf_log_sysctl_table[i].procname	=
-			nf_log_sysctl_fnames[i-NFPROTO_UNSPEC];
-		nf_log_sysctl_table[i].data = NULL;
-		nf_log_sysctl_table[i].maxlen =
-			NFLOGGER_NAME_LEN * sizeof(char);
-		nf_log_sysctl_table[i].mode = 0644;
-		nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring;
-		nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i;
+	table = nf_log_sysctl_table;
+	if (!net_eq(net, &init_net)) {
+		table = kmemdup(nf_log_sysctl_table,
+				sizeof(nf_log_sysctl_table),
+				GFP_KERNEL);
+		if (!table)
+			goto err_alloc;
+	} else {
+		for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) {
+			snprintf(nf_log_sysctl_fnames[i],
+				 3, "%d", i);
+			nf_log_sysctl_table[i].procname	=
+				nf_log_sysctl_fnames[i];
+			nf_log_sysctl_table[i].data = NULL;
+			nf_log_sysctl_table[i].maxlen =
+				NFLOGGER_NAME_LEN * sizeof(char);
+			nf_log_sysctl_table[i].mode = 0644;
+			nf_log_sysctl_table[i].proc_handler =
+				nf_log_proc_dostring;
+			nf_log_sysctl_table[i].extra1 =
+				(void *)(unsigned long) i;
+		}
 	}
 
-	nf_log_dir_header = register_net_sysctl(&init_net, "net/netfilter/nf_log",
-						nf_log_sysctl_table);
-	if (!nf_log_dir_header)
-		return -ENOMEM;
+	net->nf.nf_log_dir_header = register_net_sysctl(net,
+						"net/netfilter/nf_log",
+						table);
+	if (!net->nf.nf_log_dir_header)
+		goto err_reg;
 
 	return 0;
+
+err_reg:
+	if (!net_eq(net, &init_net))
+		kfree(table);
+err_alloc:
+	return -ENOMEM;
+}
+
+static void netfilter_log_sysctl_exit(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->nf.nf_log_dir_header->ctl_table_arg;
+	unregister_net_sysctl_table(net->nf.nf_log_dir_header);
+	if (!net_eq(net, &init_net))
+		kfree(table);
 }
 #else
-static __init int netfilter_log_sysctl_init(void)
+static int netfilter_log_sysctl_init(struct net *net)
 {
 	return 0;
 }
+
+static void netfilter_log_sysctl_exit(struct net *net)
+{
+}
 #endif /* CONFIG_SYSCTL */
 
-int __init netfilter_log_init(void)
+static int __net_init nf_log_net_init(struct net *net)
 {
-	int i, r;
+	int ret = -ENOMEM;
+
 #ifdef CONFIG_PROC_FS
 	if (!proc_create("nf_log", S_IRUGO,
-			 proc_net_netfilter, &nflog_file_ops))
-		return -1;
+			 net->nf.proc_netfilter, &nflog_file_ops))
+		return ret;
 #endif
+	ret = netfilter_log_sysctl_init(net);
+	if (ret < 0)
+		goto out_sysctl;
+
+	return 0;
 
-	/* Errors will trigger panic, unroll on error is unnecessary. */
-	r = netfilter_log_sysctl_init();
-	if (r < 0)
-		return r;
+out_sysctl:
+	/* For init_net: errors will trigger panic, don't unroll on error. */
+	if (!net_eq(net, &init_net))
+		remove_proc_entry("nf_log", net->nf.proc_netfilter);
+
+	return ret;
+}
+
+static void __net_exit nf_log_net_exit(struct net *net)
+{
+	netfilter_log_sysctl_exit(net);
+	remove_proc_entry("nf_log", net->nf.proc_netfilter);
+}
+
+static struct pernet_operations nf_log_net_ops = {
+	.init = nf_log_net_init,
+	.exit = nf_log_net_exit,
+};
+
+int __init netfilter_log_init(void)
+{
+	int i, ret;
+
+	ret = register_pernet_subsys(&nf_log_net_ops);
+	if (ret < 0)
+		return ret;
 
 	for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
 		INIT_LIST_HEAD(&(nf_loggers_l[i]));
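With the logger table moved into struct net, backends bind themselves per namespace through the new nf_log_set()/nf_log_unset() helpers instead of poking a global array; nf_log_set() only installs the logger if no other logger is bound yet. A hedged sketch of how a backend might wire this up (all demo_* names are assumptions):

#include <net/net_namespace.h>
#include <net/netfilter/nf_log.h>

static struct nf_logger demo_logger;	/* assumed: nf_log_register()ed at module init */

static int __net_init demo_log_net_init(struct net *net)
{
	/* Become the default IPv4 logger in this namespace only,
	 * unless another logger was bound first. */
	nf_log_set(net, NFPROTO_IPV4, &demo_logger);
	return 0;
}

static void __net_exit demo_log_net_exit(struct net *net)
{
	nf_log_unset(net, &demo_logger);
}

static struct pernet_operations demo_log_net_ops = {
	.init = demo_log_net_init,
	.exit = demo_log_net_exit,
};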
diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c
index 3b67c9d11273..eb772380a202 100644
--- a/net/netfilter/nf_nat_amanda.c
+++ b/net/netfilter/nf_nat_amanda.c
@@ -1,6 +1,7 @@
 /* Amanda extension for TCP NAT alteration.
  * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
  * based on a copy of HW's ip_nat_irc.c as well as other modules
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index ad24be070e53..2e469ca2ca55 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -87,9 +87,10 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
 	struct flowi fl;
 	unsigned int hh_len;
 	struct dst_entry *dst;
+	int err;
 
-	if (xfrm_decode_session(skb, &fl, family) < 0)
-		return -1;
+	err = xfrm_decode_session(skb, &fl, family);
+	return err;
 
 	dst = skb_dst(skb);
 	if (dst->xfrm)
@@ -98,7 +99,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
 
 	dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0);
 	if (IS_ERR(dst))
-		return -1;
+		return PTR_ERR(dst);
 
 	skb_dst_drop(skb);
 	skb_dst_set(skb, dst);
@@ -107,7 +108,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
 	hh_len = skb_dst(skb)->dev->hard_header_len;
 	if (skb_headroom(skb) < hh_len &&
 	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
-		return -1;
+		return -ENOMEM;
 	return 0;
 }
 EXPORT_SYMBOL(nf_xfrm_me_harder);
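nf_xfrm_me_harder() now reports negative errno values (PTR_ERR() from the xfrm lookup, -ENOMEM on headroom expansion) instead of a bare -1. A hedged caller-side sketch; the hook function and the declaring header are assumptions:

#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <net/netfilter/nf_nat.h>	/* assumed location of the prototype */

/* Hypothetical hook fragment: with real errnos coming back, the
 * caller can distinguish why the packet has to be dropped. */
static unsigned int demo_after_nat(struct sk_buff *skb)
{
	int err = nf_xfrm_me_harder(skb, AF_INET);

	if (err < 0)
		return NF_DROP;
	return NF_ACCEPT;
}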
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 23c2b38676a6..5fea563afe30 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -2,6 +2,7 @@
  *
  * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
  * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2007-2012 Patrick McHardy <kaber@trash.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index d812c1235b30..5ccf01e35390 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -1,3 +1,8 @@
+/*
+ * Rusty Russell (C)2000 -- This code is GPL.
+ * Patrick McHardy (c) 2006-2012
+ */
+
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/init.h>
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 0b1b32cda307..572d87dc116f 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -24,10 +24,9 @@
 #include <linux/skbuff.h>
 #include <asm/uaccess.h>
 #include <net/sock.h>
-#include <net/netlink.h>
 #include <linux/init.h>
 
-#include <linux/netlink.h>
+#include <net/netlink.h>
 #include <linux/netfilter/nfnetlink.h>
 
 MODULE_LICENSE("GPL");
@@ -113,22 +112,30 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group)
 }
 EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
 
-int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid,
+struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
+				    u32 dst_portid, gfp_t gfp_mask)
+{
+	return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask);
+}
+EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb);
+
+int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
 		   unsigned int group, int echo, gfp_t flags)
 {
-	return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags);
+	return nlmsg_notify(net->nfnl, skb, portid, group, echo, flags);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_send);
 
-int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error)
+int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error)
 {
-	return netlink_set_err(net->nfnl, pid, group, error);
+	return netlink_set_err(net->nfnl, portid, group, error);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_set_err);
 
-int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags)
+int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid,
+		      int flags)
 {
-	return netlink_unicast(net->nfnl, skb, pid, flags);
+	return netlink_unicast(net->nfnl, skb, portid, flags);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_unicast);
 
@@ -144,7 +151,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		return -EPERM;
 
 	/* All the messages must at least contain nfgenmsg */
-	if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct nfgenmsg)))
+	if (nlmsg_len(nlh) < sizeof(struct nfgenmsg))
 		return 0;
 
 	type = nlh->nlmsg_type;
@@ -172,7 +179,7 @@ replay:
 	}
 
 	{
-		int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
+		int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
 		u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
 		struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
 		struct nlattr *attr = (void *)nlh + min_len;
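The new nfnetlink_alloc_skb() wrapper gives nfnetlink subsystems a way to allocate notification skbs through netlink_alloc_skb() against the destination socket. A sketch of a hypothetical user (names, size, and GFP flags are illustrative only):

#include <linux/netlink.h>
#include <linux/netfilter/nfnetlink.h>

/* Hypothetical event path: allocate a message buffer destined for
 * the subscriber identified by "portid". */
static struct sk_buff *demo_new_event_skb(struct net *net, u32 portid)
{
	return nfnetlink_alloc_skb(net, NLMSG_GOODSIZE, portid, GFP_ATOMIC);
}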
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index f248db572972..faf1e9300d8a 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -3,6 +3,7 @@
 * nfetlink.
  *
  * (C) 2005 by Harald Welte <laforge@netfilter.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
  *
  * Based on the old ipv4-only ipt_ULOG.c:
  * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
@@ -19,7 +20,7 @@
 #include <linux/ipv6.h>
 #include <linux/netdevice.h>
 #include <linux/netfilter.h>
-#include <linux/netlink.h>
+#include <net/netlink.h>
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_log.h>
 #include <linux/spinlock.h>
@@ -32,6 +33,7 @@
 #include <linux/slab.h>
 #include <net/sock.h>
 #include <net/netfilter/nf_log.h>
+#include <net/netns/generic.h>
 #include <net/netfilter/nfnetlink_log.h>
 
 #include <linux/atomic.h>
@@ -56,6 +58,7 @@ struct nfulnl_instance {
 	unsigned int qlen;		/* number of nlmsgs in skb */
 	struct sk_buff *skb;		/* pre-allocatd skb */
 	struct timer_list timer;
+	struct net *net;
 	struct user_namespace *peer_user_ns;	/* User namespace of the peer process */
 	int peer_portid;			/* PORTID of the peer process */
 
@@ -71,25 +74,34 @@ struct nfulnl_instance {
 	struct rcu_head rcu;
 };
 
-static DEFINE_SPINLOCK(instances_lock);
-static atomic_t global_seq;
-
 #define INSTANCE_BUCKETS	16
-static struct hlist_head instance_table[INSTANCE_BUCKETS];
 static unsigned int hash_init;
 
+static int nfnl_log_net_id __read_mostly;
+
+struct nfnl_log_net {
+	spinlock_t instances_lock;
+	struct hlist_head instance_table[INSTANCE_BUCKETS];
+	atomic_t global_seq;
+};
+
+static struct nfnl_log_net *nfnl_log_pernet(struct net *net)
+{
+	return net_generic(net, nfnl_log_net_id);
+}
+
 static inline u_int8_t instance_hashfn(u_int16_t group_num)
 {
 	return ((group_num & 0xff) % INSTANCE_BUCKETS);
 }
 
 static struct nfulnl_instance *
-__instance_lookup(u_int16_t group_num)
+__instance_lookup(struct nfnl_log_net *log, u_int16_t group_num)
 {
 	struct hlist_head *head;
 	struct nfulnl_instance *inst;
 
-	head = &instance_table[instance_hashfn(group_num)];
+	head = &log->instance_table[instance_hashfn(group_num)];
 	hlist_for_each_entry_rcu(inst, head, hlist) {
 		if (inst->group_num == group_num)
 			return inst;
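The instance table, its lock, and the sequence counter move into a per-namespace nfnl_log_net blob looked up through net_generic(). A minimal sketch of that registration pattern (all demo_* names are assumptions; the memory is allocated zeroed by the pernet core, and the lock would be initialized from an .init hook):

#include <linux/spinlock.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

static int demo_net_id __read_mostly;

struct demo_net {
	spinlock_t lock;
};

/* Fetch this subsystem's per-namespace state. */
static struct demo_net *demo_pernet(struct net *net)
{
	return net_generic(net, demo_net_id);
}

static struct pernet_operations demo_net_ops = {
	.id   = &demo_net_id,
	.size = sizeof(struct demo_net),
};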
@@ -104,12 +116,12 @@ instance_get(struct nfulnl_instance *inst)
104} 116}
105 117
106static struct nfulnl_instance * 118static struct nfulnl_instance *
107instance_lookup_get(u_int16_t group_num) 119instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num)
108{ 120{
109 struct nfulnl_instance *inst; 121 struct nfulnl_instance *inst;
110 122
111 rcu_read_lock_bh(); 123 rcu_read_lock_bh();
112 inst = __instance_lookup(group_num); 124 inst = __instance_lookup(log, group_num);
113 if (inst && !atomic_inc_not_zero(&inst->use)) 125 if (inst && !atomic_inc_not_zero(&inst->use))
114 inst = NULL; 126 inst = NULL;
115 rcu_read_unlock_bh(); 127 rcu_read_unlock_bh();
@@ -119,7 +131,11 @@ instance_lookup_get(u_int16_t group_num)
119 131
120static void nfulnl_instance_free_rcu(struct rcu_head *head) 132static void nfulnl_instance_free_rcu(struct rcu_head *head)
121{ 133{
122 kfree(container_of(head, struct nfulnl_instance, rcu)); 134 struct nfulnl_instance *inst =
135 container_of(head, struct nfulnl_instance, rcu);
136
137 put_net(inst->net);
138 kfree(inst);
123 module_put(THIS_MODULE); 139 module_put(THIS_MODULE);
124} 140}
125 141
@@ -133,13 +149,15 @@ instance_put(struct nfulnl_instance *inst)
133static void nfulnl_timer(unsigned long data); 149static void nfulnl_timer(unsigned long data);
134 150
135static struct nfulnl_instance * 151static struct nfulnl_instance *
136instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns) 152instance_create(struct net *net, u_int16_t group_num,
153 int portid, struct user_namespace *user_ns)
137{ 154{
138 struct nfulnl_instance *inst; 155 struct nfulnl_instance *inst;
156 struct nfnl_log_net *log = nfnl_log_pernet(net);
139 int err; 157 int err;
140 158
141 spin_lock_bh(&instances_lock); 159 spin_lock_bh(&log->instances_lock);
142 if (__instance_lookup(group_num)) { 160 if (__instance_lookup(log, group_num)) {
143 err = -EEXIST; 161 err = -EEXIST;
144 goto out_unlock; 162 goto out_unlock;
145 } 163 }
@@ -163,6 +181,7 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)
163 181
164 setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); 182 setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
165 183
184 inst->net = get_net(net);
166 inst->peer_user_ns = user_ns; 185 inst->peer_user_ns = user_ns;
167 inst->peer_portid = portid; 186 inst->peer_portid = portid;
168 inst->group_num = group_num; 187 inst->group_num = group_num;
@@ -174,14 +193,15 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)
174 inst->copy_range = NFULNL_COPY_RANGE_MAX; 193 inst->copy_range = NFULNL_COPY_RANGE_MAX;
175 194
176 hlist_add_head_rcu(&inst->hlist, 195 hlist_add_head_rcu(&inst->hlist,
177 &instance_table[instance_hashfn(group_num)]); 196 &log->instance_table[instance_hashfn(group_num)]);
197
178 198
179 spin_unlock_bh(&instances_lock); 199 spin_unlock_bh(&log->instances_lock);
180 200
181 return inst; 201 return inst;
182 202
183out_unlock: 203out_unlock:
184 spin_unlock_bh(&instances_lock); 204 spin_unlock_bh(&log->instances_lock);
185 return ERR_PTR(err); 205 return ERR_PTR(err);
186} 206}
187 207
@@ -210,11 +230,12 @@ __instance_destroy(struct nfulnl_instance *inst)
210} 230}
211 231
212static inline void 232static inline void
213instance_destroy(struct nfulnl_instance *inst) 233instance_destroy(struct nfnl_log_net *log,
234 struct nfulnl_instance *inst)
214{ 235{
215 spin_lock_bh(&instances_lock); 236 spin_lock_bh(&log->instances_lock);
216 __instance_destroy(inst); 237 __instance_destroy(inst);
217 spin_unlock_bh(&instances_lock); 238 spin_unlock_bh(&log->instances_lock);
218} 239}
219 240
220static int 241static int
@@ -298,7 +319,7 @@ nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags)
298} 319}
299 320
300static struct sk_buff * 321static struct sk_buff *
301nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size) 322nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size)
302{ 323{
303 struct sk_buff *skb; 324 struct sk_buff *skb;
304 unsigned int n; 325 unsigned int n;
@@ -307,13 +328,14 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size)
307 * message. WARNING: has to be <= 128k due to slab restrictions */ 328 * message. WARNING: has to be <= 128k due to slab restrictions */
308 329
309 n = max(inst_size, pkt_size); 330 n = max(inst_size, pkt_size);
310 skb = alloc_skb(n, GFP_ATOMIC); 331 skb = nfnetlink_alloc_skb(&init_net, n, peer_portid, GFP_ATOMIC);
311 if (!skb) { 332 if (!skb) {
312 if (n > pkt_size) { 333 if (n > pkt_size) {
313 /* try to allocate only as much as we need for current 334 /* try to allocate only as much as we need for current
314 * packet */ 335 * packet */
315 336
316 skb = alloc_skb(pkt_size, GFP_ATOMIC); 337 skb = nfnetlink_alloc_skb(&init_net, pkt_size,
338 peer_portid, GFP_ATOMIC);
317 if (!skb) 339 if (!skb)
318 pr_err("nfnetlink_log: can't even alloc %u bytes\n", 340 pr_err("nfnetlink_log: can't even alloc %u bytes\n",
319 pkt_size); 341 pkt_size);
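
The allocator switch is what the new peer_portid parameter is for:
nfnetlink_alloc_skb() can, when the receiving socket has a memory-mapped
netlink ring configured, return an skb backed by that ring rather than a
freshly allocated one, so the destination portid must be known at
allocation time (note the net argument is still hard-coded to &init_net
in this hunk). The large-then-small fallback keeps the shape of the old
alloc_skb() code, roughly:

	/* try the instance-sized buffer first, fall back to packet size */
	skb = nfnetlink_alloc_skb(net, max(inst_size, pkt_size),
				  peer_portid, GFP_ATOMIC);
	if (!skb && inst_size > pkt_size)
		skb = nfnetlink_alloc_skb(net, pkt_size, peer_portid,
					  GFP_ATOMIC);
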
@@ -336,7 +358,7 @@ __nfulnl_send(struct nfulnl_instance *inst)
336 if (!nlh) 358 if (!nlh)
337 goto out; 359 goto out;
338 } 360 }
339 status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_portid, 361 status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid,
340 MSG_DONTWAIT); 362 MSG_DONTWAIT);
341 363
342 inst->qlen = 0; 364 inst->qlen = 0;
@@ -370,7 +392,8 @@ nfulnl_timer(unsigned long data)
370/* This is an inline function, we don't really care about a long 392/* This is an inline function, we don't really care about a long
371 * list of arguments */ 393 * list of arguments */
372static inline int 394static inline int
373__build_packet_message(struct nfulnl_instance *inst, 395__build_packet_message(struct nfnl_log_net *log,
396 struct nfulnl_instance *inst,
374 const struct sk_buff *skb, 397 const struct sk_buff *skb,
375 unsigned int data_len, 398 unsigned int data_len,
376 u_int8_t pf, 399 u_int8_t pf,
@@ -536,7 +559,7 @@ __build_packet_message(struct nfulnl_instance *inst,
536 /* global sequence number */ 559 /* global sequence number */
537 if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) && 560 if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) &&
538 nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL, 561 nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL,
539 htonl(atomic_inc_return(&global_seq)))) 562 htonl(atomic_inc_return(&log->global_seq))))
540 goto nla_put_failure; 563 goto nla_put_failure;
541 564
542 if (data_len) { 565 if (data_len) {
@@ -592,13 +615,15 @@ nfulnl_log_packet(u_int8_t pf,
592 const struct nf_loginfo *li; 615 const struct nf_loginfo *li;
593 unsigned int qthreshold; 616 unsigned int qthreshold;
594 unsigned int plen; 617 unsigned int plen;
618 struct net *net = dev_net(in ? in : out);
619 struct nfnl_log_net *log = nfnl_log_pernet(net);
595 620
596 if (li_user && li_user->type == NF_LOG_TYPE_ULOG) 621 if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
597 li = li_user; 622 li = li_user;
598 else 623 else
599 li = &default_loginfo; 624 li = &default_loginfo;
600 625
601 inst = instance_lookup_get(li->u.ulog.group); 626 inst = instance_lookup_get(log, li->u.ulog.group);
602 if (!inst) 627 if (!inst)
603 return; 628 return;
604 629
@@ -609,7 +634,7 @@ nfulnl_log_packet(u_int8_t pf,
609 /* FIXME: do we want to make the size calculation conditional based on 634 /* FIXME: do we want to make the size calculation conditional based on
610 * what is actually present? way more branches and checks, but more 635 * what is actually present? way more branches and checks, but more
611 * memory efficient... */ 636 * memory efficient... */
612 size = NLMSG_SPACE(sizeof(struct nfgenmsg)) 637 size = nlmsg_total_size(sizeof(struct nfgenmsg))
613 + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr)) 638 + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr))
614 + nla_total_size(sizeof(u_int32_t)) /* ifindex */ 639 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
615 + nla_total_size(sizeof(u_int32_t)) /* ifindex */ 640 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
@@ -673,14 +698,15 @@ nfulnl_log_packet(u_int8_t pf,
673 } 698 }
674 699
675 if (!inst->skb) { 700 if (!inst->skb) {
676 inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size); 701 inst->skb = nfulnl_alloc_skb(inst->peer_portid, inst->nlbufsiz,
702 size);
677 if (!inst->skb) 703 if (!inst->skb)
678 goto alloc_failure; 704 goto alloc_failure;
679 } 705 }
680 706
681 inst->qlen++; 707 inst->qlen++;
682 708
683 __build_packet_message(inst, skb, data_len, pf, 709 __build_packet_message(log, inst, skb, data_len, pf,
684 hooknum, in, out, prefix, plen); 710 hooknum, in, out, prefix, plen);
685 711
686 if (inst->qlen >= qthreshold) 712 if (inst->qlen >= qthreshold)
@@ -709,24 +735,24 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
709 unsigned long event, void *ptr) 735 unsigned long event, void *ptr)
710{ 736{
711 struct netlink_notify *n = ptr; 737 struct netlink_notify *n = ptr;
738 struct nfnl_log_net *log = nfnl_log_pernet(n->net);
712 739
713 if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { 740 if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
714 int i; 741 int i;
715 742
716 /* destroy all instances for this portid */ 743 /* destroy all instances for this portid */
717 spin_lock_bh(&instances_lock); 744 spin_lock_bh(&log->instances_lock);
718 for (i = 0; i < INSTANCE_BUCKETS; i++) { 745 for (i = 0; i < INSTANCE_BUCKETS; i++) {
719 struct hlist_node *t2; 746 struct hlist_node *t2;
720 struct nfulnl_instance *inst; 747 struct nfulnl_instance *inst;
721 struct hlist_head *head = &instance_table[i]; 748 struct hlist_head *head = &log->instance_table[i];
722 749
723 hlist_for_each_entry_safe(inst, t2, head, hlist) { 750 hlist_for_each_entry_safe(inst, t2, head, hlist) {
724 if ((net_eq(n->net, &init_net)) && 751 if (n->portid == inst->peer_portid)
725 (n->portid == inst->peer_portid))
726 __instance_destroy(inst); 752 __instance_destroy(inst);
727 } 753 }
728 } 754 }
729 spin_unlock_bh(&instances_lock); 755 spin_unlock_bh(&log->instances_lock);
730 } 756 }
731 return NOTIFY_DONE; 757 return NOTIFY_DONE;
732} 758}
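
With instances stored per namespace, the NETLINK_URELEASE handler can
drop the old net_eq(n->net, &init_net) guard and simply resolve the
table of whatever namespace the dying socket lived in. A condensed
sketch of such a notifier (foo_* names hypothetical):

	static int foo_rcv_nl_event(struct notifier_block *this,
				    unsigned long event, void *ptr)
	{
		struct netlink_notify *n = ptr;	/* ->net, ->portid, ->protocol */

		if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER)
			foo_destroy_instances(n->net, n->portid); /* hypothetical */
		return NOTIFY_DONE;
	}

	static struct notifier_block foo_nl_notifier = {
		.notifier_call = foo_rcv_nl_event,
	};

netlink_register_notifier(&foo_nl_notifier) then fires once per released
netlink socket, in that socket's own namespace.
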
@@ -767,6 +793,8 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
767 u_int16_t group_num = ntohs(nfmsg->res_id); 793 u_int16_t group_num = ntohs(nfmsg->res_id);
768 struct nfulnl_instance *inst; 794 struct nfulnl_instance *inst;
769 struct nfulnl_msg_config_cmd *cmd = NULL; 795 struct nfulnl_msg_config_cmd *cmd = NULL;
796 struct net *net = sock_net(ctnl);
797 struct nfnl_log_net *log = nfnl_log_pernet(net);
770 int ret = 0; 798 int ret = 0;
771 799
772 if (nfula[NFULA_CFG_CMD]) { 800 if (nfula[NFULA_CFG_CMD]) {
@@ -776,14 +804,14 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
776 /* Commands without queue context */ 804 /* Commands without queue context */
777 switch (cmd->command) { 805 switch (cmd->command) {
778 case NFULNL_CFG_CMD_PF_BIND: 806 case NFULNL_CFG_CMD_PF_BIND:
779 return nf_log_bind_pf(pf, &nfulnl_logger); 807 return nf_log_bind_pf(net, pf, &nfulnl_logger);
780 case NFULNL_CFG_CMD_PF_UNBIND: 808 case NFULNL_CFG_CMD_PF_UNBIND:
781 nf_log_unbind_pf(pf); 809 nf_log_unbind_pf(net, pf);
782 return 0; 810 return 0;
783 } 811 }
784 } 812 }
785 813
786 inst = instance_lookup_get(group_num); 814 inst = instance_lookup_get(log, group_num);
787 if (inst && inst->peer_portid != NETLINK_CB(skb).portid) { 815 if (inst && inst->peer_portid != NETLINK_CB(skb).portid) {
788 ret = -EPERM; 816 ret = -EPERM;
789 goto out_put; 817 goto out_put;
@@ -797,9 +825,9 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
797 goto out_put; 825 goto out_put;
798 } 826 }
799 827
800 inst = instance_create(group_num, 828 inst = instance_create(net, group_num,
801 NETLINK_CB(skb).portid, 829 NETLINK_CB(skb).portid,
802 sk_user_ns(NETLINK_CB(skb).ssk)); 830 sk_user_ns(NETLINK_CB(skb).sk));
803 if (IS_ERR(inst)) { 831 if (IS_ERR(inst)) {
804 ret = PTR_ERR(inst); 832 ret = PTR_ERR(inst);
805 goto out; 833 goto out;
@@ -811,7 +839,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
811 goto out; 839 goto out;
812 } 840 }
813 841
814 instance_destroy(inst); 842 instance_destroy(log, inst);
815 goto out_put; 843 goto out_put;
816 default: 844 default:
817 ret = -ENOTSUPP; 845 ret = -ENOTSUPP;
@@ -894,55 +922,68 @@ static const struct nfnetlink_subsystem nfulnl_subsys = {
894 922
895#ifdef CONFIG_PROC_FS 923#ifdef CONFIG_PROC_FS
896struct iter_state { 924struct iter_state {
925 struct seq_net_private p;
897 unsigned int bucket; 926 unsigned int bucket;
898}; 927};
899 928
900static struct hlist_node *get_first(struct iter_state *st) 929static struct hlist_node *get_first(struct net *net, struct iter_state *st)
901{ 930{
931 struct nfnl_log_net *log;
902 if (!st) 932 if (!st)
903 return NULL; 933 return NULL;
904 934
935 log = nfnl_log_pernet(net);
936
905 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { 937 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
906 if (!hlist_empty(&instance_table[st->bucket])) 938 struct hlist_head *head = &log->instance_table[st->bucket];
907 return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); 939
940 if (!hlist_empty(head))
941 return rcu_dereference_bh(hlist_first_rcu(head));
908 } 942 }
909 return NULL; 943 return NULL;
910} 944}
911 945
912static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) 946static struct hlist_node *get_next(struct net *net, struct iter_state *st,
947 struct hlist_node *h)
913{ 948{
914 h = rcu_dereference_bh(hlist_next_rcu(h)); 949 h = rcu_dereference_bh(hlist_next_rcu(h));
915 while (!h) { 950 while (!h) {
951 struct nfnl_log_net *log;
952 struct hlist_head *head;
953
916 if (++st->bucket >= INSTANCE_BUCKETS) 954 if (++st->bucket >= INSTANCE_BUCKETS)
917 return NULL; 955 return NULL;
918 956
919 h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); 957 log = nfnl_log_pernet(net);
958 head = &log->instance_table[st->bucket];
959 h = rcu_dereference_bh(hlist_first_rcu(head));
920 } 960 }
921 return h; 961 return h;
922} 962}
923 963
924static struct hlist_node *get_idx(struct iter_state *st, loff_t pos) 964static struct hlist_node *get_idx(struct net *net, struct iter_state *st,
965 loff_t pos)
925{ 966{
926 struct hlist_node *head; 967 struct hlist_node *head;
927 head = get_first(st); 968 head = get_first(net, st);
928 969
929 if (head) 970 if (head)
930 while (pos && (head = get_next(st, head))) 971 while (pos && (head = get_next(net, st, head)))
931 pos--; 972 pos--;
932 return pos ? NULL : head; 973 return pos ? NULL : head;
933} 974}
934 975
935static void *seq_start(struct seq_file *seq, loff_t *pos) 976static void *seq_start(struct seq_file *s, loff_t *pos)
936 __acquires(rcu_bh) 977 __acquires(rcu_bh)
937{ 978{
938 rcu_read_lock_bh(); 979 rcu_read_lock_bh();
939 return get_idx(seq->private, *pos); 980 return get_idx(seq_file_net(s), s->private, *pos);
940} 981}
941 982
942static void *seq_next(struct seq_file *s, void *v, loff_t *pos) 983static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
943{ 984{
944 (*pos)++; 985 (*pos)++;
945 return get_next(s->private, v); 986 return get_next(seq_file_net(s), s->private, v);
946} 987}
947 988
948static void seq_stop(struct seq_file *s, void *v) 989static void seq_stop(struct seq_file *s, void *v)
@@ -971,8 +1012,8 @@ static const struct seq_operations nful_seq_ops = {
971 1012
972static int nful_open(struct inode *inode, struct file *file) 1013static int nful_open(struct inode *inode, struct file *file)
973{ 1014{
974 return seq_open_private(file, &nful_seq_ops, 1015 return seq_open_net(inode, file, &nful_seq_ops,
975 sizeof(struct iter_state)); 1016 sizeof(struct iter_state));
976} 1017}
977 1018
978static const struct file_operations nful_file_ops = { 1019static const struct file_operations nful_file_ops = {
@@ -980,17 +1021,43 @@ static const struct file_operations nful_file_ops = {
980 .open = nful_open, 1021 .open = nful_open,
981 .read = seq_read, 1022 .read = seq_read,
982 .llseek = seq_lseek, 1023 .llseek = seq_lseek,
983 .release = seq_release_private, 1024 .release = seq_release_net,
984}; 1025};
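
seq_open_net()/seq_release_net() replace the plain private-data helpers
so each /proc reader iterates only its own namespace's table. The
contract, sketched with hypothetical foo_* names: the iterator state
must begin with struct seq_net_private, which is where seq_open_net()
stashes the opener's namespace for seq_file_net() to recover.

	struct foo_iter_state {
		struct seq_net_private p;	/* must be the first member */
		unsigned int bucket;
	};

	static int foo_open(struct inode *inode, struct file *file)
	{
		return seq_open_net(inode, file, &foo_seq_ops, /* assumed ops */
				    sizeof(struct foo_iter_state));
	}

	static void *foo_seq_start(struct seq_file *s, loff_t *pos)
	{
		struct net *net = seq_file_net(s); /* opener's namespace */

		return foo_get_idx(net, s->private, *pos); /* hypothetical */
	}
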
985 1026
986#endif /* PROC_FS */ 1027#endif /* PROC_FS */
987 1028
988static int __init nfnetlink_log_init(void) 1029static int __net_init nfnl_log_net_init(struct net *net)
989{ 1030{
990 int i, status = -ENOMEM; 1031 unsigned int i;
1032 struct nfnl_log_net *log = nfnl_log_pernet(net);
991 1033
992 for (i = 0; i < INSTANCE_BUCKETS; i++) 1034 for (i = 0; i < INSTANCE_BUCKETS; i++)
993 INIT_HLIST_HEAD(&instance_table[i]); 1035 INIT_HLIST_HEAD(&log->instance_table[i]);
1036 spin_lock_init(&log->instances_lock);
1037
1038#ifdef CONFIG_PROC_FS
1039 if (!proc_create("nfnetlink_log", 0440,
1040 net->nf.proc_netfilter, &nful_file_ops))
1041 return -ENOMEM;
1042#endif
1043 return 0;
1044}
1045
1046static void __net_exit nfnl_log_net_exit(struct net *net)
1047{
1048 remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
1049}
1050
1051static struct pernet_operations nfnl_log_net_ops = {
1052 .init = nfnl_log_net_init,
1053 .exit = nfnl_log_net_exit,
1054 .id = &nfnl_log_net_id,
1055 .size = sizeof(struct nfnl_log_net),
1056};
1057
1058static int __init nfnetlink_log_init(void)
1059{
1060 int status = -ENOMEM;
994 1061
995 /* it's not really all that important to have a random value, so 1062 /* it's not really all that important to have a random value, so
996 * we can do this from the init function, even if there hasn't 1063 * we can do this from the init function, even if there hasn't
@@ -1000,29 +1067,25 @@ static int __init nfnetlink_log_init(void)
1000 netlink_register_notifier(&nfulnl_rtnl_notifier); 1067 netlink_register_notifier(&nfulnl_rtnl_notifier);
1001 status = nfnetlink_subsys_register(&nfulnl_subsys); 1068 status = nfnetlink_subsys_register(&nfulnl_subsys);
1002 if (status < 0) { 1069 if (status < 0) {
1003 printk(KERN_ERR "log: failed to create netlink socket\n"); 1070 pr_err("log: failed to create netlink socket\n");
1004 goto cleanup_netlink_notifier; 1071 goto cleanup_netlink_notifier;
1005 } 1072 }
1006 1073
1007 status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger); 1074 status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger);
1008 if (status < 0) { 1075 if (status < 0) {
1009 printk(KERN_ERR "log: failed to register logger\n"); 1076 pr_err("log: failed to register logger\n");
1010 goto cleanup_subsys; 1077 goto cleanup_subsys;
1011 } 1078 }
1012 1079
1013#ifdef CONFIG_PROC_FS 1080 status = register_pernet_subsys(&nfnl_log_net_ops);
1014 if (!proc_create("nfnetlink_log", 0440, 1081 if (status < 0) {
1015 proc_net_netfilter, &nful_file_ops)) { 1082 pr_err("log: failed to register pernet ops\n");
1016 status = -ENOMEM;
1017 goto cleanup_logger; 1083 goto cleanup_logger;
1018 } 1084 }
1019#endif
1020 return status; 1085 return status;
1021 1086
1022#ifdef CONFIG_PROC_FS
1023cleanup_logger: 1087cleanup_logger:
1024 nf_log_unregister(&nfulnl_logger); 1088 nf_log_unregister(&nfulnl_logger);
1025#endif
1026cleanup_subsys: 1089cleanup_subsys:
1027 nfnetlink_subsys_unregister(&nfulnl_subsys); 1090 nfnetlink_subsys_unregister(&nfulnl_subsys);
1028cleanup_netlink_notifier: 1091cleanup_netlink_notifier:
@@ -1032,10 +1095,8 @@ cleanup_netlink_notifier:
1032 1095
1033static void __exit nfnetlink_log_fini(void) 1096static void __exit nfnetlink_log_fini(void)
1034{ 1097{
1098 unregister_pernet_subsys(&nfnl_log_net_ops);
1035 nf_log_unregister(&nfulnl_logger); 1099 nf_log_unregister(&nfulnl_logger);
1036#ifdef CONFIG_PROC_FS
1037 remove_proc_entry("nfnetlink_log", proc_net_netfilter);
1038#endif
1039 nfnetlink_subsys_unregister(&nfulnl_subsys); 1100 nfnetlink_subsys_unregister(&nfulnl_subsys);
1040 netlink_unregister_notifier(&nfulnl_rtnl_notifier); 1101 netlink_unregister_notifier(&nfulnl_rtnl_notifier);
1041} 1102}
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 42680b2baa11..ef3cdb4bfeea 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -30,6 +30,7 @@
30#include <linux/list.h> 30#include <linux/list.h>
31#include <net/sock.h> 31#include <net/sock.h>
32#include <net/netfilter/nf_queue.h> 32#include <net/netfilter/nf_queue.h>
33#include <net/netns/generic.h>
33#include <net/netfilter/nfnetlink_queue.h> 34#include <net/netfilter/nfnetlink_queue.h>
34 35
35#include <linux/atomic.h> 36#include <linux/atomic.h>
@@ -66,23 +67,31 @@ struct nfqnl_instance {
66 67
67typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); 68typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
68 69
69static DEFINE_SPINLOCK(instances_lock); 70static int nfnl_queue_net_id __read_mostly;
70 71
71#define INSTANCE_BUCKETS 16 72#define INSTANCE_BUCKETS 16
72static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly; 73struct nfnl_queue_net {
74 spinlock_t instances_lock;
75 struct hlist_head instance_table[INSTANCE_BUCKETS];
76};
77
78static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net)
79{
80 return net_generic(net, nfnl_queue_net_id);
81}
73 82
74static inline u_int8_t instance_hashfn(u_int16_t queue_num) 83static inline u_int8_t instance_hashfn(u_int16_t queue_num)
75{ 84{
76 return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; 85 return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
77} 86}
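
The one-character hash fix deserves a worked example: with
INSTANCE_BUCKETS == 16 the modulo keeps only the low four bits of the
mixed value, and OR saturates them, so the high byte rarely matters.

	/* old, '|':
	 *   0x010f: (0x010f >> 8) | 0x010f -> low nibble 0x1 | 0xf = 0xf
	 *   0x020f: (0x020f >> 8) | 0x020f -> low nibble 0x2 | 0xf = 0xf
	 *   => every queue number ending in 0xf hashed to bucket 15.
	 *
	 * new, '^':
	 *   0x010f: 0x1 ^ 0xf = 0xe -> bucket 14
	 *   0x020f: 0x2 ^ 0xf = 0xd -> bucket 13
	 *   => both bytes keep influencing the bucket. */
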
78 87
79static struct nfqnl_instance * 88static struct nfqnl_instance *
80instance_lookup(u_int16_t queue_num) 89instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
81{ 90{
82 struct hlist_head *head; 91 struct hlist_head *head;
83 struct nfqnl_instance *inst; 92 struct nfqnl_instance *inst;
84 93
85 head = &instance_table[instance_hashfn(queue_num)]; 94 head = &q->instance_table[instance_hashfn(queue_num)];
86 hlist_for_each_entry_rcu(inst, head, hlist) { 95 hlist_for_each_entry_rcu(inst, head, hlist) {
87 if (inst->queue_num == queue_num) 96 if (inst->queue_num == queue_num)
88 return inst; 97 return inst;
@@ -91,14 +100,15 @@ instance_lookup(u_int16_t queue_num)
91} 100}
92 101
93static struct nfqnl_instance * 102static struct nfqnl_instance *
94instance_create(u_int16_t queue_num, int portid) 103instance_create(struct nfnl_queue_net *q, u_int16_t queue_num,
104 int portid)
95{ 105{
96 struct nfqnl_instance *inst; 106 struct nfqnl_instance *inst;
97 unsigned int h; 107 unsigned int h;
98 int err; 108 int err;
99 109
100 spin_lock(&instances_lock); 110 spin_lock(&q->instances_lock);
101 if (instance_lookup(queue_num)) { 111 if (instance_lookup(q, queue_num)) {
102 err = -EEXIST; 112 err = -EEXIST;
103 goto out_unlock; 113 goto out_unlock;
104 } 114 }
@@ -123,16 +133,16 @@ instance_create(u_int16_t queue_num, int portid)
123 } 133 }
124 134
125 h = instance_hashfn(queue_num); 135 h = instance_hashfn(queue_num);
126 hlist_add_head_rcu(&inst->hlist, &instance_table[h]); 136 hlist_add_head_rcu(&inst->hlist, &q->instance_table[h]);
127 137
128 spin_unlock(&instances_lock); 138 spin_unlock(&q->instances_lock);
129 139
130 return inst; 140 return inst;
131 141
132out_free: 142out_free:
133 kfree(inst); 143 kfree(inst);
134out_unlock: 144out_unlock:
135 spin_unlock(&instances_lock); 145 spin_unlock(&q->instances_lock);
136 return ERR_PTR(err); 146 return ERR_PTR(err);
137} 147}
138 148
@@ -158,11 +168,11 @@ __instance_destroy(struct nfqnl_instance *inst)
158} 168}
159 169
160static void 170static void
161instance_destroy(struct nfqnl_instance *inst) 171instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst)
162{ 172{
163 spin_lock(&instances_lock); 173 spin_lock(&q->instances_lock);
164 __instance_destroy(inst); 174 __instance_destroy(inst);
165 spin_unlock(&instances_lock); 175 spin_unlock(&q->instances_lock);
166} 176}
167 177
168static inline void 178static inline void
@@ -217,14 +227,59 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
217 spin_unlock_bh(&queue->lock); 227 spin_unlock_bh(&queue->lock);
218} 228}
219 229
230static void
231nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
232{
233 int i, j = 0;
234 int plen = 0; /* length of skb->head fragment */
235 struct page *page;
236 unsigned int offset;
237
238 /* don't bother with small payloads */
239 if (len <= skb_tailroom(to)) {
240 skb_copy_bits(from, 0, skb_put(to, len), len);
241 return;
242 }
243
244 if (hlen) {
245 skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
246 len -= hlen;
247 } else {
248 plen = min_t(int, skb_headlen(from), len);
249 if (plen) {
250 page = virt_to_head_page(from->head);
251 offset = from->data - (unsigned char *)page_address(page);
252 __skb_fill_page_desc(to, 0, page, offset, plen);
253 get_page(page);
254 j = 1;
255 len -= plen;
256 }
257 }
258
259 to->truesize += len + plen;
260 to->len += len + plen;
261 to->data_len += len + plen;
262
263 for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
264 if (!len)
265 break;
266 skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
267 skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
268 len -= skb_shinfo(to)->frags[j].size;
269 skb_frag_ref(to, j);
270 j++;
271 }
272 skb_shinfo(to)->nr_frags = j;
273}
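
nfqnl_zcopy() keeps payload copies off the hot path: payloads that fit
the netlink skb's tailroom are still memcpy'ed, everything larger is
attached by taking page references on the source skb's head and
fragments (hence the truesize/len/data_len bookkeeping above). The
intended call pattern, matching the payload hunk further down:

	/* reserve only the attribute header in the netlink skb ... */
	nla = (struct nlattr *)skb_put(skb, sizeof(*nla));
	nla->nla_type = NFQA_PAYLOAD;
	nla->nla_len  = nla_attr_size(data_len);

	/* ... then attach data_len bytes of entskb: the first hlen bytes
	 * are copied into the linear area, the rest is shared as frags */
	nfqnl_zcopy(skb, entskb, data_len, hlen);
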
274
220static struct sk_buff * 275static struct sk_buff *
221nfqnl_build_packet_message(struct nfqnl_instance *queue, 276nfqnl_build_packet_message(struct nfqnl_instance *queue,
222 struct nf_queue_entry *entry, 277 struct nf_queue_entry *entry,
223 __be32 **packet_id_ptr) 278 __be32 **packet_id_ptr)
224{ 279{
225 sk_buff_data_t old_tail;
226 size_t size; 280 size_t size;
227 size_t data_len = 0, cap_len = 0; 281 size_t data_len = 0, cap_len = 0;
282 int hlen = 0;
228 struct sk_buff *skb; 283 struct sk_buff *skb;
229 struct nlattr *nla; 284 struct nlattr *nla;
230 struct nfqnl_msg_packet_hdr *pmsg; 285 struct nfqnl_msg_packet_hdr *pmsg;
@@ -236,7 +291,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
236 struct nf_conn *ct = NULL; 291 struct nf_conn *ct = NULL;
237 enum ip_conntrack_info uninitialized_var(ctinfo); 292 enum ip_conntrack_info uninitialized_var(ctinfo);
238 293
239 size = NLMSG_SPACE(sizeof(struct nfgenmsg)) 294 size = nlmsg_total_size(sizeof(struct nfgenmsg))
240 + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) 295 + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
241 + nla_total_size(sizeof(u_int32_t)) /* ifindex */ 296 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
242 + nla_total_size(sizeof(u_int32_t)) /* ifindex */ 297 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
@@ -246,8 +301,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
246#endif 301#endif
247 + nla_total_size(sizeof(u_int32_t)) /* mark */ 302 + nla_total_size(sizeof(u_int32_t)) /* mark */
248 + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) 303 + nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
249 + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp) 304 + nla_total_size(sizeof(u_int32_t)); /* cap_len */
250 + nla_total_size(sizeof(u_int32_t))); /* cap_len */ 305
306 if (entskb->tstamp.tv64)
307 size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
251 308
252 outdev = entry->outdev; 309 outdev = entry->outdev;
253 310
@@ -265,7 +322,16 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
265 if (data_len == 0 || data_len > entskb->len) 322 if (data_len == 0 || data_len > entskb->len)
266 data_len = entskb->len; 323 data_len = entskb->len;
267 324
268 size += nla_total_size(data_len); 325
326 if (!entskb->head_frag ||
327 skb_headlen(entskb) < L1_CACHE_BYTES ||
328 skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS)
329 hlen = skb_headlen(entskb);
330
331 if (skb_has_frag_list(entskb))
332 hlen = entskb->len;
333 hlen = min_t(int, data_len, hlen);
334 size += sizeof(struct nlattr) + hlen;
269 cap_len = entskb->len; 335 cap_len = entskb->len;
270 break; 336 break;
271 } 337 }
@@ -273,11 +339,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
273 if (queue->flags & NFQA_CFG_F_CONNTRACK) 339 if (queue->flags & NFQA_CFG_F_CONNTRACK)
274 ct = nfqnl_ct_get(entskb, &size, &ctinfo); 340 ct = nfqnl_ct_get(entskb, &size, &ctinfo);
275 341
276 skb = alloc_skb(size, GFP_ATOMIC); 342 skb = nfnetlink_alloc_skb(&init_net, size, queue->peer_portid,
343 GFP_ATOMIC);
277 if (!skb) 344 if (!skb)
278 return NULL; 345 return NULL;
279 346
280 old_tail = skb->tail;
281 nlh = nlmsg_put(skb, 0, 0, 347 nlh = nlmsg_put(skb, 0, 0,
282 NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, 348 NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
283 sizeof(struct nfgenmsg), 0); 349 sizeof(struct nfgenmsg), 0);
@@ -382,31 +448,26 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
382 goto nla_put_failure; 448 goto nla_put_failure;
383 } 449 }
384 450
451 if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
452 goto nla_put_failure;
453
454 if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
455 goto nla_put_failure;
456
385 if (data_len) { 457 if (data_len) {
386 struct nlattr *nla; 458 struct nlattr *nla;
387 int sz = nla_attr_size(data_len);
388 459
389 if (skb_tailroom(skb) < nla_total_size(data_len)) { 460 if (skb_tailroom(skb) < sizeof(*nla) + hlen)
390 printk(KERN_WARNING "nf_queue: no tailroom!\n"); 461 goto nla_put_failure;
391 kfree_skb(skb);
392 return NULL;
393 }
394 462
395 nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); 463 nla = (struct nlattr *)skb_put(skb, sizeof(*nla));
396 nla->nla_type = NFQA_PAYLOAD; 464 nla->nla_type = NFQA_PAYLOAD;
397 nla->nla_len = sz; 465 nla->nla_len = nla_attr_size(data_len);
398 466
399 if (skb_copy_bits(entskb, 0, nla_data(nla), data_len)) 467 nfqnl_zcopy(skb, entskb, data_len, hlen);
400 BUG();
401 } 468 }
402 469
403 if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) 470 nlh->nlmsg_len = skb->len;
404 goto nla_put_failure;
405
406 if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
407 goto nla_put_failure;
408
409 nlh->nlmsg_len = skb->tail - old_tail;
410 return skb; 471 return skb;
411 472
412nla_put_failure: 473nla_put_failure:
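
A consequence of the zero-copy payload shows in the last line of the
hunk above: frag pages grow skb->len without moving skb->tail, so the
message length can no longer be derived from the tail pointer.

	/* skb->tail - old_tail == linear bytes only (undercounts now)
	 * skb->len            == linear bytes + frag bytes             */
	nlh->nlmsg_len = skb->len;
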
@@ -423,9 +484,12 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
423 int err = -ENOBUFS; 484 int err = -ENOBUFS;
424 __be32 *packet_id_ptr; 485 __be32 *packet_id_ptr;
425 int failopen = 0; 486 int failopen = 0;
487 struct net *net = dev_net(entry->indev ?
488 entry->indev : entry->outdev);
489 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
426 490
427 /* rcu_read_lock()ed by nf_hook_slow() */ 491 /* rcu_read_lock()ed by nf_hook_slow() */
428 queue = instance_lookup(queuenum); 492 queue = instance_lookup(q, queuenum);
429 if (!queue) { 493 if (!queue) {
430 err = -ESRCH; 494 err = -ESRCH;
431 goto err_out; 495 goto err_out;
@@ -462,7 +526,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
462 *packet_id_ptr = htonl(entry->id); 526 *packet_id_ptr = htonl(entry->id);
463 527
464 /* nfnetlink_unicast will either free the nskb or add it to a socket */ 528 /* nfnetlink_unicast will either free the nskb or add it to a socket */
465 err = nfnetlink_unicast(nskb, &init_net, queue->peer_portid, MSG_DONTWAIT); 529 err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT);
466 if (err < 0) { 530 if (err < 0) {
467 queue->queue_user_dropped++; 531 queue->queue_user_dropped++;
468 goto err_out_unlock; 532 goto err_out_unlock;
@@ -575,15 +639,16 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
575/* drop all packets with either indev or outdev == ifindex from all queue 639/* drop all packets with either indev or outdev == ifindex from all queue
576 * instances */ 640 * instances */
577static void 641static void
578nfqnl_dev_drop(int ifindex) 642nfqnl_dev_drop(struct net *net, int ifindex)
579{ 643{
580 int i; 644 int i;
645 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
581 646
582 rcu_read_lock(); 647 rcu_read_lock();
583 648
584 for (i = 0; i < INSTANCE_BUCKETS; i++) { 649 for (i = 0; i < INSTANCE_BUCKETS; i++) {
585 struct nfqnl_instance *inst; 650 struct nfqnl_instance *inst;
586 struct hlist_head *head = &instance_table[i]; 651 struct hlist_head *head = &q->instance_table[i];
587 652
588 hlist_for_each_entry_rcu(inst, head, hlist) 653 hlist_for_each_entry_rcu(inst, head, hlist)
589 nfqnl_flush(inst, dev_cmp, ifindex); 654 nfqnl_flush(inst, dev_cmp, ifindex);
@@ -600,12 +665,9 @@ nfqnl_rcv_dev_event(struct notifier_block *this,
600{ 665{
601 struct net_device *dev = ptr; 666 struct net_device *dev = ptr;
602 667
603 if (!net_eq(dev_net(dev), &init_net))
604 return NOTIFY_DONE;
605
606 /* Drop any packets associated with the downed device */ 668 /* Drop any packets associated with the downed device */
607 if (event == NETDEV_DOWN) 669 if (event == NETDEV_DOWN)
608 nfqnl_dev_drop(dev->ifindex); 670 nfqnl_dev_drop(dev_net(dev), dev->ifindex);
609 return NOTIFY_DONE; 671 return NOTIFY_DONE;
610} 672}
611 673
@@ -618,24 +680,24 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
618 unsigned long event, void *ptr) 680 unsigned long event, void *ptr)
619{ 681{
620 struct netlink_notify *n = ptr; 682 struct netlink_notify *n = ptr;
683 struct nfnl_queue_net *q = nfnl_queue_pernet(n->net);
621 684
622 if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { 685 if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
623 int i; 686 int i;
624 687
625 /* destroy all instances for this portid */ 688 /* destroy all instances for this portid */
626 spin_lock(&instances_lock); 689 spin_lock(&q->instances_lock);
627 for (i = 0; i < INSTANCE_BUCKETS; i++) { 690 for (i = 0; i < INSTANCE_BUCKETS; i++) {
628 struct hlist_node *t2; 691 struct hlist_node *t2;
629 struct nfqnl_instance *inst; 692 struct nfqnl_instance *inst;
630 struct hlist_head *head = &instance_table[i]; 693 struct hlist_head *head = &q->instance_table[i];
631 694
632 hlist_for_each_entry_safe(inst, t2, head, hlist) { 695 hlist_for_each_entry_safe(inst, t2, head, hlist) {
633 if ((n->net == &init_net) && 696 if (n->portid == inst->peer_portid)
634 (n->portid == inst->peer_portid))
635 __instance_destroy(inst); 697 __instance_destroy(inst);
636 } 698 }
637 } 699 }
638 spin_unlock(&instances_lock); 700 spin_unlock(&q->instances_lock);
639 } 701 }
640 return NOTIFY_DONE; 702 return NOTIFY_DONE;
641} 703}
@@ -656,11 +718,12 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
656 [NFQA_MARK] = { .type = NLA_U32 }, 718 [NFQA_MARK] = { .type = NLA_U32 },
657}; 719};
658 720
659static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlportid) 721static struct nfqnl_instance *
722verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, int nlportid)
660{ 723{
661 struct nfqnl_instance *queue; 724 struct nfqnl_instance *queue;
662 725
663 queue = instance_lookup(queue_num); 726 queue = instance_lookup(q, queue_num);
664 if (!queue) 727 if (!queue)
665 return ERR_PTR(-ENODEV); 728 return ERR_PTR(-ENODEV);
666 729
@@ -704,7 +767,11 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
704 LIST_HEAD(batch_list); 767 LIST_HEAD(batch_list);
705 u16 queue_num = ntohs(nfmsg->res_id); 768 u16 queue_num = ntohs(nfmsg->res_id);
706 769
707 queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); 770 struct net *net = sock_net(ctnl);
771 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
772
773 queue = verdict_instance_lookup(q, queue_num,
774 NETLINK_CB(skb).portid);
708 if (IS_ERR(queue)) 775 if (IS_ERR(queue))
709 return PTR_ERR(queue); 776 return PTR_ERR(queue);
710 777
@@ -752,10 +819,13 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
752 enum ip_conntrack_info uninitialized_var(ctinfo); 819 enum ip_conntrack_info uninitialized_var(ctinfo);
753 struct nf_conn *ct = NULL; 820 struct nf_conn *ct = NULL;
754 821
755 queue = instance_lookup(queue_num); 822 struct net *net = sock_net(ctnl);
756 if (!queue) 823 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
757 824
758 queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); 825 queue = instance_lookup(q, queue_num);
826 if (!queue)
827 queue = verdict_instance_lookup(q, queue_num,
828 NETLINK_CB(skb).portid);
759 if (IS_ERR(queue)) 829 if (IS_ERR(queue))
760 return PTR_ERR(queue); 830 return PTR_ERR(queue);
761 831
@@ -819,6 +889,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
819 u_int16_t queue_num = ntohs(nfmsg->res_id); 889 u_int16_t queue_num = ntohs(nfmsg->res_id);
820 struct nfqnl_instance *queue; 890 struct nfqnl_instance *queue;
821 struct nfqnl_msg_config_cmd *cmd = NULL; 891 struct nfqnl_msg_config_cmd *cmd = NULL;
892 struct net *net = sock_net(ctnl);
893 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
822 int ret = 0; 894 int ret = 0;
823 895
824 if (nfqa[NFQA_CFG_CMD]) { 896 if (nfqa[NFQA_CFG_CMD]) {
@@ -832,7 +904,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
832 } 904 }
833 905
834 rcu_read_lock(); 906 rcu_read_lock();
835 queue = instance_lookup(queue_num); 907 queue = instance_lookup(q, queue_num);
836 if (queue && queue->peer_portid != NETLINK_CB(skb).portid) { 908 if (queue && queue->peer_portid != NETLINK_CB(skb).portid) {
837 ret = -EPERM; 909 ret = -EPERM;
838 goto err_out_unlock; 910 goto err_out_unlock;
@@ -845,7 +917,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
845 ret = -EBUSY; 917 ret = -EBUSY;
846 goto err_out_unlock; 918 goto err_out_unlock;
847 } 919 }
848 queue = instance_create(queue_num, NETLINK_CB(skb).portid); 920 queue = instance_create(q, queue_num,
921 NETLINK_CB(skb).portid);
849 if (IS_ERR(queue)) { 922 if (IS_ERR(queue)) {
850 ret = PTR_ERR(queue); 923 ret = PTR_ERR(queue);
851 goto err_out_unlock; 924 goto err_out_unlock;
@@ -856,7 +929,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
856 ret = -ENODEV; 929 ret = -ENODEV;
857 goto err_out_unlock; 930 goto err_out_unlock;
858 } 931 }
859 instance_destroy(queue); 932 instance_destroy(q, queue);
860 break; 933 break;
861 case NFQNL_CFG_CMD_PF_BIND: 934 case NFQNL_CFG_CMD_PF_BIND:
862 case NFQNL_CFG_CMD_PF_UNBIND: 935 case NFQNL_CFG_CMD_PF_UNBIND:
@@ -950,19 +1023,24 @@ static const struct nfnetlink_subsystem nfqnl_subsys = {
950 1023
951#ifdef CONFIG_PROC_FS 1024#ifdef CONFIG_PROC_FS
952struct iter_state { 1025struct iter_state {
1026 struct seq_net_private p;
953 unsigned int bucket; 1027 unsigned int bucket;
954}; 1028};
955 1029
956static struct hlist_node *get_first(struct seq_file *seq) 1030static struct hlist_node *get_first(struct seq_file *seq)
957{ 1031{
958 struct iter_state *st = seq->private; 1032 struct iter_state *st = seq->private;
1033 struct net *net;
1034 struct nfnl_queue_net *q;
959 1035
960 if (!st) 1036 if (!st)
961 return NULL; 1037 return NULL;
962 1038
1039 net = seq_file_net(seq);
1040 q = nfnl_queue_pernet(net);
963 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { 1041 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
964 if (!hlist_empty(&instance_table[st->bucket])) 1042 if (!hlist_empty(&q->instance_table[st->bucket]))
965 return instance_table[st->bucket].first; 1043 return q->instance_table[st->bucket].first;
966 } 1044 }
967 return NULL; 1045 return NULL;
968} 1046}
@@ -970,13 +1048,17 @@ static struct hlist_node *get_first(struct seq_file *seq)
970static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) 1048static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
971{ 1049{
972 struct iter_state *st = seq->private; 1050 struct iter_state *st = seq->private;
1051 struct net *net = seq_file_net(seq);
973 1052
974 h = h->next; 1053 h = h->next;
975 while (!h) { 1054 while (!h) {
1055 struct nfnl_queue_net *q;
1056
976 if (++st->bucket >= INSTANCE_BUCKETS) 1057 if (++st->bucket >= INSTANCE_BUCKETS)
977 return NULL; 1058 return NULL;
978 1059
979 h = instance_table[st->bucket].first; 1060 q = nfnl_queue_pernet(net);
1061 h = q->instance_table[st->bucket].first;
980 } 1062 }
981 return h; 1063 return h;
982} 1064}
@@ -992,11 +1074,11 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
992 return pos ? NULL : head; 1074 return pos ? NULL : head;
993} 1075}
994 1076
995static void *seq_start(struct seq_file *seq, loff_t *pos) 1077static void *seq_start(struct seq_file *s, loff_t *pos)
996 __acquires(instances_lock) 1078 __acquires(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
997{ 1079{
998 spin_lock(&instances_lock); 1080 spin_lock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
999 return get_idx(seq, *pos); 1081 return get_idx(s, *pos);
1000} 1082}
1001 1083
1002static void *seq_next(struct seq_file *s, void *v, loff_t *pos) 1084static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
@@ -1006,9 +1088,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
1006} 1088}
1007 1089
1008static void seq_stop(struct seq_file *s, void *v) 1090static void seq_stop(struct seq_file *s, void *v)
1009 __releases(instances_lock) 1091 __releases(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
1010{ 1092{
1011 spin_unlock(&instances_lock); 1093 spin_unlock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
1012} 1094}
1013 1095
1014static int seq_show(struct seq_file *s, void *v) 1096static int seq_show(struct seq_file *s, void *v)
@@ -1032,7 +1114,7 @@ static const struct seq_operations nfqnl_seq_ops = {
1032 1114
1033static int nfqnl_open(struct inode *inode, struct file *file) 1115static int nfqnl_open(struct inode *inode, struct file *file)
1034{ 1116{
1035 return seq_open_private(file, &nfqnl_seq_ops, 1117 return seq_open_net(inode, file, &nfqnl_seq_ops,
1036 sizeof(struct iter_state)); 1118 sizeof(struct iter_state));
1037} 1119}
1038 1120
@@ -1041,41 +1123,63 @@ static const struct file_operations nfqnl_file_ops = {
1041 .open = nfqnl_open, 1123 .open = nfqnl_open,
1042 .read = seq_read, 1124 .read = seq_read,
1043 .llseek = seq_lseek, 1125 .llseek = seq_lseek,
1044 .release = seq_release_private, 1126 .release = seq_release_net,
1045}; 1127};
1046 1128
1047#endif /* PROC_FS */ 1129#endif /* PROC_FS */
1048 1130
1049static int __init nfnetlink_queue_init(void) 1131static int __net_init nfnl_queue_net_init(struct net *net)
1050{ 1132{
1051 int i, status = -ENOMEM; 1133 unsigned int i;
1134 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
1052 1135
1053 for (i = 0; i < INSTANCE_BUCKETS; i++) 1136 for (i = 0; i < INSTANCE_BUCKETS; i++)
1054 INIT_HLIST_HEAD(&instance_table[i]); 1137 INIT_HLIST_HEAD(&q->instance_table[i]);
1138
1139 spin_lock_init(&q->instances_lock);
1140
1141#ifdef CONFIG_PROC_FS
1142 if (!proc_create("nfnetlink_queue", 0440,
1143 net->nf.proc_netfilter, &nfqnl_file_ops))
1144 return -ENOMEM;
1145#endif
1146 return 0;
1147}
1148
1149static void __net_exit nfnl_queue_net_exit(struct net *net)
1150{
1151 remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
1152}
1153
1154static struct pernet_operations nfnl_queue_net_ops = {
1155 .init = nfnl_queue_net_init,
1156 .exit = nfnl_queue_net_exit,
1157 .id = &nfnl_queue_net_id,
1158 .size = sizeof(struct nfnl_queue_net),
1159};
1160
1161static int __init nfnetlink_queue_init(void)
1162{
1163 int status = -ENOMEM;
1055 1164
1056 netlink_register_notifier(&nfqnl_rtnl_notifier); 1165 netlink_register_notifier(&nfqnl_rtnl_notifier);
1057 status = nfnetlink_subsys_register(&nfqnl_subsys); 1166 status = nfnetlink_subsys_register(&nfqnl_subsys);
1058 if (status < 0) { 1167 if (status < 0) {
1059 printk(KERN_ERR "nf_queue: failed to create netlink socket\n"); 1168 pr_err("nf_queue: failed to create netlink socket\n");
1060 goto cleanup_netlink_notifier; 1169 goto cleanup_netlink_notifier;
1061 } 1170 }
1062 1171
1063#ifdef CONFIG_PROC_FS 1172 status = register_pernet_subsys(&nfnl_queue_net_ops);
1064 if (!proc_create("nfnetlink_queue", 0440, 1173 if (status < 0) {
1065 proc_net_netfilter, &nfqnl_file_ops)) { 1174 pr_err("nf_queue: failed to register pernet ops\n");
1066 status = -ENOMEM;
1067 goto cleanup_subsys; 1175 goto cleanup_subsys;
1068 } 1176 }
1069#endif
1070
1071 register_netdevice_notifier(&nfqnl_dev_notifier); 1177 register_netdevice_notifier(&nfqnl_dev_notifier);
1072 nf_register_queue_handler(&nfqh); 1178 nf_register_queue_handler(&nfqh);
1073 return status; 1179 return status;
1074 1180
1075#ifdef CONFIG_PROC_FS
1076cleanup_subsys: 1181cleanup_subsys:
1077 nfnetlink_subsys_unregister(&nfqnl_subsys); 1182 nfnetlink_subsys_unregister(&nfqnl_subsys);
1078#endif
1079cleanup_netlink_notifier: 1183cleanup_netlink_notifier:
1080 netlink_unregister_notifier(&nfqnl_rtnl_notifier); 1184 netlink_unregister_notifier(&nfqnl_rtnl_notifier);
1081 return status; 1185 return status;
@@ -1085,9 +1189,7 @@ static void __exit nfnetlink_queue_fini(void)
1085{ 1189{
1086 nf_unregister_queue_handler(); 1190 nf_unregister_queue_handler();
1087 unregister_netdevice_notifier(&nfqnl_dev_notifier); 1191 unregister_netdevice_notifier(&nfqnl_dev_notifier);
1088#ifdef CONFIG_PROC_FS 1192 unregister_pernet_subsys(&nfnl_queue_net_ops);
1089 remove_proc_entry("nfnetlink_queue", proc_net_netfilter);
1090#endif
1091 nfnetlink_subsys_unregister(&nfqnl_subsys); 1193 nfnetlink_subsys_unregister(&nfqnl_subsys);
1092 netlink_unregister_notifier(&nfqnl_rtnl_notifier); 1194 netlink_unregister_notifier(&nfqnl_rtnl_notifier);
1093 1195
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 686c7715d777..1a73b18683b6 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -2,6 +2,7 @@
2 * x_tables core - Backend for {ip,ip6,arp}_tables 2 * x_tables core - Backend for {ip,ip6,arp}_tables
3 * 3 *
4 * Copyright (C) 2006-2006 Harald Welte <laforge@netfilter.org> 4 * Copyright (C) 2006-2006 Harald Welte <laforge@netfilter.org>
5 * Copyright (C) 2006-2012 Patrick McHardy <kaber@trash.net>
5 * 6 *
6 * Based on existing ip_tables code which is 7 * Based on existing ip_tables code which is
7 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling 8 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index fa40096940a1..fe573f6c9e91 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -474,7 +474,14 @@ ipt_log_packet(u_int8_t pf,
474 const struct nf_loginfo *loginfo, 474 const struct nf_loginfo *loginfo,
475 const char *prefix) 475 const char *prefix)
476{ 476{
477 struct sbuff *m = sb_open(); 477 struct sbuff *m;
478 struct net *net = dev_net(in ? in : out);
479
480 /* FIXME: Disabled from containers until syslog ns is supported */
481 if (!net_eq(net, &init_net))
482 return;
483
484 m = sb_open();
478 485
479 if (!loginfo) 486 if (!loginfo)
480 loginfo = &default_loginfo; 487 loginfo = &default_loginfo;
@@ -798,7 +805,14 @@ ip6t_log_packet(u_int8_t pf,
798 const struct nf_loginfo *loginfo, 805 const struct nf_loginfo *loginfo,
799 const char *prefix) 806 const char *prefix)
800{ 807{
801 struct sbuff *m = sb_open(); 808 struct sbuff *m;
809 struct net *net = dev_net(in ? in : out);
810
811 /* FIXME: Disabled from containers until syslog ns is supported */
812 if (!net_eq(net, &init_net))
813 return;
814
815 m = sb_open();
802 816
803 if (!loginfo) 817 if (!loginfo)
804 loginfo = &default_loginfo; 818 loginfo = &default_loginfo;
@@ -893,23 +907,55 @@ static struct nf_logger ip6t_log_logger __read_mostly = {
893}; 907};
894#endif 908#endif
895 909
910static int __net_init log_net_init(struct net *net)
911{
912 nf_log_set(net, NFPROTO_IPV4, &ipt_log_logger);
913#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
914 nf_log_set(net, NFPROTO_IPV6, &ip6t_log_logger);
915#endif
916 return 0;
917}
918
919static void __net_exit log_net_exit(struct net *net)
920{
921 nf_log_unset(net, &ipt_log_logger);
922#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
923 nf_log_unset(net, &ip6t_log_logger);
924#endif
925}
926
927static struct pernet_operations log_net_ops = {
928 .init = log_net_init,
929 .exit = log_net_exit,
930};
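
The split between the two nf_log calls is the point of this hunk:
nf_log_register() (below, in log_tg_init) announces the logger to the
nf_log core once per module load, while nf_log_set()/nf_log_unset()
bind and release it as the active logger inside each namespace.
Roughly:

	/* module init, global, once: */
	nf_log_register(NFPROTO_IPV4, &ipt_log_logger);

	/* pernet ->init, once per namespace: what
	 * nf_log_packet(net, NFPROTO_IPV4, ...) resolves to there */
	nf_log_set(net, NFPROTO_IPV4, &ipt_log_logger);
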
931
896static int __init log_tg_init(void) 932static int __init log_tg_init(void)
897{ 933{
898 int ret; 934 int ret;
899 935
936 ret = register_pernet_subsys(&log_net_ops);
937 if (ret < 0)
938 goto err_pernet;
939
900 ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); 940 ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs));
901 if (ret < 0) 941 if (ret < 0)
902 return ret; 942 goto err_target;
903 943
904 nf_log_register(NFPROTO_IPV4, &ipt_log_logger); 944 nf_log_register(NFPROTO_IPV4, &ipt_log_logger);
905#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 945#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
906 nf_log_register(NFPROTO_IPV6, &ip6t_log_logger); 946 nf_log_register(NFPROTO_IPV6, &ip6t_log_logger);
907#endif 947#endif
908 return 0; 948 return 0;
949
950err_target:
951 unregister_pernet_subsys(&log_net_ops);
952err_pernet:
953 return ret;
909} 954}
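
The registration order in log_tg_init() matters: the pernet state must
exist before xt_register_targets() makes the target visible, because a
rule can be loaded and start logging immediately afterwards. The error
labels then unwind in reverse order, the usual idiom (foo_* names
hypothetical):

	static int __init foo_init(void)
	{
		int ret = register_pernet_subsys(&foo_net_ops);	/* state first */

		if (ret < 0)
			return ret;

		ret = xt_register_targets(foo_tg_regs, ARRAY_SIZE(foo_tg_regs));
		if (ret < 0)
			unregister_pernet_subsys(&foo_net_ops);	/* reverse */
		return ret;
	}
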
910 955
911static void __exit log_tg_exit(void) 956static void __exit log_tg_exit(void)
912{ 957{
958 unregister_pernet_subsys(&log_net_ops);
913 nf_log_unregister(&ipt_log_logger); 959 nf_log_unregister(&ipt_log_logger);
914#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 960#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
915 nf_log_unregister(&ip6t_log_logger); 961 nf_log_unregister(&ip6t_log_logger);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 817f9e9f2b16..1e2fae32f81b 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -76,22 +76,31 @@ static u32 hash_v6(const struct sk_buff *skb)
76} 76}
77#endif 77#endif
78 78
79static unsigned int 79static u32
80nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) 80nfqueue_hash(const struct sk_buff *skb, const struct xt_action_param *par)
81{ 81{
82 const struct xt_NFQ_info_v1 *info = par->targinfo; 82 const struct xt_NFQ_info_v1 *info = par->targinfo;
83 u32 queue = info->queuenum; 83 u32 queue = info->queuenum;
84 84
85 if (info->queues_total > 1) { 85 if (par->family == NFPROTO_IPV4)
86 if (par->family == NFPROTO_IPV4) 86 queue += ((u64) hash_v4(skb) * info->queues_total) >> 32;
87 queue = (((u64) hash_v4(skb) * info->queues_total) >>
88 32) + queue;
89#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 87#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
90 else if (par->family == NFPROTO_IPV6) 88 else if (par->family == NFPROTO_IPV6)
91 queue = (((u64) hash_v6(skb) * info->queues_total) >> 89 queue += ((u64) hash_v6(skb) * info->queues_total) >> 32;
92 32) + queue;
93#endif 90#endif
94 } 91
92 return queue;
93}
94
95static unsigned int
96nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
97{
98 const struct xt_NFQ_info_v1 *info = par->targinfo;
99 u32 queue = info->queuenum;
100
101 if (info->queues_total > 1)
102 queue = nfqueue_hash(skb, par);
103
95 return NF_QUEUE_NR(queue); 104 return NF_QUEUE_NR(queue);
96} 105}
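
nfqueue_hash() folds the two per-family branches into one helper built
on the multiply-shift trick: a 32-bit hash read as a fixed-point
fraction in [0, 1) maps uniformly onto [0, n) with no division. A
standalone sketch:

	/* map a 32-bit hash uniformly onto [0, n) without a modulo */
	static inline u32 scale_hash(u32 hash, u32 n)
	{
		return ((u64)hash * n) >> 32; /* 0x80000000, n = 4 -> 2 */
	}
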
97 106
@@ -108,7 +117,7 @@ nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
108 117
109static int nfqueue_tg_check(const struct xt_tgchk_param *par) 118static int nfqueue_tg_check(const struct xt_tgchk_param *par)
110{ 119{
111 const struct xt_NFQ_info_v2 *info = par->targinfo; 120 const struct xt_NFQ_info_v3 *info = par->targinfo;
112 u32 maxid; 121 u32 maxid;
113 122
114 if (unlikely(!rnd_inited)) { 123 if (unlikely(!rnd_inited)) {
@@ -125,11 +134,32 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par)
125 info->queues_total, maxid); 134 info->queues_total, maxid);
126 return -ERANGE; 135 return -ERANGE;
127 } 136 }
128 if (par->target->revision == 2 && info->bypass > 1) 137 if (par->target->revision == 2 && info->flags > 1)
129 return -EINVAL; 138 return -EINVAL;
139 if (par->target->revision == 3 && info->flags & ~NFQ_FLAG_MASK)
140 return -EINVAL;
141
130 return 0; 142 return 0;
131} 143}
132 144
145static unsigned int
146nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
147{
148 const struct xt_NFQ_info_v3 *info = par->targinfo;
149 u32 queue = info->queuenum;
150
151 if (info->queues_total > 1) {
152 if (info->flags & NFQ_FLAG_CPU_FANOUT) {
153 int cpu = smp_processor_id();
154
155 queue = info->queuenum + cpu % info->queues_total;
156 } else
157 queue = nfqueue_hash(skb, par);
158 }
159
160 return NF_QUEUE_NR(queue);
161}
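
NFQ_FLAG_CPU_FANOUT trades hash-based balancing for locality: each CPU
is pinned to one queue, so packets handled on the same CPU keep their
ordering and stay cache-warm for a single userspace worker. Worked
example with the names from this hunk:

	/* queuenum = 10, queues_total = 4, running on CPU 6:
	 *	queue = 10 + (6 % 4) = 12
	 * every packet processed on CPU 6 lands in queue 12; userspace
	 * binds one worker per queue (iptables exposes the flag as
	 * --queue-cpu-fanout, assuming a new enough iptables). */
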
162
133static struct xt_target nfqueue_tg_reg[] __read_mostly = { 163static struct xt_target nfqueue_tg_reg[] __read_mostly = {
134 { 164 {
135 .name = "NFQUEUE", 165 .name = "NFQUEUE",
@@ -156,6 +186,15 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
156 .targetsize = sizeof(struct xt_NFQ_info_v2), 186 .targetsize = sizeof(struct xt_NFQ_info_v2),
157 .me = THIS_MODULE, 187 .me = THIS_MODULE,
158 }, 188 },
189 {
190 .name = "NFQUEUE",
191 .revision = 3,
192 .family = NFPROTO_UNSPEC,
193 .checkentry = nfqueue_tg_check,
194 .target = nfqueue_tg_v3,
195 .targetsize = sizeof(struct xt_NFQ_info_v3),
196 .me = THIS_MODULE,
197 },
159}; 198};
160 199
161static int __init nfqueue_tg_init(void) 200static int __init nfqueue_tg_init(void)
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 71a266de5fb4..a75240f0d42b 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -2,6 +2,7 @@
2 * This is a module which is used for setting the MSS option in TCP packets. 2 * This is a module which is used for setting the MSS option in TCP packets.
3 * 3 *
4 * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca> 4 * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
5 * Copyright (C) 2007 Patrick McHardy <kaber@trash.net>
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 61805d7b38aa..188404b9b002 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -3,6 +3,7 @@
3 * information. (Superset of Rusty's minimalistic state match.) 3 * information. (Superset of Rusty's minimalistic state match.)
4 * 4 *
5 * (C) 2001 Marc Boucher (marc@mbsi.ca). 5 * (C) 2001 Marc Boucher (marc@mbsi.ca).
6 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
6 * Copyright © CC Computer Consultants GmbH, 2007 - 2008 7 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
7 * 8 *
8 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index f330e8beaf69..0199e7bb8f81 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -3,6 +3,7 @@
3 * separately for each hashbucket (sourceip/sourceport/dstip/dstport) 3 * separately for each hashbucket (sourceip/sourceport/dstip/dstport)
4 * 4 *
5 * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> 5 * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
6 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
6 * Copyright © CC Computer Consultants GmbH, 2007 - 2008 7 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
7 * 8 *
8 * Development of this code was funded by Astaro AG, http://www.astaro.com/ 9 * Development of this code was funded by Astaro AG, http://www.astaro.com/
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index a4c1e4528cac..bef850596558 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -1,5 +1,6 @@
1/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr> 1/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>
2 * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr> 2 * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr>
3 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index a5e673d32bda..647d989a01e6 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -201,6 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
201 unsigned char opts[MAX_IPOPTLEN]; 201 unsigned char opts[MAX_IPOPTLEN];
202 const struct xt_osf_finger *kf; 202 const struct xt_osf_finger *kf;
203 const struct xt_osf_user_finger *f; 203 const struct xt_osf_user_finger *f;
204 struct net *net = dev_net(p->in ? p->in : p->out);
204 205
205 if (!info) 206 if (!info)
206 return false; 207 return false;
@@ -325,7 +326,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
325 fcount++; 326 fcount++;
326 327
327 if (info->flags & XT_OSF_LOG) 328 if (info->flags & XT_OSF_LOG)
328 nf_log_packet(p->family, p->hooknum, skb, 329 nf_log_packet(net, p->family, p->hooknum, skb,
329 p->in, p->out, NULL, 330 p->in, p->out, NULL,
330 "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", 331 "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
331 f->genre, f->version, f->subtype, 332 f->genre, f->version, f->subtype,
@@ -341,7 +342,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
341 rcu_read_unlock(); 342 rcu_read_unlock();
342 343
343 if (!fcount && (info->flags & XT_OSF_LOG)) 344 if (!fcount && (info->flags & XT_OSF_LOG))
344 nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL, 345 nf_log_packet(net, p->family, p->hooknum, skb, p->in,
346 p->out, NULL,
345 "Remote OS is not known: %pI4:%u -> %pI4:%u\n", 347 "Remote OS is not known: %pI4:%u -> %pI4:%u\n",
346 &ip->saddr, ntohs(tcp->source), 348 &ip->saddr, ntohs(tcp->source),
347 &ip->daddr, ntohs(tcp->dest)); 349 &ip->daddr, ntohs(tcp->dest));
diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig
new file mode 100644
index 000000000000..5d6e8c05b3d4
--- /dev/null
+++ b/net/netlink/Kconfig
@@ -0,0 +1,10 @@
1#
2# Netlink Sockets
3#
4
5config NETLINK_DIAG
6 tristate "NETLINK: socket monitoring interface"
7 default n
8 ---help---
9	  Support for the NETLINK socket monitoring interface used by the ss tool.
10 If unsure, say Y.
diff --git a/net/netlink/Makefile b/net/netlink/Makefile
index bdd6ddf4e95b..e837917f6c03 100644
--- a/net/netlink/Makefile
+++ b/net/netlink/Makefile
@@ -3,3 +3,6 @@
3# 3#
4 4
5obj-y := af_netlink.o genetlink.o 5obj-y := af_netlink.o genetlink.o
6
7obj-$(CONFIG_NETLINK_DIAG) += netlink_diag.o
8netlink_diag-y := diag.o
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1e3fd5bfcd86..2a3e9ba814c4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -3,6 +3,7 @@
3 * 3 *
4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> 4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
5 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 5 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
6 * Patrick McHardy <kaber@trash.net>
6 * 7 *
7 * This program is free software; you can redistribute it and/or 8 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 9 * modify it under the terms of the GNU General Public License
@@ -55,87 +56,45 @@
55#include <linux/types.h> 56#include <linux/types.h>
56#include <linux/audit.h> 57#include <linux/audit.h>
57#include <linux/mutex.h> 58#include <linux/mutex.h>
59#include <linux/vmalloc.h>
60#include <asm/cacheflush.h>
58 61
59#include <net/net_namespace.h> 62#include <net/net_namespace.h>
60#include <net/sock.h> 63#include <net/sock.h>
61#include <net/scm.h> 64#include <net/scm.h>
62#include <net/netlink.h> 65#include <net/netlink.h>
63 66
64#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) 67#include "af_netlink.h"
65#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
66
67struct netlink_sock {
68 /* struct sock has to be the first member of netlink_sock */
69 struct sock sk;
70 u32 portid;
71 u32 dst_portid;
72 u32 dst_group;
73 u32 flags;
74 u32 subscriptions;
75 u32 ngroups;
76 unsigned long *groups;
77 unsigned long state;
78 wait_queue_head_t wait;
79 struct netlink_callback *cb;
80 struct mutex *cb_mutex;
81 struct mutex cb_def_mutex;
82 void (*netlink_rcv)(struct sk_buff *skb);
83 void (*netlink_bind)(int group);
84 struct module *module;
85};
86 68
87struct listeners { 69struct listeners {
88 struct rcu_head rcu; 70 struct rcu_head rcu;
89 unsigned long masks[0]; 71 unsigned long masks[0];
90}; 72};
91 73
74/* state bits */
75#define NETLINK_CONGESTED 0x0
76
77/* flags */
92#define NETLINK_KERNEL_SOCKET 0x1 78#define NETLINK_KERNEL_SOCKET 0x1
93#define NETLINK_RECV_PKTINFO 0x2 79#define NETLINK_RECV_PKTINFO 0x2
94#define NETLINK_BROADCAST_SEND_ERROR 0x4 80#define NETLINK_BROADCAST_SEND_ERROR 0x4
95#define NETLINK_RECV_NO_ENOBUFS 0x8 81#define NETLINK_RECV_NO_ENOBUFS 0x8
96 82
97static inline struct netlink_sock *nlk_sk(struct sock *sk)
98{
99 return container_of(sk, struct netlink_sock, sk);
100}
101
102static inline int netlink_is_kernel(struct sock *sk) 83static inline int netlink_is_kernel(struct sock *sk)
103{ 84{
104 return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; 85 return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
105} 86}
106 87
107struct nl_portid_hash { 88struct netlink_table *nl_table;
108 struct hlist_head *table; 89EXPORT_SYMBOL_GPL(nl_table);
109 unsigned long rehash_time;
110
111 unsigned int mask;
112 unsigned int shift;
113
114 unsigned int entries;
115 unsigned int max_shift;
116
117 u32 rnd;
118};
119
120struct netlink_table {
121 struct nl_portid_hash hash;
122 struct hlist_head mc_list;
123 struct listeners __rcu *listeners;
124 unsigned int flags;
125 unsigned int groups;
126 struct mutex *cb_mutex;
127 struct module *module;
128 void (*bind)(int group);
129 int registered;
130};
131
132static struct netlink_table *nl_table;
133 90
134static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); 91static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
135 92
136static int netlink_dump(struct sock *sk); 93static int netlink_dump(struct sock *sk);
94static void netlink_skb_destructor(struct sk_buff *skb);
137 95
138static DEFINE_RWLOCK(nl_table_lock); 96DEFINE_RWLOCK(nl_table_lock);
97EXPORT_SYMBOL_GPL(nl_table_lock);
139static atomic_t nl_table_users = ATOMIC_INIT(0); 98static atomic_t nl_table_users = ATOMIC_INIT(0);
140 99
141#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); 100#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
@@ -152,6 +111,599 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u
152 return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; 111 return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
153} 112}
154 113
114static void netlink_overrun(struct sock *sk)
115{
116 struct netlink_sock *nlk = nlk_sk(sk);
117
118 if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
119 if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) {
120 sk->sk_err = ENOBUFS;
121 sk->sk_error_report(sk);
122 }
123 }
124 atomic_inc(&sk->sk_drops);
125}
126
127static void netlink_rcv_wake(struct sock *sk)
128{
129 struct netlink_sock *nlk = nlk_sk(sk);
130
131 if (skb_queue_empty(&sk->sk_receive_queue))
132 clear_bit(NETLINK_CONGESTED, &nlk->state);
133 if (!test_bit(NETLINK_CONGESTED, &nlk->state))
134 wake_up_interruptible(&nlk->wait);
135}
136
137#ifdef CONFIG_NETLINK_MMAP
138static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
139{
140 return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
141}
142
143static bool netlink_rx_is_mmaped(struct sock *sk)
144{
145 return nlk_sk(sk)->rx_ring.pg_vec != NULL;
146}
147
148static bool netlink_tx_is_mmaped(struct sock *sk)
149{
150 return nlk_sk(sk)->tx_ring.pg_vec != NULL;
151}
152
153static __pure struct page *pgvec_to_page(const void *addr)
154{
155 if (is_vmalloc_addr(addr))
156 return vmalloc_to_page(addr);
157 else
158 return virt_to_page(addr);
159}
160
161static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
162{
163 unsigned int i;
164
165 for (i = 0; i < len; i++) {
166 if (pg_vec[i] != NULL) {
167 if (is_vmalloc_addr(pg_vec[i]))
168 vfree(pg_vec[i]);
169 else
170 free_pages((unsigned long)pg_vec[i], order);
171 }
172 }
173 kfree(pg_vec);
174}
175
176static void *alloc_one_pg_vec_page(unsigned long order)
177{
178 void *buffer;
179 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
180 __GFP_NOWARN | __GFP_NORETRY;
181
182 buffer = (void *)__get_free_pages(gfp_flags, order);
183 if (buffer != NULL)
184 return buffer;
185
186 buffer = vzalloc((1 << order) * PAGE_SIZE);
187 if (buffer != NULL)
188 return buffer;
189
190 gfp_flags &= ~__GFP_NORETRY;
191 return (void *)__get_free_pages(gfp_flags, order);
192}
193
194static void **alloc_pg_vec(struct netlink_sock *nlk,
195 struct nl_mmap_req *req, unsigned int order)
196{
197 unsigned int block_nr = req->nm_block_nr;
198 unsigned int i;
199 void **pg_vec, *ptr;
200
201 pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
202 if (pg_vec == NULL)
203 return NULL;
204
205 for (i = 0; i < block_nr; i++) {
206 pg_vec[i] = ptr = alloc_one_pg_vec_page(order);
207 if (pg_vec[i] == NULL)
208 goto err1;
209 }
210
211 return pg_vec;
212err1:
213 free_pg_vec(pg_vec, order, block_nr);
214 return NULL;
215}
216
217static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
218 bool closing, bool tx_ring)
219{
220 struct netlink_sock *nlk = nlk_sk(sk);
221 struct netlink_ring *ring;
222 struct sk_buff_head *queue;
223 void **pg_vec = NULL;
224 unsigned int order = 0;
225 int err;
226
227 ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
228 queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
229
230 if (!closing) {
231 if (atomic_read(&nlk->mapped))
232 return -EBUSY;
233 if (atomic_read(&ring->pending))
234 return -EBUSY;
235 }
236
237 if (req->nm_block_nr) {
238 if (ring->pg_vec != NULL)
239 return -EBUSY;
240
241 if ((int)req->nm_block_size <= 0)
242 return -EINVAL;
243 if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE))
244 return -EINVAL;
245 if (req->nm_frame_size < NL_MMAP_HDRLEN)
246 return -EINVAL;
247 if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
248 return -EINVAL;
249
250 ring->frames_per_block = req->nm_block_size /
251 req->nm_frame_size;
252 if (ring->frames_per_block == 0)
253 return -EINVAL;
254 if (ring->frames_per_block * req->nm_block_nr !=
255 req->nm_frame_nr)
256 return -EINVAL;
257
258 order = get_order(req->nm_block_size);
259 pg_vec = alloc_pg_vec(nlk, req, order);
260 if (pg_vec == NULL)
261 return -ENOMEM;
262 } else {
263 if (req->nm_frame_nr)
264 return -EINVAL;
265 }
266
267 err = -EBUSY;
268 mutex_lock(&nlk->pg_vec_lock);
269 if (closing || atomic_read(&nlk->mapped) == 0) {
270 err = 0;
271 spin_lock_bh(&queue->lock);
272
273 ring->frame_max = req->nm_frame_nr - 1;
274 ring->head = 0;
275 ring->frame_size = req->nm_frame_size;
276 ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
277
278 swap(ring->pg_vec_len, req->nm_block_nr);
279 swap(ring->pg_vec_order, order);
280 swap(ring->pg_vec, pg_vec);
281
282 __skb_queue_purge(queue);
283 spin_unlock_bh(&queue->lock);
284
285 WARN_ON(atomic_read(&nlk->mapped));
286 }
287 mutex_unlock(&nlk->pg_vec_lock);
288
289 if (pg_vec)
290 free_pg_vec(pg_vec, order, req->nm_block_nr);
291 return err;
292}
293
294static void netlink_mm_open(struct vm_area_struct *vma)
295{
296 struct file *file = vma->vm_file;
297 struct socket *sock = file->private_data;
298 struct sock *sk = sock->sk;
299
300 if (sk)
301 atomic_inc(&nlk_sk(sk)->mapped);
302}
303
304static void netlink_mm_close(struct vm_area_struct *vma)
305{
306 struct file *file = vma->vm_file;
307 struct socket *sock = file->private_data;
308 struct sock *sk = sock->sk;
309
310 if (sk)
311 atomic_dec(&nlk_sk(sk)->mapped);
312}
313
314static const struct vm_operations_struct netlink_mmap_ops = {
315 .open = netlink_mm_open,
316 .close = netlink_mm_close,
317};
318
319static int netlink_mmap(struct file *file, struct socket *sock,
320 struct vm_area_struct *vma)
321{
322 struct sock *sk = sock->sk;
323 struct netlink_sock *nlk = nlk_sk(sk);
324 struct netlink_ring *ring;
325 unsigned long start, size, expected;
326 unsigned int i;
327 int err = -EINVAL;
328
329 if (vma->vm_pgoff)
330 return -EINVAL;
331
332 mutex_lock(&nlk->pg_vec_lock);
333
334 expected = 0;
335 for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
336 if (ring->pg_vec == NULL)
337 continue;
338 expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
339 }
340
341 if (expected == 0)
342 goto out;
343
344 size = vma->vm_end - vma->vm_start;
345 if (size != expected)
346 goto out;
347
348 start = vma->vm_start;
349 for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
350 if (ring->pg_vec == NULL)
351 continue;
352
353 for (i = 0; i < ring->pg_vec_len; i++) {
354 struct page *page;
355 void *kaddr = ring->pg_vec[i];
356 unsigned int pg_num;
357
358 for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
359 page = pgvec_to_page(kaddr);
360 err = vm_insert_page(vma, start, page);
361 if (err < 0)
362 goto out;
363 start += PAGE_SIZE;
364 kaddr += PAGE_SIZE;
365 }
366 }
367 }
368
369 atomic_inc(&nlk->mapped);
370 vma->vm_ops = &netlink_mmap_ops;
371 err = 0;
372out:
373 mutex_unlock(&nlk->pg_vec_lock);
374	return err;
375}
376
377static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
378{
379#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
380 struct page *p_start, *p_end;
381
382 /* First page is flushed through netlink_{get,set}_status */
383	p_start = pgvec_to_page((void *)hdr + PAGE_SIZE);
384 p_end = pgvec_to_page((void *)hdr + NL_MMAP_MSG_HDRLEN + hdr->nm_len - 1);
385 while (p_start <= p_end) {
386 flush_dcache_page(p_start);
387 p_start++;
388 }
389#endif
390}
391
392static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
393{
394 smp_rmb();
395 flush_dcache_page(pgvec_to_page(hdr));
396 return hdr->nm_status;
397}
398
399static void netlink_set_status(struct nl_mmap_hdr *hdr,
400 enum nl_mmap_status status)
401{
402 hdr->nm_status = status;
403 flush_dcache_page(pgvec_to_page(hdr));
404 smp_wmb();
405}
406
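The get/set helpers above carry the whole kernel/userspace synchronization contract: nm_status is written last by the producer and checked first by the consumer, with flush_dcache_page() and the barriers ordering the frame payload against the status word. A minimal sketch of the matching userspace consumer loop (hypothetical names — hdr, fd, pfd, buf, process_msg() — assuming the NL_MMAP_* uapi constants introduced by this series):

/* hdr points at the current frame in the mapped RX ring; fd and pfd
 * refer to the netlink socket this ring belongs to. */
while (hdr->nm_status != NL_MMAP_STATUS_VALID &&
       hdr->nm_status != NL_MMAP_STATUS_COPY)
	poll(&pfd, 1, -1);	/* block until the kernel publishes a frame */

if (hdr->nm_status == NL_MMAP_STATUS_COPY) {
	/* message did not fit into a frame, fetch it the classic way */
	recv(fd, buf, sizeof(buf), 0);
} else {
	/* payload starts NL_MMAP_HDRLEN bytes into the frame */
	process_msg((void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
}
hdr->nm_status = NL_MMAP_STATUS_UNUSED;	/* hand the frame back to the kernel */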
407static struct nl_mmap_hdr *
408__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
409{
410 unsigned int pg_vec_pos, frame_off;
411
412 pg_vec_pos = pos / ring->frames_per_block;
413 frame_off = pos % ring->frames_per_block;
414
415 return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
416}
417
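Worked example for the lookup above (illustrative geometry): with nm_block_size 8192 and nm_frame_size 2048, frames_per_block is 4, so frame pos 6 resolves to pg_vec[6 / 4] = pg_vec[1] at byte offset (6 % 4) * 2048 = 4096.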
418static struct nl_mmap_hdr *
419netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
420 enum nl_mmap_status status)
421{
422 struct nl_mmap_hdr *hdr;
423
424 hdr = __netlink_lookup_frame(ring, pos);
425 if (netlink_get_status(hdr) != status)
426 return NULL;
427
428 return hdr;
429}
430
431static struct nl_mmap_hdr *
432netlink_current_frame(const struct netlink_ring *ring,
433 enum nl_mmap_status status)
434{
435 return netlink_lookup_frame(ring, ring->head, status);
436}
437
438static struct nl_mmap_hdr *
439netlink_previous_frame(const struct netlink_ring *ring,
440 enum nl_mmap_status status)
441{
442 unsigned int prev;
443
444 prev = ring->head ? ring->head - 1 : ring->frame_max;
445 return netlink_lookup_frame(ring, prev, status);
446}
447
448static void netlink_increment_head(struct netlink_ring *ring)
449{
450 ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
451}
452
453static void netlink_forward_ring(struct netlink_ring *ring)
454{
455 unsigned int head = ring->head, pos = head;
456 const struct nl_mmap_hdr *hdr;
457
458 do {
459 hdr = __netlink_lookup_frame(ring, pos);
460 if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
461 break;
462 if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
463 break;
464 netlink_increment_head(ring);
465 } while (ring->head != head);
466}
467
468static bool netlink_dump_space(struct netlink_sock *nlk)
469{
470 struct netlink_ring *ring = &nlk->rx_ring;
471 struct nl_mmap_hdr *hdr;
472 unsigned int n;
473
474 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
475 if (hdr == NULL)
476 return false;
477
478 n = ring->head + ring->frame_max / 2;
479 if (n > ring->frame_max)
480 n -= ring->frame_max;
481
482 hdr = __netlink_lookup_frame(ring, n);
483
484 return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
485}
486
487static unsigned int netlink_poll(struct file *file, struct socket *sock,
488 poll_table *wait)
489{
490 struct sock *sk = sock->sk;
491 struct netlink_sock *nlk = nlk_sk(sk);
492 unsigned int mask;
493 int err;
494
495 if (nlk->rx_ring.pg_vec != NULL) {
496 /* Memory mapped sockets don't call recvmsg(), so flow control
497 * for dumps is performed here. A dump is allowed to continue
498 * if at least half the ring is unused.
499 */
500 while (nlk->cb != NULL && netlink_dump_space(nlk)) {
501 err = netlink_dump(sk);
502 if (err < 0) {
503 sk->sk_err = err;
504 sk->sk_error_report(sk);
505 break;
506 }
507 }
508 netlink_rcv_wake(sk);
509 }
510
511 mask = datagram_poll(file, sock, wait);
512
513 spin_lock_bh(&sk->sk_receive_queue.lock);
514 if (nlk->rx_ring.pg_vec) {
515 netlink_forward_ring(&nlk->rx_ring);
516 if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED))
517 mask |= POLLIN | POLLRDNORM;
518 }
519 spin_unlock_bh(&sk->sk_receive_queue.lock);
520
521 spin_lock_bh(&sk->sk_write_queue.lock);
522 if (nlk->tx_ring.pg_vec) {
523 if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
524 mask |= POLLOUT | POLLWRNORM;
525 }
526 spin_unlock_bh(&sk->sk_write_queue.lock);
527
528 return mask;
529}
530
531static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
532{
533 return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
534}
535
536static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
537 struct netlink_ring *ring,
538 struct nl_mmap_hdr *hdr)
539{
540 unsigned int size;
541 void *data;
542
543 size = ring->frame_size - NL_MMAP_HDRLEN;
544 data = (void *)hdr + NL_MMAP_HDRLEN;
545
546 skb->head = data;
547 skb->data = data;
548 skb_reset_tail_pointer(skb);
549 skb->end = skb->tail + size;
550 skb->len = 0;
551
552 skb->destructor = netlink_skb_destructor;
553 NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
554 NETLINK_CB(skb).sk = sk;
555}
556
557static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
558 u32 dst_portid, u32 dst_group,
559 struct sock_iocb *siocb)
560{
561 struct netlink_sock *nlk = nlk_sk(sk);
562 struct netlink_ring *ring;
563 struct nl_mmap_hdr *hdr;
564 struct sk_buff *skb;
565 unsigned int maxlen;
566 bool excl = true;
567 int err = 0, len = 0;
568
569 /* Netlink messages are validated by the receiver before processing.
570 * In order to avoid userspace changing the contents of the message
571 * after validation, the socket and the ring may only be used by a
572 * single process, otherwise we fall back to copying.
573 */
574 if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 ||
575 atomic_read(&nlk->mapped) > 1)
576 excl = false;
577
578 mutex_lock(&nlk->pg_vec_lock);
579
580 ring = &nlk->tx_ring;
581 maxlen = ring->frame_size - NL_MMAP_HDRLEN;
582
583 do {
584 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
585 if (hdr == NULL) {
586 if (!(msg->msg_flags & MSG_DONTWAIT) &&
587 atomic_read(&nlk->tx_ring.pending))
588 schedule();
589 continue;
590 }
591 if (hdr->nm_len > maxlen) {
592 err = -EINVAL;
593 goto out;
594 }
595
596 netlink_frame_flush_dcache(hdr);
597
598 if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
599 skb = alloc_skb_head(GFP_KERNEL);
600 if (skb == NULL) {
601 err = -ENOBUFS;
602 goto out;
603 }
604 sock_hold(sk);
605 netlink_ring_setup_skb(skb, sk, ring, hdr);
606 NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
607 __skb_put(skb, hdr->nm_len);
608 netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
609 atomic_inc(&ring->pending);
610 } else {
611 skb = alloc_skb(hdr->nm_len, GFP_KERNEL);
612 if (skb == NULL) {
613 err = -ENOBUFS;
614 goto out;
615 }
616 __skb_put(skb, hdr->nm_len);
617 memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
618 netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
619 }
620
621 netlink_increment_head(ring);
622
623 NETLINK_CB(skb).portid = nlk->portid;
624 NETLINK_CB(skb).dst_group = dst_group;
625 NETLINK_CB(skb).creds = siocb->scm->creds;
626
627 err = security_netlink_send(sk, skb);
628 if (err) {
629 kfree_skb(skb);
630 goto out;
631 }
632
633 if (unlikely(dst_group)) {
634 atomic_inc(&skb->users);
635 netlink_broadcast(sk, skb, dst_portid, dst_group,
636 GFP_KERNEL);
637 }
638 err = netlink_unicast(sk, skb, dst_portid,
639 msg->msg_flags & MSG_DONTWAIT);
640 if (err < 0)
641 goto out;
642 len += err;
643
644 } while (hdr != NULL ||
645 (!(msg->msg_flags & MSG_DONTWAIT) &&
646 atomic_read(&nlk->tx_ring.pending)));
647
648 if (len > 0)
649 err = len;
650out:
651 mutex_unlock(&nlk->pg_vec_lock);
652 return err;
653}
654
655static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
656{
657 struct nl_mmap_hdr *hdr;
658
659 hdr = netlink_mmap_hdr(skb);
660 hdr->nm_len = skb->len;
661 hdr->nm_group = NETLINK_CB(skb).dst_group;
662 hdr->nm_pid = NETLINK_CB(skb).creds.pid;
663 hdr->nm_uid = NETLINK_CB(skb).creds.uid;
664 hdr->nm_gid = NETLINK_CB(skb).creds.gid;
665 netlink_frame_flush_dcache(hdr);
666 netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
667
668 NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
669 kfree_skb(skb);
670}
671
672static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
673{
674 struct netlink_sock *nlk = nlk_sk(sk);
675 struct netlink_ring *ring = &nlk->rx_ring;
676 struct nl_mmap_hdr *hdr;
677
678 spin_lock_bh(&sk->sk_receive_queue.lock);
679 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
680 if (hdr == NULL) {
681 spin_unlock_bh(&sk->sk_receive_queue.lock);
682 kfree_skb(skb);
683 netlink_overrun(sk);
684 return;
685 }
686 netlink_increment_head(ring);
687 __skb_queue_tail(&sk->sk_receive_queue, skb);
688 spin_unlock_bh(&sk->sk_receive_queue.lock);
689
690 hdr->nm_len = skb->len;
691 hdr->nm_group = NETLINK_CB(skb).dst_group;
692 hdr->nm_pid = NETLINK_CB(skb).creds.pid;
693 hdr->nm_uid = NETLINK_CB(skb).creds.uid;
694 hdr->nm_gid = NETLINK_CB(skb).creds.gid;
695 netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
696}
697
698#else /* CONFIG_NETLINK_MMAP */
699#define netlink_skb_is_mmaped(skb) false
700#define netlink_rx_is_mmaped(sk) false
701#define netlink_tx_is_mmaped(sk) false
702#define netlink_mmap sock_no_mmap
703#define netlink_poll datagram_poll
704#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0
705#endif /* CONFIG_NETLINK_MMAP */
706
155static void netlink_destroy_callback(struct netlink_callback *cb) 707static void netlink_destroy_callback(struct netlink_callback *cb)
156{ 708{
157 kfree_skb(cb->skb); 709 kfree_skb(cb->skb);
@@ -164,6 +716,53 @@ static void netlink_consume_callback(struct netlink_callback *cb)
164 kfree(cb); 716 kfree(cb);
165} 717}
166 718
719static void netlink_skb_destructor(struct sk_buff *skb)
720{
721#ifdef CONFIG_NETLINK_MMAP
722 struct nl_mmap_hdr *hdr;
723 struct netlink_ring *ring;
724 struct sock *sk;
725
726 /* If a packet from the kernel to userspace was freed because of an
727 * error without being delivered to userspace, the kernel must reset
728 * the status. In the direction userspace to kernel, the status is
729 * always reset here after the packet was processed and freed.
730 */
731 if (netlink_skb_is_mmaped(skb)) {
732 hdr = netlink_mmap_hdr(skb);
733 sk = NETLINK_CB(skb).sk;
734
735 if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
736 netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
737 ring = &nlk_sk(sk)->tx_ring;
738 } else {
739 if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
740 hdr->nm_len = 0;
741 netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
742 }
743 ring = &nlk_sk(sk)->rx_ring;
744 }
745
746 WARN_ON(atomic_read(&ring->pending) == 0);
747 atomic_dec(&ring->pending);
748 sock_put(sk);
749
750 skb->data = NULL;
751 }
752#endif
753 if (skb->sk != NULL)
754 sock_rfree(skb);
755}
756
757static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
758{
759 WARN_ON(skb->sk != NULL);
760 skb->sk = sk;
761 skb->destructor = netlink_skb_destructor;
762 atomic_add(skb->truesize, &sk->sk_rmem_alloc);
763 sk_mem_charge(sk, skb->truesize);
764}
765
167static void netlink_sock_destruct(struct sock *sk) 766static void netlink_sock_destruct(struct sock *sk)
168{ 767{
169 struct netlink_sock *nlk = nlk_sk(sk); 768 struct netlink_sock *nlk = nlk_sk(sk);
@@ -177,6 +776,18 @@ static void netlink_sock_destruct(struct sock *sk)
177 } 776 }
178 777
179 skb_queue_purge(&sk->sk_receive_queue); 778 skb_queue_purge(&sk->sk_receive_queue);
779#ifdef CONFIG_NETLINK_MMAP
780 if (1) {
781 struct nl_mmap_req req;
782
783 memset(&req, 0, sizeof(req));
784 if (nlk->rx_ring.pg_vec)
785 netlink_set_ring(sk, &req, true, false);
786 memset(&req, 0, sizeof(req));
787 if (nlk->tx_ring.pg_vec)
788 netlink_set_ring(sk, &req, true, true);
789 }
790#endif /* CONFIG_NETLINK_MMAP */
180 791
181 if (!sock_flag(sk, SOCK_DEAD)) { 792 if (!sock_flag(sk, SOCK_DEAD)) {
182 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); 793 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
@@ -440,6 +1051,9 @@ static int __netlink_create(struct net *net, struct socket *sock,
440 mutex_init(nlk->cb_mutex); 1051 mutex_init(nlk->cb_mutex);
441 } 1052 }
442 init_waitqueue_head(&nlk->wait); 1053 init_waitqueue_head(&nlk->wait);
1054#ifdef CONFIG_NETLINK_MMAP
1055 mutex_init(&nlk->pg_vec_lock);
1056#endif
443 1057
444 sk->sk_destruct = netlink_sock_destruct; 1058 sk->sk_destruct = netlink_sock_destruct;
445 sk->sk_protocol = protocol; 1059 sk->sk_protocol = protocol;
@@ -771,19 +1385,6 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
771 return 0; 1385 return 0;
772} 1386}
773 1387
774static void netlink_overrun(struct sock *sk)
775{
776 struct netlink_sock *nlk = nlk_sk(sk);
777
778 if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
779 if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
780 sk->sk_err = ENOBUFS;
781 sk->sk_error_report(sk);
782 }
783 }
784 atomic_inc(&sk->sk_drops);
785}
786
787static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) 1388static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
788{ 1389{
789 struct sock *sock; 1390 struct sock *sock;
@@ -836,8 +1437,9 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
836 1437
837 nlk = nlk_sk(sk); 1438 nlk = nlk_sk(sk);
838 1439
839 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 1440 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
840 test_bit(0, &nlk->state)) { 1441 test_bit(NETLINK_CONGESTED, &nlk->state)) &&
1442 !netlink_skb_is_mmaped(skb)) {
841 DECLARE_WAITQUEUE(wait, current); 1443 DECLARE_WAITQUEUE(wait, current);
842 if (!*timeo) { 1444 if (!*timeo) {
843 if (!ssk || netlink_is_kernel(ssk)) 1445 if (!ssk || netlink_is_kernel(ssk))
@@ -851,7 +1453,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
851 add_wait_queue(&nlk->wait, &wait); 1453 add_wait_queue(&nlk->wait, &wait);
852 1454
853 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 1455 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
854 test_bit(0, &nlk->state)) && 1456 test_bit(NETLINK_CONGESTED, &nlk->state)) &&
855 !sock_flag(sk, SOCK_DEAD)) 1457 !sock_flag(sk, SOCK_DEAD))
856 *timeo = schedule_timeout(*timeo); 1458 *timeo = schedule_timeout(*timeo);
857 1459
@@ -865,7 +1467,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
865 } 1467 }
866 return 1; 1468 return 1;
867 } 1469 }
868 skb_set_owner_r(skb, sk); 1470 netlink_skb_set_owner_r(skb, sk);
869 return 0; 1471 return 0;
870} 1472}
871 1473
@@ -873,7 +1475,14 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
873{ 1475{
874 int len = skb->len; 1476 int len = skb->len;
875 1477
876 skb_queue_tail(&sk->sk_receive_queue, skb); 1478#ifdef CONFIG_NETLINK_MMAP
1479 if (netlink_skb_is_mmaped(skb))
1480 netlink_queue_mmaped_skb(sk, skb);
1481 else if (netlink_rx_is_mmaped(sk))
1482 netlink_ring_set_copied(sk, skb);
1483 else
1484#endif /* CONFIG_NETLINK_MMAP */
1485 skb_queue_tail(&sk->sk_receive_queue, skb);
877 sk->sk_data_ready(sk, len); 1486 sk->sk_data_ready(sk, len);
878 return len; 1487 return len;
879} 1488}
@@ -896,7 +1505,9 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
896{ 1505{
897 int delta; 1506 int delta;
898 1507
899 skb_orphan(skb); 1508 WARN_ON(skb->sk != NULL);
1509 if (netlink_skb_is_mmaped(skb))
1510 return skb;
900 1511
901 delta = skb->end - skb->tail; 1512 delta = skb->end - skb->tail;
902 if (delta * 2 < skb->truesize) 1513 if (delta * 2 < skb->truesize)
@@ -916,16 +1527,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
916 return skb; 1527 return skb;
917} 1528}
918 1529
919static void netlink_rcv_wake(struct sock *sk)
920{
921 struct netlink_sock *nlk = nlk_sk(sk);
922
923 if (skb_queue_empty(&sk->sk_receive_queue))
924 clear_bit(0, &nlk->state);
925 if (!test_bit(0, &nlk->state))
926 wake_up_interruptible(&nlk->wait);
927}
928
929static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, 1530static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
930 struct sock *ssk) 1531 struct sock *ssk)
931{ 1532{
@@ -935,8 +1536,8 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
935 ret = -ECONNREFUSED; 1536 ret = -ECONNREFUSED;
936 if (nlk->netlink_rcv != NULL) { 1537 if (nlk->netlink_rcv != NULL) {
937 ret = skb->len; 1538 ret = skb->len;
938 skb_set_owner_r(skb, sk); 1539 netlink_skb_set_owner_r(skb, sk);
939 NETLINK_CB(skb).ssk = ssk; 1540 NETLINK_CB(skb).sk = ssk;
940 nlk->netlink_rcv(skb); 1541 nlk->netlink_rcv(skb);
941 consume_skb(skb); 1542 consume_skb(skb);
942 } else { 1543 } else {
@@ -982,6 +1583,69 @@ retry:
982} 1583}
983EXPORT_SYMBOL(netlink_unicast); 1584EXPORT_SYMBOL(netlink_unicast);
984 1585
1586struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
1587 u32 dst_portid, gfp_t gfp_mask)
1588{
1589#ifdef CONFIG_NETLINK_MMAP
1590 struct sock *sk = NULL;
1591 struct sk_buff *skb;
1592 struct netlink_ring *ring;
1593 struct nl_mmap_hdr *hdr;
1594 unsigned int maxlen;
1595
1596 sk = netlink_getsockbyportid(ssk, dst_portid);
1597 if (IS_ERR(sk))
1598 goto out;
1599
1600 ring = &nlk_sk(sk)->rx_ring;
1601 /* fast-path without atomic ops for common case: non-mmaped receiver */
1602 if (ring->pg_vec == NULL)
1603 goto out_put;
1604
1605 skb = alloc_skb_head(gfp_mask);
1606 if (skb == NULL)
1607 goto err1;
1608
1609 spin_lock_bh(&sk->sk_receive_queue.lock);
1610 /* check again under lock */
1611 if (ring->pg_vec == NULL)
1612 goto out_free;
1613
1614 maxlen = ring->frame_size - NL_MMAP_HDRLEN;
1615 if (maxlen < size)
1616 goto out_free;
1617
1618 netlink_forward_ring(ring);
1619 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
1620 if (hdr == NULL)
1621 goto err2;
1622 netlink_ring_setup_skb(skb, sk, ring, hdr);
1623 netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
1624 atomic_inc(&ring->pending);
1625 netlink_increment_head(ring);
1626
1627 spin_unlock_bh(&sk->sk_receive_queue.lock);
1628 return skb;
1629
1630err2:
1631 kfree_skb(skb);
1632 spin_unlock_bh(&sk->sk_receive_queue.lock);
1633 netlink_overrun(sk);
1634err1:
1635 sock_put(sk);
1636 return NULL;
1637
1638out_free:
1639 kfree_skb(skb);
1640 spin_unlock_bh(&sk->sk_receive_queue.lock);
1641out_put:
1642 sock_put(sk);
1643out:
1644#endif
1645 return alloc_skb(size, gfp_mask);
1646}
1647EXPORT_SYMBOL_GPL(netlink_alloc_skb);
1648
985int netlink_has_listeners(struct sock *sk, unsigned int group) 1649int netlink_has_listeners(struct sock *sk, unsigned int group)
986{ 1650{
987 int res = 0; 1651 int res = 0;
@@ -1006,8 +1670,8 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
1006 struct netlink_sock *nlk = nlk_sk(sk); 1670 struct netlink_sock *nlk = nlk_sk(sk);
1007 1671
1008 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && 1672 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
1009 !test_bit(0, &nlk->state)) { 1673 !test_bit(NETLINK_CONGESTED, &nlk->state)) {
1010 skb_set_owner_r(skb, sk); 1674 netlink_skb_set_owner_r(skb, sk);
1011 __netlink_sendskb(sk, skb); 1675 __netlink_sendskb(sk, skb);
1012 return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); 1676 return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
1013 } 1677 }
@@ -1242,7 +1906,8 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
1242 if (level != SOL_NETLINK) 1906 if (level != SOL_NETLINK)
1243 return -ENOPROTOOPT; 1907 return -ENOPROTOOPT;
1244 1908
1245 if (optlen >= sizeof(int) && 1909 if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
1910 optlen >= sizeof(int) &&
1246 get_user(val, (unsigned int __user *)optval)) 1911 get_user(val, (unsigned int __user *)optval))
1247 return -EFAULT; 1912 return -EFAULT;
1248 1913
@@ -1284,13 +1949,32 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
1284 case NETLINK_NO_ENOBUFS: 1949 case NETLINK_NO_ENOBUFS:
1285 if (val) { 1950 if (val) {
1286 nlk->flags |= NETLINK_RECV_NO_ENOBUFS; 1951 nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
1287 clear_bit(0, &nlk->state); 1952 clear_bit(NETLINK_CONGESTED, &nlk->state);
1288 wake_up_interruptible(&nlk->wait); 1953 wake_up_interruptible(&nlk->wait);
1289 } else { 1954 } else {
1290 nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS; 1955 nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
1291 } 1956 }
1292 err = 0; 1957 err = 0;
1293 break; 1958 break;
1959#ifdef CONFIG_NETLINK_MMAP
1960 case NETLINK_RX_RING:
1961 case NETLINK_TX_RING: {
1962 struct nl_mmap_req req;
1963
1964	/* Rings might consume more memory than queue limits, so require
1965 * CAP_NET_ADMIN.
1966 */
1967 if (!capable(CAP_NET_ADMIN))
1968 return -EPERM;
1969 if (optlen < sizeof(req))
1970 return -EINVAL;
1971 if (copy_from_user(&req, optval, sizeof(req)))
1972 return -EFAULT;
1973 err = netlink_set_ring(sk, &req, false,
1974 optname == NETLINK_TX_RING);
1975 break;
1976 }
1977#endif /* CONFIG_NETLINK_MMAP */
1294 default: 1978 default:
1295 err = -ENOPROTOOPT; 1979 err = -ENOPROTOOPT;
1296 } 1980 }
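Together with netlink_mmap() above, this gives userspace the following setup sequence. A minimal sketch, assuming the NETLINK_RX_RING/NETLINK_TX_RING option names and struct nl_mmap_req layout from this series' uapi additions, and that fd is an already-open AF_NETLINK socket; the geometry must pass the netlink_set_ring() checks (page-aligned block size, NL_MMAP_MSG_ALIGNMENT-aligned frame size, nm_frame_nr == nm_block_nr * frames-per-block), and the caller needs CAP_NET_ADMIN:

struct nl_mmap_req req = {
	.nm_block_size	= 8192,			/* multiple of PAGE_SIZE */
	.nm_block_nr	= 64,
	.nm_frame_size	= 2048,			/* multiple of NL_MMAP_MSG_ALIGNMENT */
	.nm_frame_nr	= 64 * (8192 / 2048),	/* block_nr * frames_per_block */
};
void *ring;

if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0 ||
    setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
	return -1;

/* netlink_mmap() lays out the RX ring first, then the TX ring, and
 * requires the mapping to cover both rings in full. */
ring = mmap(NULL, 2 * (size_t)req.nm_block_size * req.nm_block_nr,
	    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (ring == MAP_FAILED)
	return -1;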
@@ -1401,6 +2085,13 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1401 goto out; 2085 goto out;
1402 } 2086 }
1403 2087
2088 if (netlink_tx_is_mmaped(sk) &&
2089 msg->msg_iov->iov_base == NULL) {
2090 err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
2091 siocb);
2092 goto out;
2093 }
2094
1404 err = -EMSGSIZE; 2095 err = -EMSGSIZE;
1405 if (len > sk->sk_sndbuf - 32) 2096 if (len > sk->sk_sndbuf - 32)
1406 goto out; 2097 goto out;
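The iov_base == NULL test above is the userspace trigger for ring transmission: frames are published into the TX ring and a zero-length sendto() with a NULL buffer makes netlink_sendmsg() take the netlink_mmap_sendmsg() path. A hypothetical sender, continuing the setup sketch earlier (tx_ring and addr are assumptions — the start of the mapped TX ring and the usual destination sockaddr_nl):

struct nl_mmap_hdr *tx_hdr = tx_ring;		/* frame 0, assumed NL_MMAP_STATUS_UNUSED */
struct nlmsghdr *nlh = (void *)tx_hdr + NL_MMAP_HDRLEN;

nlh->nlmsg_len	  = NLMSG_LENGTH(0);		/* build the request in place */
nlh->nlmsg_type	  = NLMSG_NOOP;
tx_hdr->nm_len	  = nlh->nlmsg_len;
tx_hdr->nm_status = NL_MMAP_STATUS_VALID;	/* publish the frame */

/* a NULL iov_base selects the mmap path in netlink_sendmsg() */
sendto(fd, NULL, 0, 0, (struct sockaddr *)&addr, sizeof(addr));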
@@ -1695,7 +2386,7 @@ struct nlmsghdr *
1695__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) 2386__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
1696{ 2387{
1697 struct nlmsghdr *nlh; 2388 struct nlmsghdr *nlh;
1698 int size = NLMSG_LENGTH(len); 2389 int size = nlmsg_msg_size(len);
1699 2390
1700 nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size)); 2391 nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
1701 nlh->nlmsg_type = type; 2392 nlh->nlmsg_type = type;
@@ -1704,7 +2395,7 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla
1704 nlh->nlmsg_pid = portid; 2395 nlh->nlmsg_pid = portid;
1705 nlh->nlmsg_seq = seq; 2396 nlh->nlmsg_seq = seq;
1706 if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) 2397 if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
1707 memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); 2398 memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);
1708 return nlh; 2399 return nlh;
1709} 2400}
1710EXPORT_SYMBOL(__nlmsg_put); 2401EXPORT_SYMBOL(__nlmsg_put);
@@ -1733,9 +2424,13 @@ static int netlink_dump(struct sock *sk)
1733 2424
1734 alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); 2425 alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
1735 2426
1736 skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL); 2427 if (!netlink_rx_is_mmaped(sk) &&
2428 atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
2429 goto errout_skb;
2430 skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL);
1737 if (!skb) 2431 if (!skb)
1738 goto errout_skb; 2432 goto errout_skb;
2433 netlink_skb_set_owner_r(skb, sk);
1739 2434
1740 len = cb->dump(skb, cb); 2435 len = cb->dump(skb, cb);
1741 2436
@@ -1790,6 +2485,19 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1790 if (cb == NULL) 2485 if (cb == NULL)
1791 return -ENOBUFS; 2486 return -ENOBUFS;
1792 2487
2488 /* Memory mapped dump requests need to be copied to avoid looping
2489	 * on the pending state in netlink_mmap_sendmsg() while the CB holds
2490 * a reference to the skb.
2491 */
2492 if (netlink_skb_is_mmaped(skb)) {
2493 skb = skb_copy(skb, GFP_KERNEL);
2494 if (skb == NULL) {
2495 kfree(cb);
2496 return -ENOBUFS;
2497 }
2498 } else
2499 atomic_inc(&skb->users);
2500
1793 cb->dump = control->dump; 2501 cb->dump = control->dump;
1794 cb->done = control->done; 2502 cb->done = control->done;
1795 cb->nlh = nlh; 2503 cb->nlh = nlh;
@@ -1850,7 +2558,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1850 if (err) 2558 if (err)
1851 payload += nlmsg_len(nlh); 2559 payload += nlmsg_len(nlh);
1852 2560
1853 skb = nlmsg_new(payload, GFP_KERNEL); 2561 skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload),
2562 NETLINK_CB(in_skb).portid, GFP_KERNEL);
1854 if (!skb) { 2563 if (!skb) {
1855 struct sock *sk; 2564 struct sock *sk;
1856 2565
@@ -2116,7 +2825,7 @@ static const struct proto_ops netlink_ops = {
2116 .socketpair = sock_no_socketpair, 2825 .socketpair = sock_no_socketpair,
2117 .accept = sock_no_accept, 2826 .accept = sock_no_accept,
2118 .getname = netlink_getname, 2827 .getname = netlink_getname,
2119 .poll = datagram_poll, 2828 .poll = netlink_poll,
2120 .ioctl = sock_no_ioctl, 2829 .ioctl = sock_no_ioctl,
2121 .listen = sock_no_listen, 2830 .listen = sock_no_listen,
2122 .shutdown = sock_no_shutdown, 2831 .shutdown = sock_no_shutdown,
@@ -2124,7 +2833,7 @@ static const struct proto_ops netlink_ops = {
2124 .getsockopt = netlink_getsockopt, 2833 .getsockopt = netlink_getsockopt,
2125 .sendmsg = netlink_sendmsg, 2834 .sendmsg = netlink_sendmsg,
2126 .recvmsg = netlink_recvmsg, 2835 .recvmsg = netlink_recvmsg,
2127 .mmap = sock_no_mmap, 2836 .mmap = netlink_mmap,
2128 .sendpage = sock_no_sendpage, 2837 .sendpage = sock_no_sendpage,
2129}; 2838};
2130 2839
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
new file mode 100644
index 000000000000..ed8522265f4e
--- /dev/null
+++ b/net/netlink/af_netlink.h
@@ -0,0 +1,82 @@
1#ifndef _AF_NETLINK_H
2#define _AF_NETLINK_H
3
4#include <net/sock.h>
5
6#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
7#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
8
9struct netlink_ring {
10 void **pg_vec;
11 unsigned int head;
12 unsigned int frames_per_block;
13 unsigned int frame_size;
14 unsigned int frame_max;
15
16 unsigned int pg_vec_order;
17 unsigned int pg_vec_pages;
18 unsigned int pg_vec_len;
19
20 atomic_t pending;
21};
22
23struct netlink_sock {
24 /* struct sock has to be the first member of netlink_sock */
25 struct sock sk;
26 u32 portid;
27 u32 dst_portid;
28 u32 dst_group;
29 u32 flags;
30 u32 subscriptions;
31 u32 ngroups;
32 unsigned long *groups;
33 unsigned long state;
34 wait_queue_head_t wait;
35 struct netlink_callback *cb;
36 struct mutex *cb_mutex;
37 struct mutex cb_def_mutex;
38 void (*netlink_rcv)(struct sk_buff *skb);
39 void (*netlink_bind)(int group);
40 struct module *module;
41#ifdef CONFIG_NETLINK_MMAP
42 struct mutex pg_vec_lock;
43 struct netlink_ring rx_ring;
44 struct netlink_ring tx_ring;
45 atomic_t mapped;
46#endif /* CONFIG_NETLINK_MMAP */
47};
48
49static inline struct netlink_sock *nlk_sk(struct sock *sk)
50{
51 return container_of(sk, struct netlink_sock, sk);
52}
53
54struct nl_portid_hash {
55 struct hlist_head *table;
56 unsigned long rehash_time;
57
58 unsigned int mask;
59 unsigned int shift;
60
61 unsigned int entries;
62 unsigned int max_shift;
63
64 u32 rnd;
65};
66
67struct netlink_table {
68 struct nl_portid_hash hash;
69 struct hlist_head mc_list;
70 struct listeners __rcu *listeners;
71 unsigned int flags;
72 unsigned int groups;
73 struct mutex *cb_mutex;
74 struct module *module;
75 void (*bind)(int group);
76 int registered;
77};
78
79extern struct netlink_table *nl_table;
80extern rwlock_t nl_table_lock;
81
82#endif
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
new file mode 100644
index 000000000000..4e4aa471cd05
--- /dev/null
+++ b/net/netlink/diag.c
@@ -0,0 +1,220 @@
1#include <linux/module.h>
2
3#include <net/sock.h>
4#include <linux/netlink.h>
5#include <linux/sock_diag.h>
6#include <linux/netlink_diag.h>
7
8#include "af_netlink.h"
9
10static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type,
11 struct sk_buff *nlskb)
12{
13 struct netlink_diag_ring ndr;
14
15 ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
16 ndr.ndr_block_nr = ring->pg_vec_len;
17 ndr.ndr_frame_size = ring->frame_size;
18 ndr.ndr_frame_nr = ring->frame_max + 1;
19
20 return nla_put(nlskb, nl_type, sizeof(ndr), &ndr);
21}
22
23static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
24{
25 struct netlink_sock *nlk = nlk_sk(sk);
26 int ret;
27
28 mutex_lock(&nlk->pg_vec_lock);
29 ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb);
30 if (!ret)
31 ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING,
32 nlskb);
33 mutex_unlock(&nlk->pg_vec_lock);
34
35 return ret;
36}
37
38static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
39{
40 struct netlink_sock *nlk = nlk_sk(sk);
41
42 if (nlk->groups == NULL)
43 return 0;
44
45 return nla_put(nlskb, NETLINK_DIAG_GROUPS, NLGRPSZ(nlk->ngroups),
46 nlk->groups);
47}
48
49static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
50 struct netlink_diag_req *req,
51 u32 portid, u32 seq, u32 flags, int sk_ino)
52{
53 struct nlmsghdr *nlh;
54 struct netlink_diag_msg *rep;
55 struct netlink_sock *nlk = nlk_sk(sk);
56
57 nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep),
58 flags);
59 if (!nlh)
60 return -EMSGSIZE;
61
62 rep = nlmsg_data(nlh);
63 rep->ndiag_family = AF_NETLINK;
64 rep->ndiag_type = sk->sk_type;
65 rep->ndiag_protocol = sk->sk_protocol;
66 rep->ndiag_state = sk->sk_state;
67
68 rep->ndiag_ino = sk_ino;
69 rep->ndiag_portid = nlk->portid;
70 rep->ndiag_dst_portid = nlk->dst_portid;
71 rep->ndiag_dst_group = nlk->dst_group;
72 sock_diag_save_cookie(sk, rep->ndiag_cookie);
73
74 if ((req->ndiag_show & NDIAG_SHOW_GROUPS) &&
75 sk_diag_dump_groups(sk, skb))
76 goto out_nlmsg_trim;
77
78 if ((req->ndiag_show & NDIAG_SHOW_MEMINFO) &&
79 sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
80 goto out_nlmsg_trim;
81
82 if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) &&
83 sk_diag_put_rings_cfg(sk, skb))
84 goto out_nlmsg_trim;
85
86 return nlmsg_end(skb, nlh);
87
88out_nlmsg_trim:
89 nlmsg_cancel(skb, nlh);
90 return -EMSGSIZE;
91}
92
93static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
94 int protocol, int s_num)
95{
96 struct netlink_table *tbl = &nl_table[protocol];
97 struct nl_portid_hash *hash = &tbl->hash;
98 struct net *net = sock_net(skb->sk);
99 struct netlink_diag_req *req;
100 struct sock *sk;
101 int ret = 0, num = 0, i;
102
103 req = nlmsg_data(cb->nlh);
104
105 for (i = 0; i <= hash->mask; i++) {
106 sk_for_each(sk, &hash->table[i]) {
107 if (!net_eq(sock_net(sk), net))
108 continue;
109 if (num < s_num) {
110 num++;
111 continue;
112 }
113
114 if (sk_diag_fill(sk, skb, req,
115 NETLINK_CB(cb->skb).portid,
116 cb->nlh->nlmsg_seq,
117 NLM_F_MULTI,
118 sock_i_ino(sk)) < 0) {
119 ret = 1;
120 goto done;
121 }
122
123 num++;
124 }
125 }
126
127 sk_for_each_bound(sk, &tbl->mc_list) {
128 if (sk_hashed(sk))
129 continue;
130 if (!net_eq(sock_net(sk), net))
131 continue;
132 if (num < s_num) {
133 num++;
134 continue;
135 }
136
137 if (sk_diag_fill(sk, skb, req,
138 NETLINK_CB(cb->skb).portid,
139 cb->nlh->nlmsg_seq,
140 NLM_F_MULTI,
141 sock_i_ino(sk)) < 0) {
142 ret = 1;
143 goto done;
144 }
145 num++;
146 }
147done:
148 cb->args[0] = num;
149 cb->args[1] = protocol;
150
151 return ret;
152}
153
154static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
155{
156 struct netlink_diag_req *req;
157 int s_num = cb->args[0];
158
159 req = nlmsg_data(cb->nlh);
160
161 read_lock(&nl_table_lock);
162
163 if (req->sdiag_protocol == NDIAG_PROTO_ALL) {
164 int i;
165
166 for (i = cb->args[1]; i < MAX_LINKS; i++) {
167 if (__netlink_diag_dump(skb, cb, i, s_num))
168 break;
169 s_num = 0;
170 }
171 } else {
172 if (req->sdiag_protocol >= MAX_LINKS) {
173 read_unlock(&nl_table_lock);
174 return -ENOENT;
175 }
176
177 __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
178 }
179
180 read_unlock(&nl_table_lock);
181
182 return skb->len;
183}
184
185static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
186{
187 int hdrlen = sizeof(struct netlink_diag_req);
188 struct net *net = sock_net(skb->sk);
189
190 if (nlmsg_len(h) < hdrlen)
191 return -EINVAL;
192
193 if (h->nlmsg_flags & NLM_F_DUMP) {
194 struct netlink_dump_control c = {
195 .dump = netlink_diag_dump,
196 };
197 return netlink_dump_start(net->diag_nlsk, skb, h, &c);
198 } else
199 return -EOPNOTSUPP;
200}
201
202static const struct sock_diag_handler netlink_diag_handler = {
203 .family = AF_NETLINK,
204 .dump = netlink_diag_handler_dump,
205};
206
207static int __init netlink_diag_init(void)
208{
209 return sock_diag_register(&netlink_diag_handler);
210}
211
212static void __exit netlink_diag_exit(void)
213{
214 sock_diag_unregister(&netlink_diag_handler);
215}
216
217module_init(netlink_diag_init);
218module_exit(netlink_diag_exit);
219MODULE_LICENSE("GPL");
220MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 16 /* AF_NETLINK */);
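For reference, this is roughly what a consumer such as ss does against the new handler: send SOCK_DIAG_BY_FAMILY with a struct netlink_diag_req over a NETLINK_SOCK_DIAG socket and walk the multipart dump. A minimal sketch, assuming the netlink_diag_req/netlink_diag_msg layouts from the matching uapi header:

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <linux/netlink_diag.h>

int main(void)
{
	struct sockaddr_nl addr = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct netlink_diag_req req;
	} msg = {
		.nlh.nlmsg_len	    = sizeof(msg),
		.nlh.nlmsg_type	    = SOCK_DIAG_BY_FAMILY,
		.nlh.nlmsg_flags    = NLM_F_REQUEST | NLM_F_DUMP,
		.req.sdiag_family   = AF_NETLINK,
		.req.sdiag_protocol = NDIAG_PROTO_ALL,
		.req.ndiag_show	    = NDIAG_SHOW_GROUPS | NDIAG_SHOW_MEMINFO,
	};
	char buf[8192];
	struct nlmsghdr *h;
	ssize_t len;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);

	if (fd < 0 || sendto(fd, &msg, sizeof(msg), 0,
			     (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	len = recv(fd, buf, sizeof(buf), 0);	/* a real client loops until NLMSG_DONE */
	for (h = (struct nlmsghdr *)buf; NLMSG_OK(h, len); h = NLMSG_NEXT(h, len)) {
		struct netlink_diag_msg *d = NLMSG_DATA(h);

		if (h->nlmsg_type == NLMSG_DONE)
			break;
		printf("proto %u portid %u\n", d->ndiag_protocol, d->ndiag_portid);
	}
	close(fd);
	return 0;
}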
diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp/commands.c
index c6bc3bd95052..b75a9b3f9e89 100644
--- a/net/nfc/llcp/commands.c
+++ b/net/nfc/llcp/commands.c
@@ -117,6 +117,88 @@ u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length)
117 return tlv; 117 return tlv;
118} 118}
119 119
120struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap)
121{
122 struct nfc_llcp_sdp_tlv *sdres;
123 u8 value[2];
124
125 sdres = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL);
126 if (sdres == NULL)
127 return NULL;
128
129 value[0] = tid;
130 value[1] = sap;
131
132 sdres->tlv = nfc_llcp_build_tlv(LLCP_TLV_SDRES, value, 2,
133 &sdres->tlv_len);
134 if (sdres->tlv == NULL) {
135 kfree(sdres);
136 return NULL;
137 }
138
139 sdres->tid = tid;
140 sdres->sap = sap;
141
142 INIT_HLIST_NODE(&sdres->node);
143
144 return sdres;
145}
146
147struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
148 size_t uri_len)
149{
150 struct nfc_llcp_sdp_tlv *sdreq;
151
152 pr_debug("uri: %s, len: %zu\n", uri, uri_len);
153
154 sdreq = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL);
155 if (sdreq == NULL)
156 return NULL;
157
158 sdreq->tlv_len = uri_len + 3;
159
160 if (uri[uri_len - 1] == 0)
161 sdreq->tlv_len--;
162
163 sdreq->tlv = kzalloc(sdreq->tlv_len + 1, GFP_KERNEL);
164 if (sdreq->tlv == NULL) {
165 kfree(sdreq);
166 return NULL;
167 }
168
169 sdreq->tlv[0] = LLCP_TLV_SDREQ;
170 sdreq->tlv[1] = sdreq->tlv_len - 2;
171 sdreq->tlv[2] = tid;
172
173 sdreq->tid = tid;
174 sdreq->uri = sdreq->tlv + 3;
175 memcpy(sdreq->uri, uri, uri_len);
176
177 sdreq->time = jiffies;
178
179 INIT_HLIST_NODE(&sdreq->node);
180
181 return sdreq;
182}
183
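On the wire, the SDREQ TLV built above is a plain type/length/value triple with the TID prepended to the URI. Worked example (illustrative values): tid 0x01 with the 15-byte URI "urn:nfc:sn:snep" gives tlv_len = 15 + 3 = 18 and the bytes

	tlv[0]     = LLCP_TLV_SDREQ		/* type */
	tlv[1]     = 16				/* value length: tlv_len - 2, i.e. tid + URI */
	tlv[2]     = 0x01			/* tid */
	tlv[3..17] = "urn:nfc:sn:snep"		/* URI; a trailing NUL is dropped if present */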
184void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp)
185{
186 kfree(sdp->tlv);
187 kfree(sdp);
188}
189
190void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head)
191{
192 struct nfc_llcp_sdp_tlv *sdp;
193 struct hlist_node *n;
194
195 hlist_for_each_entry_safe(sdp, n, head, node) {
196 hlist_del(&sdp->node);
197
198 nfc_llcp_free_sdp_tlv(sdp);
199 }
200}
201
120int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, 202int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
121 u8 *tlv_array, u16 tlv_array_len) 203 u8 *tlv_array, u16 tlv_array_len)
122{ 204{
@@ -184,10 +266,10 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
184 266
185 switch (type) { 267 switch (type) {
186 case LLCP_TLV_MIUX: 268 case LLCP_TLV_MIUX:
187 sock->miu = llcp_tlv_miux(tlv) + 128; 269 sock->remote_miu = llcp_tlv_miux(tlv) + 128;
188 break; 270 break;
189 case LLCP_TLV_RW: 271 case LLCP_TLV_RW:
190 sock->rw = llcp_tlv_rw(tlv); 272 sock->remote_rw = llcp_tlv_rw(tlv);
191 break; 273 break;
192 case LLCP_TLV_SN: 274 case LLCP_TLV_SN:
193 break; 275 break;
@@ -200,7 +282,8 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
200 tlv += length + 2; 282 tlv += length + 2;
201 } 283 }
202 284
203 pr_debug("sock %p rw %d miu %d\n", sock, sock->rw, sock->miu); 285 pr_debug("sock %p rw %d miu %d\n", sock,
286 sock->remote_rw, sock->remote_miu);
204 287
205 return 0; 288 return 0;
206} 289}
@@ -318,9 +401,9 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
318 struct sk_buff *skb; 401 struct sk_buff *skb;
319 u8 *service_name_tlv = NULL, service_name_tlv_length; 402 u8 *service_name_tlv = NULL, service_name_tlv_length;
320 u8 *miux_tlv = NULL, miux_tlv_length; 403 u8 *miux_tlv = NULL, miux_tlv_length;
321 u8 *rw_tlv = NULL, rw_tlv_length; 404 u8 *rw_tlv = NULL, rw_tlv_length, rw;
322 int err; 405 int err;
323 u16 size = 0; 406 u16 size = 0, miux;
324 407
325 pr_debug("Sending CONNECT\n"); 408 pr_debug("Sending CONNECT\n");
326 409
@@ -336,11 +419,15 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
336 size += service_name_tlv_length; 419 size += service_name_tlv_length;
337 } 420 }
338 421
339 miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, 422 /* If the socket parameters are not set, use the local ones */
423 miux = sock->miux > LLCP_MAX_MIUX ? local->miux : sock->miux;
424 rw = sock->rw > LLCP_MAX_RW ? local->rw : sock->rw;
425
426 miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0,
340 &miux_tlv_length); 427 &miux_tlv_length);
341 size += miux_tlv_length; 428 size += miux_tlv_length;
342 429
343 rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &local->rw, 0, &rw_tlv_length); 430 rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length);
344 size += rw_tlv_length; 431 size += rw_tlv_length;
345 432
346 pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len); 433 pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len);
@@ -377,9 +464,9 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)
377 struct nfc_llcp_local *local; 464 struct nfc_llcp_local *local;
378 struct sk_buff *skb; 465 struct sk_buff *skb;
379 u8 *miux_tlv = NULL, miux_tlv_length; 466 u8 *miux_tlv = NULL, miux_tlv_length;
380 u8 *rw_tlv = NULL, rw_tlv_length; 467 u8 *rw_tlv = NULL, rw_tlv_length, rw;
381 int err; 468 int err;
382 u16 size = 0; 469 u16 size = 0, miux;
383 470
384 pr_debug("Sending CC\n"); 471 pr_debug("Sending CC\n");
385 472
@@ -387,11 +474,15 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)
387 if (local == NULL) 474 if (local == NULL)
388 return -ENODEV; 475 return -ENODEV;
389 476
390 miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, 477 /* If the socket parameters are not set, use the local ones */
478 miux = sock->miux > LLCP_MAX_MIUX ? local->miux : sock->miux;
479 rw = sock->rw > LLCP_MAX_RW ? local->rw : sock->rw;
480
481 miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0,
391 &miux_tlv_length); 482 &miux_tlv_length);
392 size += miux_tlv_length; 483 size += miux_tlv_length;
393 484
394 rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &local->rw, 0, &rw_tlv_length); 485 rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length);
395 size += rw_tlv_length; 486 size += rw_tlv_length;
396 487
397 skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size); 488 skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size);
@@ -416,48 +507,90 @@ error_tlv:
416 return err; 507 return err;
417} 508}
418 509
419int nfc_llcp_send_snl(struct nfc_llcp_local *local, u8 tid, u8 sap) 510static struct sk_buff *nfc_llcp_allocate_snl(struct nfc_llcp_local *local,
511 size_t tlv_length)
420{ 512{
421 struct sk_buff *skb; 513 struct sk_buff *skb;
422 struct nfc_dev *dev; 514 struct nfc_dev *dev;
423 u8 *sdres_tlv = NULL, sdres_tlv_length, sdres[2];
424 u16 size = 0; 515 u16 size = 0;
425 516
426 pr_debug("Sending SNL tid 0x%x sap 0x%x\n", tid, sap);
427
428 if (local == NULL) 517 if (local == NULL)
429 return -ENODEV; 518 return ERR_PTR(-ENODEV);
430 519
431 dev = local->dev; 520 dev = local->dev;
432 if (dev == NULL) 521 if (dev == NULL)
433 return -ENODEV; 522 return ERR_PTR(-ENODEV);
434
435 sdres[0] = tid;
436 sdres[1] = sap;
437 sdres_tlv = nfc_llcp_build_tlv(LLCP_TLV_SDRES, sdres, 0,
438 &sdres_tlv_length);
439 if (sdres_tlv == NULL)
440 return -ENOMEM;
441 523
442 size += LLCP_HEADER_SIZE; 524 size += LLCP_HEADER_SIZE;
443 size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE; 525 size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE;
444 size += sdres_tlv_length; 526 size += tlv_length;
445 527
446 skb = alloc_skb(size, GFP_KERNEL); 528 skb = alloc_skb(size, GFP_KERNEL);
447 if (skb == NULL) { 529 if (skb == NULL)
448 kfree(sdres_tlv); 530 return ERR_PTR(-ENOMEM);
449 return -ENOMEM;
450 }
451 531
452 skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE); 532 skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE);
453 533
454 skb = llcp_add_header(skb, LLCP_SAP_SDP, LLCP_SAP_SDP, LLCP_PDU_SNL); 534 skb = llcp_add_header(skb, LLCP_SAP_SDP, LLCP_SAP_SDP, LLCP_PDU_SNL);
455 535
456 memcpy(skb_put(skb, sdres_tlv_length), sdres_tlv, sdres_tlv_length); 536 return skb;
537}
538
539int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local,
540 struct hlist_head *tlv_list, size_t tlvs_len)
541{
542 struct nfc_llcp_sdp_tlv *sdp;
543 struct hlist_node *n;
544 struct sk_buff *skb;
545
546 skb = nfc_llcp_allocate_snl(local, tlvs_len);
547 if (IS_ERR(skb))
548 return PTR_ERR(skb);
549
550 hlist_for_each_entry_safe(sdp, n, tlv_list, node) {
551 memcpy(skb_put(skb, sdp->tlv_len), sdp->tlv, sdp->tlv_len);
552
553 hlist_del(&sdp->node);
554
555 nfc_llcp_free_sdp_tlv(sdp);
556 }
457 557
458 skb_queue_tail(&local->tx_queue, skb); 558 skb_queue_tail(&local->tx_queue, skb);
459 559
460 kfree(sdres_tlv); 560 return 0;
561}
562
563int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local,
564 struct hlist_head *tlv_list, size_t tlvs_len)
565{
566 struct nfc_llcp_sdp_tlv *sdreq;
567 struct hlist_node *n;
568 struct sk_buff *skb;
569
570 skb = nfc_llcp_allocate_snl(local, tlvs_len);
571 if (IS_ERR(skb))
572 return PTR_ERR(skb);
573
574 mutex_lock(&local->sdreq_lock);
575
576 if (hlist_empty(&local->pending_sdreqs))
577 mod_timer(&local->sdreq_timer,
578 jiffies + msecs_to_jiffies(3 * local->remote_lto));
579
580 hlist_for_each_entry_safe(sdreq, n, tlv_list, node) {
581 pr_debug("tid %d for %s\n", sdreq->tid, sdreq->uri);
582
583 memcpy(skb_put(skb, sdreq->tlv_len), sdreq->tlv,
584 sdreq->tlv_len);
585
586 hlist_del(&sdreq->node);
587
588 hlist_add_head(&sdreq->node, &local->pending_sdreqs);
589 }
590
591 mutex_unlock(&local->sdreq_lock);
592
593 skb_queue_tail(&local->tx_queue, skb);
461 594
462 return 0; 595 return 0;
463} 596}
@@ -532,8 +665,8 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
532 665
533 /* Remote is ready but has not acknowledged our frames */ 666 /* Remote is ready but has not acknowledged our frames */
534 if((sock->remote_ready && 667 if((sock->remote_ready &&
535 skb_queue_len(&sock->tx_pending_queue) >= sock->rw && 668 skb_queue_len(&sock->tx_pending_queue) >= sock->remote_rw &&
536 skb_queue_len(&sock->tx_queue) >= 2 * sock->rw)) { 669 skb_queue_len(&sock->tx_queue) >= 2 * sock->remote_rw)) {
537 pr_err("Pending queue is full %d frames\n", 670 pr_err("Pending queue is full %d frames\n",
538 skb_queue_len(&sock->tx_pending_queue)); 671 skb_queue_len(&sock->tx_pending_queue));
539 return -ENOBUFS; 672 return -ENOBUFS;
@@ -541,7 +674,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
541 674
542 /* Remote is not ready and we've been queueing enough frames */ 675 /* Remote is not ready and we've been queueing enough frames */
543 if ((!sock->remote_ready && 676 if ((!sock->remote_ready &&
544 skb_queue_len(&sock->tx_queue) >= 2 * sock->rw)) { 677 skb_queue_len(&sock->tx_queue) >= 2 * sock->remote_rw)) {
545 pr_err("Tx queue is full %d frames\n", 678 pr_err("Tx queue is full %d frames\n",
546 skb_queue_len(&sock->tx_queue)); 679 skb_queue_len(&sock->tx_queue));
547 return -ENOBUFS; 680 return -ENOBUFS;
@@ -561,7 +694,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
561 694
562 while (remaining_len > 0) { 695 while (remaining_len > 0) {
563 696
564 frag_len = min_t(size_t, sock->miu, remaining_len); 697 frag_len = min_t(size_t, sock->remote_miu, remaining_len);
565 698
566 pr_debug("Fragment %zd bytes remaining %zd", 699 pr_debug("Fragment %zd bytes remaining %zd",
567 frag_len, remaining_len); 700 frag_len, remaining_len);
@@ -621,7 +754,7 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,
621 754
622 while (remaining_len > 0) { 755 while (remaining_len > 0) {
623 756
624 frag_len = min_t(size_t, sock->miu, remaining_len); 757 frag_len = min_t(size_t, sock->remote_miu, remaining_len);
625 758
626 pr_debug("Fragment %zd bytes remaining %zd", 759 pr_debug("Fragment %zd bytes remaining %zd",
627 frag_len, remaining_len); 760 frag_len, remaining_len);
diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c
index ee25f25f0cd6..7de0368aff0c 100644
--- a/net/nfc/llcp/llcp.c
+++ b/net/nfc/llcp/llcp.c
@@ -182,6 +182,9 @@ static void local_cleanup(struct nfc_llcp_local *local, bool listen)
182 cancel_work_sync(&local->rx_work); 182 cancel_work_sync(&local->rx_work);
183 cancel_work_sync(&local->timeout_work); 183 cancel_work_sync(&local->timeout_work);
184 kfree_skb(local->rx_pending); 184 kfree_skb(local->rx_pending);
185 del_timer_sync(&local->sdreq_timer);
186 cancel_work_sync(&local->sdreq_timeout_work);
187 nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs);
185} 188}
186 189
187static void local_release(struct kref *ref) 190static void local_release(struct kref *ref)
@@ -259,6 +262,47 @@ static void nfc_llcp_symm_timer(unsigned long data)
259 schedule_work(&local->timeout_work); 262 schedule_work(&local->timeout_work);
260} 263}
261 264
265static void nfc_llcp_sdreq_timeout_work(struct work_struct *work)
266{
267 unsigned long time;
268 HLIST_HEAD(nl_sdres_list);
269 struct hlist_node *n;
270 struct nfc_llcp_sdp_tlv *sdp;
271 struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local,
272 sdreq_timeout_work);
273
274 mutex_lock(&local->sdreq_lock);
275
276 time = jiffies - msecs_to_jiffies(3 * local->remote_lto);
277
278 hlist_for_each_entry_safe(sdp, n, &local->pending_sdreqs, node) {
279 if (time_after(sdp->time, time))
280 continue;
281
282 sdp->sap = LLCP_SDP_UNBOUND;
283
284 hlist_del(&sdp->node);
285
286 hlist_add_head(&sdp->node, &nl_sdres_list);
287 }
288
289 if (!hlist_empty(&local->pending_sdreqs))
290 mod_timer(&local->sdreq_timer,
291 jiffies + msecs_to_jiffies(3 * local->remote_lto));
292
293 mutex_unlock(&local->sdreq_lock);
294
295 if (!hlist_empty(&nl_sdres_list))
296 nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list);
297}
298
299static void nfc_llcp_sdreq_timer(unsigned long data)
300{
301 struct nfc_llcp_local *local = (struct nfc_llcp_local *) data;
302
303 schedule_work(&local->sdreq_timeout_work);
304}
305
262struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev) 306struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev)
263{ 307{
264 struct nfc_llcp_local *local, *n; 308 struct nfc_llcp_local *local, *n;
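
The timeout work above ages out pending requests by comparing each entry's queue timestamp against a cutoff of 3 * LTO milliseconds ago. A sketch of that test, assuming sdp->time was set to jiffies when the request was queued:

	/* time_after() is wrap-safe on jiffies, so the comparison stays
	 * correct across a jiffies rollover. */
	unsigned long cutoff = jiffies - msecs_to_jiffies(3 * local->remote_lto);

	if (time_after(sdp->time, cutoff))
		continue;		/* queued recently, keep it pending */

	sdp->sap = LLCP_SDP_UNBOUND;	/* expired: report back as unbound */
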
@@ -802,8 +846,6 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local,
802 ui_cb->dsap = dsap; 846 ui_cb->dsap = dsap;
803 ui_cb->ssap = ssap; 847 ui_cb->ssap = ssap;
804 848
805 printk("%s %d %d\n", __func__, dsap, ssap);
806
807 pr_debug("%d %d\n", dsap, ssap); 849 pr_debug("%d %d\n", dsap, ssap);
808 850
809 /* We're looking for a bound socket, not a client one */ 851 /* We're looking for a bound socket, not a client one */
@@ -900,7 +942,9 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
900 new_sock = nfc_llcp_sock(new_sk); 942 new_sock = nfc_llcp_sock(new_sk);
901 new_sock->dev = local->dev; 943 new_sock->dev = local->dev;
902 new_sock->local = nfc_llcp_local_get(local); 944 new_sock->local = nfc_llcp_local_get(local);
903 new_sock->miu = local->remote_miu; 945 new_sock->rw = sock->rw;
946 new_sock->miux = sock->miux;
947 new_sock->remote_miu = local->remote_miu;
904 new_sock->nfc_protocol = sock->nfc_protocol; 948 new_sock->nfc_protocol = sock->nfc_protocol;
905 new_sock->dsap = ssap; 949 new_sock->dsap = ssap;
906 new_sock->target_idx = local->target_idx; 950 new_sock->target_idx = local->target_idx;
@@ -954,11 +998,11 @@ int nfc_llcp_queue_i_frames(struct nfc_llcp_sock *sock)
954 998
955 pr_debug("Remote ready %d tx queue len %d remote rw %d", 999 pr_debug("Remote ready %d tx queue len %d remote rw %d",
956 sock->remote_ready, skb_queue_len(&sock->tx_pending_queue), 1000 sock->remote_ready, skb_queue_len(&sock->tx_pending_queue),
957 sock->rw); 1001 sock->remote_rw);
958 1002
959 /* Try to queue some I frames for transmission */ 1003 /* Try to queue some I frames for transmission */
960 while (sock->remote_ready && 1004 while (sock->remote_ready &&
961 skb_queue_len(&sock->tx_pending_queue) < sock->rw) { 1005 skb_queue_len(&sock->tx_pending_queue) < sock->remote_rw) {
962 struct sk_buff *pdu; 1006 struct sk_buff *pdu;
963 1007
964 pdu = skb_dequeue(&sock->tx_queue); 1008 pdu = skb_dequeue(&sock->tx_queue);
@@ -1178,6 +1222,10 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
1178 u16 tlv_len, offset; 1222 u16 tlv_len, offset;
1179 char *service_name; 1223 char *service_name;
1180 size_t service_name_len; 1224 size_t service_name_len;
1225 struct nfc_llcp_sdp_tlv *sdp;
1226 HLIST_HEAD(llc_sdres_list);
1227 size_t sdres_tlvs_len;
1228 HLIST_HEAD(nl_sdres_list);
1181 1229
1182 dsap = nfc_llcp_dsap(skb); 1230 dsap = nfc_llcp_dsap(skb);
1183 ssap = nfc_llcp_ssap(skb); 1231 ssap = nfc_llcp_ssap(skb);
@@ -1192,6 +1240,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
1192 tlv = &skb->data[LLCP_HEADER_SIZE]; 1240 tlv = &skb->data[LLCP_HEADER_SIZE];
1193 tlv_len = skb->len - LLCP_HEADER_SIZE; 1241 tlv_len = skb->len - LLCP_HEADER_SIZE;
1194 offset = 0; 1242 offset = 0;
1243 sdres_tlvs_len = 0;
1195 1244
1196 while (offset < tlv_len) { 1245 while (offset < tlv_len) {
1197 type = tlv[0]; 1246 type = tlv[0];
@@ -1209,14 +1258,14 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
1209 !strncmp(service_name, "urn:nfc:sn:sdp", 1258 !strncmp(service_name, "urn:nfc:sn:sdp",
1210 service_name_len)) { 1259 service_name_len)) {
1211 sap = 1; 1260 sap = 1;
1212 goto send_snl; 1261 goto add_snl;
1213 } 1262 }
1214 1263
1215 llcp_sock = nfc_llcp_sock_from_sn(local, service_name, 1264 llcp_sock = nfc_llcp_sock_from_sn(local, service_name,
1216 service_name_len); 1265 service_name_len);
1217 if (!llcp_sock) { 1266 if (!llcp_sock) {
1218 sap = 0; 1267 sap = 0;
1219 goto send_snl; 1268 goto add_snl;
1220 } 1269 }
1221 1270
1222 /* 1271 /*
@@ -1233,7 +1282,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
1233 1282
1234 if (sap == LLCP_SAP_MAX) { 1283 if (sap == LLCP_SAP_MAX) {
1235 sap = 0; 1284 sap = 0;
1236 goto send_snl; 1285 goto add_snl;
1237 } 1286 }
1238 1287
1239 client_count = 1288 client_count =
@@ -1250,8 +1299,37 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
1250 1299
1251 pr_debug("%p %d\n", llcp_sock, sap); 1300 pr_debug("%p %d\n", llcp_sock, sap);
1252 1301
1253send_snl: 1302add_snl:
1254 nfc_llcp_send_snl(local, tid, sap); 1303 sdp = nfc_llcp_build_sdres_tlv(tid, sap);
1304 if (sdp == NULL)
1305 goto exit;
1306
1307 sdres_tlvs_len += sdp->tlv_len;
1308 hlist_add_head(&sdp->node, &llc_sdres_list);
1309 break;
1310
1311 case LLCP_TLV_SDRES:
1312 mutex_lock(&local->sdreq_lock);
1313
1314 pr_debug("LLCP_TLV_SDRES: searching tid %d\n", tlv[2]);
1315
1316 hlist_for_each_entry(sdp, &local->pending_sdreqs, node) {
1317 if (sdp->tid != tlv[2])
1318 continue;
1319
1320 sdp->sap = tlv[3];
1321
1322 pr_debug("Found: uri=%s, sap=%d\n",
1323 sdp->uri, sdp->sap);
1324
1325 hlist_del(&sdp->node);
1326
1327 hlist_add_head(&sdp->node, &nl_sdres_list);
1328
1329 break;
1330 }
1331
1332 mutex_unlock(&local->sdreq_lock);
1255 break; 1333 break;
1256 1334
1257 default: 1335 default:
@@ -1262,6 +1340,13 @@ send_snl:
1262 offset += length + 2; 1340 offset += length + 2;
1263 tlv += length + 2; 1341 tlv += length + 2;
1264 } 1342 }
1343
1344exit:
1345 if (!hlist_empty(&nl_sdres_list))
1346 nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list);
1347
1348 if (!hlist_empty(&llc_sdres_list))
1349 nfc_llcp_send_snl_sdres(local, &llc_sdres_list, sdres_tlvs_len);
1265} 1350}
1266 1351
1267static void nfc_llcp_rx_work(struct work_struct *work) 1352static void nfc_llcp_rx_work(struct work_struct *work)
@@ -1447,6 +1532,13 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
1447 local->remote_miu = LLCP_DEFAULT_MIU; 1532 local->remote_miu = LLCP_DEFAULT_MIU;
1448 local->remote_lto = LLCP_DEFAULT_LTO; 1533 local->remote_lto = LLCP_DEFAULT_LTO;
1449 1534
1535 mutex_init(&local->sdreq_lock);
1536 INIT_HLIST_HEAD(&local->pending_sdreqs);
1537 init_timer(&local->sdreq_timer);
1538 local->sdreq_timer.data = (unsigned long) local;
1539 local->sdreq_timer.function = nfc_llcp_sdreq_timer;
1540 INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
1541
1450 list_add(&local->list, &llcp_devices); 1542 list_add(&local->list, &llcp_devices);
1451 1543
1452 return 0; 1544 return 0;
diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h
index 0eae5c509504..7e87a66b02ec 100644
--- a/net/nfc/llcp/llcp.h
+++ b/net/nfc/llcp/llcp.h
@@ -46,6 +46,19 @@ struct llcp_sock_list {
46 rwlock_t lock; 46 rwlock_t lock;
47}; 47};
48 48
49struct nfc_llcp_sdp_tlv {
50 u8 *tlv;
51 u8 tlv_len;
52
53 char *uri;
54 u8 tid;
55 u8 sap;
56
57 unsigned long time;
58
59 struct hlist_node node;
60};
61
49struct nfc_llcp_local { 62struct nfc_llcp_local {
50 struct list_head list; 63 struct list_head list;
51 struct nfc_dev *dev; 64 struct nfc_dev *dev;
@@ -86,6 +99,12 @@ struct nfc_llcp_local {
86 u8 remote_opt; 99 u8 remote_opt;
87 u16 remote_wks; 100 u16 remote_wks;
88 101
102 struct mutex sdreq_lock;
103 struct hlist_head pending_sdreqs;
104 struct timer_list sdreq_timer;
105 struct work_struct sdreq_timeout_work;
106 u8 sdreq_next_tid;
107
89 /* sockets array */ 108 /* sockets array */
90 struct llcp_sock_list sockets; 109 struct llcp_sock_list sockets;
91 struct llcp_sock_list connecting_sockets; 110 struct llcp_sock_list connecting_sockets;
@@ -105,7 +124,12 @@ struct nfc_llcp_sock {
105 char *service_name; 124 char *service_name;
106 size_t service_name_len; 125 size_t service_name_len;
107 u8 rw; 126 u8 rw;
108 u16 miu; 127 u16 miux;
128
129
130 /* Remote link parameters */
131 u8 remote_rw;
132 u16 remote_miu;
109 133
110 /* Link variables */ 134 /* Link variables */
111 u8 send_n; 135 u8 send_n;
@@ -213,12 +237,20 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
213/* Commands API */ 237/* Commands API */
214void nfc_llcp_recv(void *data, struct sk_buff *skb, int err); 238void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
215u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length); 239u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length);
240struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap);
241struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
242 size_t uri_len);
243void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp);
244void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head);
216void nfc_llcp_recv(void *data, struct sk_buff *skb, int err); 245void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
217int nfc_llcp_disconnect(struct nfc_llcp_sock *sock); 246int nfc_llcp_disconnect(struct nfc_llcp_sock *sock);
218int nfc_llcp_send_symm(struct nfc_dev *dev); 247int nfc_llcp_send_symm(struct nfc_dev *dev);
219int nfc_llcp_send_connect(struct nfc_llcp_sock *sock); 248int nfc_llcp_send_connect(struct nfc_llcp_sock *sock);
220int nfc_llcp_send_cc(struct nfc_llcp_sock *sock); 249int nfc_llcp_send_cc(struct nfc_llcp_sock *sock);
221int nfc_llcp_send_snl(struct nfc_llcp_local *local, u8 tid, u8 sap); 250int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local,
251 struct hlist_head *tlv_list, size_t tlvs_len);
252int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local,
253 struct hlist_head *tlv_list, size_t tlvs_len);
222int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason); 254int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason);
223int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock); 255int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock);
224int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, 256int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
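
On the wire, an SDRES answer is a plain LLCP type/length/value triple whose two value bytes carry the echoed transaction id and the resolved SAP. A sketch of the layout nfc_llcp_build_sdres_tlv() is expected to produce (the struct and field names here are illustrative, not from the source):

	struct sdres_tlv_wire {
		u8 type;	/* LLCP_TLV_SDRES */
		u8 length;	/* 2 */
		u8 tid;		/* transaction id echoed from the SDREQ */
		u8 sap;		/* resolved SAP, or LLCP_SDP_UNBOUND */
	} __packed;
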
diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c
index 6c94447ec414..c1101e6de170 100644
--- a/net/nfc/llcp/sock.c
+++ b/net/nfc/llcp/sock.c
@@ -223,6 +223,124 @@ error:
223 return ret; 223 return ret;
224} 224}
225 225
226static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
227 char __user *optval, unsigned int optlen)
228{
229 struct sock *sk = sock->sk;
230 struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
231 u32 opt;
232 int err = 0;
233
234 pr_debug("%p optname %d\n", sk, optname);
235
236 if (level != SOL_NFC)
237 return -ENOPROTOOPT;
238
239 lock_sock(sk);
240
241 switch (optname) {
242 case NFC_LLCP_RW:
243 if (sk->sk_state == LLCP_CONNECTED ||
244 sk->sk_state == LLCP_BOUND ||
245 sk->sk_state == LLCP_LISTEN) {
246 err = -EINVAL;
247 break;
248 }
249
250 if (get_user(opt, (u32 __user *) optval)) {
251 err = -EFAULT;
252 break;
253 }
254
255 if (opt > LLCP_MAX_RW) {
256 err = -EINVAL;
257 break;
258 }
259
260 llcp_sock->rw = (u8) opt;
261
262 break;
263
264 case NFC_LLCP_MIUX:
265 if (sk->sk_state == LLCP_CONNECTED ||
266 sk->sk_state == LLCP_BOUND ||
267 sk->sk_state == LLCP_LISTEN) {
268 err = -EINVAL;
269 break;
270 }
271
272 if (get_user(opt, (u32 __user *) optval)) {
273 err = -EFAULT;
274 break;
275 }
276
277 if (opt > LLCP_MAX_MIUX) {
278 err = -EINVAL;
279 break;
280 }
281
282 llcp_sock->miux = (u16) opt;
283
284 break;
285
286 default:
287 err = -ENOPROTOOPT;
288 break;
289 }
290
291 release_sock(sk);
292
293 pr_debug("%p rw %d miux %d\n", llcp_sock,
294 llcp_sock->rw, llcp_sock->miux);
295
296 return err;
297}
298
299static int nfc_llcp_getsockopt(struct socket *sock, int level, int optname,
300 char __user *optval, int __user *optlen)
301{
302 struct sock *sk = sock->sk;
303 struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
304 int len, err = 0;
305
306 pr_debug("%p optname %d\n", sk, optname);
307
308 if (level != SOL_NFC)
309 return -ENOPROTOOPT;
310
311 if (get_user(len, optlen))
312 return -EFAULT;
313
314 len = min_t(u32, len, sizeof(u32));
315
316 lock_sock(sk);
317
318 switch (optname) {
319 case NFC_LLCP_RW:
320 if (put_user(llcp_sock->rw, (u32 __user *) optval))
321 err = -EFAULT;
322
323 break;
324
325 case NFC_LLCP_MIUX:
326 if (put_user(llcp_sock->miux, (u32 __user *) optval))
327 err = -EFAULT;
328
329 break;
330
331 default:
332 err = -ENOPROTOOPT;
333 break;
334 }
335
336 release_sock(sk);
337
338 if (put_user(len, optlen))
339 return -EFAULT;
340
341 return err;
342}
343
226void nfc_llcp_accept_unlink(struct sock *sk) 344void nfc_llcp_accept_unlink(struct sock *sk)
227{ 345{
228 struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); 346 struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
@@ -405,7 +523,8 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,
405 return llcp_accept_poll(sk); 523 return llcp_accept_poll(sk);
406 524
407 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 525 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
408 mask |= POLLERR; 526 mask |= POLLERR |
527 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
409 528
410 if (!skb_queue_empty(&sk->sk_receive_queue)) 529 if (!skb_queue_empty(&sk->sk_receive_queue))
411 mask |= POLLIN | POLLRDNORM; 530 mask |= POLLIN | POLLRDNORM;
@@ -543,7 +662,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
543 662
544 llcp_sock->dev = dev; 663 llcp_sock->dev = dev;
545 llcp_sock->local = nfc_llcp_local_get(local); 664 llcp_sock->local = nfc_llcp_local_get(local);
546 llcp_sock->miu = llcp_sock->local->remote_miu; 665 llcp_sock->remote_miu = llcp_sock->local->remote_miu;
547 llcp_sock->ssap = nfc_llcp_get_local_ssap(local); 666 llcp_sock->ssap = nfc_llcp_get_local_ssap(local);
548 if (llcp_sock->ssap == LLCP_SAP_MAX) { 667 if (llcp_sock->ssap == LLCP_SAP_MAX) {
549 ret = -ENOMEM; 668 ret = -ENOMEM;
@@ -740,8 +859,8 @@ static const struct proto_ops llcp_sock_ops = {
740 .ioctl = sock_no_ioctl, 859 .ioctl = sock_no_ioctl,
741 .listen = llcp_sock_listen, 860 .listen = llcp_sock_listen,
742 .shutdown = sock_no_shutdown, 861 .shutdown = sock_no_shutdown,
743 .setsockopt = sock_no_setsockopt, 862 .setsockopt = nfc_llcp_setsockopt,
744 .getsockopt = sock_no_getsockopt, 863 .getsockopt = nfc_llcp_getsockopt,
745 .sendmsg = llcp_sock_sendmsg, 864 .sendmsg = llcp_sock_sendmsg,
746 .recvmsg = llcp_sock_recvmsg, 865 .recvmsg = llcp_sock_recvmsg,
747 .mmap = sock_no_mmap, 866 .mmap = sock_no_mmap,
@@ -805,8 +924,10 @@ struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp)
805 924
806 llcp_sock->ssap = 0; 925 llcp_sock->ssap = 0;
807 llcp_sock->dsap = LLCP_SAP_SDP; 926 llcp_sock->dsap = LLCP_SAP_SDP;
808 llcp_sock->rw = LLCP_DEFAULT_RW; 927 llcp_sock->rw = LLCP_MAX_RW + 1;
809 llcp_sock->miu = LLCP_DEFAULT_MIU; 928 llcp_sock->miux = LLCP_MAX_MIUX + 1;
929 llcp_sock->remote_rw = LLCP_DEFAULT_RW;
930 llcp_sock->remote_miu = LLCP_DEFAULT_MIU;
810 llcp_sock->send_n = llcp_sock->send_ack_n = 0; 931 llcp_sock->send_n = llcp_sock->send_ack_n = 0;
811 llcp_sock->recv_n = llcp_sock->recv_ack_n = 0; 932 llcp_sock->recv_n = llcp_sock->recv_ack_n = 0;
812 llcp_sock->remote_ready = 1; 933 llcp_sock->remote_ready = 1;
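
With nfc_llcp_setsockopt()/nfc_llcp_getsockopt() wired into llcp_sock_ops, user space can tune the receive window and MIU extension before connecting. A minimal usage sketch (both options are passed as u32 at the socket API even though the protocol fields are narrower; error handling omitted):

	#include <sys/socket.h>
	#include <linux/nfc.h>

	int fd = socket(AF_NFC, SOCK_STREAM, NFC_SOCKPROTO_LLCP);
	__u32 rw = 4;		/* receive window, must be <= LLCP_MAX_RW */
	__u32 miux = 128;	/* MIU extension, must be <= LLCP_MAX_MIUX */

	/* Must happen before bind()/connect(): the kernel rejects these
	 * options once the socket is bound, listening or connected. */
	setsockopt(fd, SOL_NFC, NFC_LLCP_RW, &rw, sizeof(rw));
	setsockopt(fd, SOL_NFC, NFC_LLCP_MIUX, &miux, sizeof(miux));
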
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 504b883439f1..73fd51098f4d 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -53,6 +53,15 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
53 [NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 }, 53 [NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 },
54 [NFC_ATTR_IM_PROTOCOLS] = { .type = NLA_U32 }, 54 [NFC_ATTR_IM_PROTOCOLS] = { .type = NLA_U32 },
55 [NFC_ATTR_TM_PROTOCOLS] = { .type = NLA_U32 }, 55 [NFC_ATTR_TM_PROTOCOLS] = { .type = NLA_U32 },
56 [NFC_ATTR_LLC_PARAM_LTO] = { .type = NLA_U8 },
57 [NFC_ATTR_LLC_PARAM_RW] = { .type = NLA_U8 },
58 [NFC_ATTR_LLC_PARAM_MIUX] = { .type = NLA_U16 },
59 [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED },
60};
61
62static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = {
63 [NFC_SDP_ATTR_URI] = { .type = NLA_STRING },
64 [NFC_SDP_ATTR_SAP] = { .type = NLA_U8 },
56}; 65};
57 66
58static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, 67static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
@@ -348,6 +357,74 @@ free_msg:
348 return -EMSGSIZE; 357 return -EMSGSIZE;
349} 358}
350 359
360int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list)
361{
362 struct sk_buff *msg;
363 struct nlattr *sdp_attr, *uri_attr;
364 struct nfc_llcp_sdp_tlv *sdres;
365 struct hlist_node *n;
366 void *hdr;
367 int rc = -EMSGSIZE;
368 int i;
369
370 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
371 if (!msg)
372 return -ENOMEM;
373
374 hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
375 NFC_EVENT_LLC_SDRES);
376 if (!hdr)
377 goto free_msg;
378
379 if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx))
380 goto nla_put_failure;
381
382 sdp_attr = nla_nest_start(msg, NFC_ATTR_LLC_SDP);
383 if (sdp_attr == NULL) {
384 rc = -ENOMEM;
385 goto nla_put_failure;
386 }
387
388 i = 1;
389 hlist_for_each_entry_safe(sdres, n, sdres_list, node) {
390 pr_debug("uri: %s, sap: %d\n", sdres->uri, sdres->sap);
391
392 uri_attr = nla_nest_start(msg, i++);
393 if (uri_attr == NULL) {
394 rc = -ENOMEM;
395 goto nla_put_failure;
396 }
397
398 if (nla_put_u8(msg, NFC_SDP_ATTR_SAP, sdres->sap))
399 goto nla_put_failure;
400
401 if (nla_put_string(msg, NFC_SDP_ATTR_URI, sdres->uri))
402 goto nla_put_failure;
403
404 nla_nest_end(msg, uri_attr);
405
406 hlist_del(&sdres->node);
407
408 nfc_llcp_free_sdp_tlv(sdres);
409 }
410
411 nla_nest_end(msg, sdp_attr);
412
413 genlmsg_end(msg, hdr);
414
415 return genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC);
416
417nla_put_failure:
418 genlmsg_cancel(msg, hdr);
419
420free_msg:
421 nlmsg_free(msg);
422
423 nfc_llcp_free_sdp_tlv_list(sdres_list);
424
425 return rc;
426}
427
351static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, 428static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
352 u32 portid, u32 seq, 429 u32 portid, u32 seq,
353 struct netlink_callback *cb, 430 struct netlink_callback *cb,
@@ -859,6 +936,96 @@ exit:
859 return rc; 936 return rc;
860} 937}
861 938
939static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info)
940{
941 struct nfc_dev *dev;
942 struct nfc_llcp_local *local;
943 struct nlattr *attr, *sdp_attrs[NFC_SDP_ATTR_MAX+1];
944 u32 idx;
945 u8 tid;
946 char *uri;
947 int rc = 0, rem;
948 size_t uri_len, tlvs_len;
949 struct hlist_head sdreq_list;
950 struct nfc_llcp_sdp_tlv *sdreq;
951
952 if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
953 !info->attrs[NFC_ATTR_LLC_SDP])
954 return -EINVAL;
955
956 idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
957
958 dev = nfc_get_device(idx);
959 if (!dev) {
960 /* exit path would unlock and put a NULL dev; bail out directly */
961 return -ENODEV;
962 }
963
964 device_lock(&dev->dev);
965
966 if (dev->dep_link_up == false) {
967 rc = -ENOLINK;
968 goto exit;
969 }
970
971 local = nfc_llcp_find_local(dev);
972 if (!local) {
973 /* exit path drops the device reference; don't put it twice here */
974 rc = -ENODEV;
975 goto exit;
976 }
977
978 INIT_HLIST_HEAD(&sdreq_list);
979
980 tlvs_len = 0;
981
982 nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) {
983 rc = nla_parse_nested(sdp_attrs, NFC_SDP_ATTR_MAX, attr,
984 nfc_sdp_genl_policy);
985
986 if (rc != 0) {
987 rc = -EINVAL;
988 goto exit;
989 }
990
991 if (!sdp_attrs[NFC_SDP_ATTR_URI])
992 continue;
993
994 uri_len = nla_len(sdp_attrs[NFC_SDP_ATTR_URI]);
995 if (uri_len == 0)
996 continue;
997
998 uri = nla_data(sdp_attrs[NFC_SDP_ATTR_URI]);
999 if (uri == NULL || *uri == 0)
1000 continue;
1001
1002 tid = local->sdreq_next_tid++;
1003
1004 sdreq = nfc_llcp_build_sdreq_tlv(tid, uri, uri_len);
1005 if (sdreq == NULL) {
1006 rc = -ENOMEM;
1007 goto exit;
1008 }
1009
1010 tlvs_len += sdreq->tlv_len;
1011
1012 hlist_add_head(&sdreq->node, &sdreq_list);
1013 }
1014
1015 if (hlist_empty(&sdreq_list)) {
1016 rc = -EINVAL;
1017 goto exit;
1018 }
1019
1020 rc = nfc_llcp_send_snl_sdreq(local, &sdreq_list, tlvs_len);
1021exit:
1022 device_unlock(&dev->dev);
1023
1024 nfc_put_device(dev);
1025
1026 return rc;
1027}
1028
862static struct genl_ops nfc_genl_ops[] = { 1029static struct genl_ops nfc_genl_ops[] = {
863 { 1030 {
864 .cmd = NFC_CMD_GET_DEVICE, 1031 .cmd = NFC_CMD_GET_DEVICE,
@@ -913,6 +1080,11 @@ static struct genl_ops nfc_genl_ops[] = {
913 .doit = nfc_genl_llc_set_params, 1080 .doit = nfc_genl_llc_set_params,
914 .policy = nfc_genl_policy, 1081 .policy = nfc_genl_policy,
915 }, 1082 },
1083 {
1084 .cmd = NFC_CMD_LLC_SDREQ,
1085 .doit = nfc_genl_llc_sdreq,
1086 .policy = nfc_genl_policy,
1087 },
916}; 1088};
917 1089
918 1090
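
The new NFC_CMD_LLC_SDREQ handler expects its URIs as a list of nests inside NFC_ATTR_LLC_SDP. For illustration, the shape of such a request expressed with the kernel's nla helpers (user space would typically build the same layout with libnl; the nest index and URI are arbitrary examples):

	struct nlattr *sdp, *entry;

	nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, idx);

	sdp = nla_nest_start(msg, NFC_ATTR_LLC_SDP);
	entry = nla_nest_start(msg, 1);		/* one nest per URI */
	nla_put_string(msg, NFC_SDP_ATTR_URI, "urn:nfc:sn:handover");
	nla_nest_end(msg, entry);
	nla_nest_end(msg, sdp);
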
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 87d914d2876a..94bfe19ba678 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -46,6 +46,8 @@ struct nfc_rawsock {
46#define to_rawsock_sk(_tx_work) \ 46#define to_rawsock_sk(_tx_work) \
47 ((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work)) 47 ((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work))
48 48
49struct nfc_llcp_sdp_tlv;
50
49#ifdef CONFIG_NFC_LLCP 51#ifdef CONFIG_NFC_LLCP
50 52
51void nfc_llcp_mac_is_down(struct nfc_dev *dev); 53void nfc_llcp_mac_is_down(struct nfc_dev *dev);
@@ -59,6 +61,8 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb);
59struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); 61struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev);
60int __init nfc_llcp_init(void); 62int __init nfc_llcp_init(void);
61void nfc_llcp_exit(void); 63void nfc_llcp_exit(void);
64void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp);
65void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head);
62 66
63#else 67#else
64 68
@@ -112,6 +116,14 @@ static inline void nfc_llcp_exit(void)
112{ 116{
113} 117}
114 118
119static inline void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp)
120{
121}
122
123static inline void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head)
124{
125}
126
115#endif 127#endif
116 128
117int __init rawsock_init(void); 129int __init rawsock_init(void);
@@ -144,6 +156,8 @@ int nfc_genl_dep_link_down_event(struct nfc_dev *dev);
144int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol); 156int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol);
145int nfc_genl_tm_deactivated(struct nfc_dev *dev); 157int nfc_genl_tm_deactivated(struct nfc_dev *dev);
146 158
159int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list);
160
147struct nfc_dev *nfc_get_device(unsigned int idx); 161struct nfc_dev *nfc_get_device(unsigned int idx);
148 162
149static inline void nfc_put_device(struct nfc_dev *dev) 163static inline void nfc_put_device(struct nfc_dev *dev)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index d4d5363c7ba7..894b6cbdd929 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -98,7 +98,7 @@ static int pop_vlan(struct sk_buff *skb)
98 if (unlikely(err)) 98 if (unlikely(err))
99 return err; 99 return err;
100 100
101 __vlan_hwaccel_put_tag(skb, ntohs(tci)); 101 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci));
102 return 0; 102 return 0;
103} 103}
104 104
@@ -110,7 +110,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
110 /* push down current VLAN tag */ 110 /* push down current VLAN tag */
111 current_tag = vlan_tx_tag_get(skb); 111 current_tag = vlan_tx_tag_get(skb);
112 112
113 if (!__vlan_put_tag(skb, current_tag)) 113 if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
114 return -ENOMEM; 114 return -ENOMEM;
115 115
116 if (skb->ip_summed == CHECKSUM_COMPLETE) 116 if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -118,7 +118,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
118 + (2 * ETH_ALEN), VLAN_HLEN, 0)); 118 + (2 * ETH_ALEN), VLAN_HLEN, 0));
119 119
120 } 120 }
121 __vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); 121 __vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
122 return 0; 122 return 0;
123} 123}
124 124
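
The extra argument to the vlan helpers comes from the 802.1ad series: the tag protocol (TPID) is now an explicit __be16 parameter instead of being assumed to be 802.1Q. A two-line sketch of what that enables:

	/* Plain 802.1Q customer tag, as pop_vlan() above re-tags: */
	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tci);

	/* Or an 802.1ad (QinQ) service tag via the same helper: */
	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), tci);
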
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 6980c3e6f066..d2f9f2e57298 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -44,6 +44,7 @@
44#include <linux/netfilter_ipv4.h> 44#include <linux/netfilter_ipv4.h>
45#include <linux/inetdevice.h> 45#include <linux/inetdevice.h>
46#include <linux/list.h> 46#include <linux/list.h>
47#include <linux/lockdep.h>
47#include <linux/openvswitch.h> 48#include <linux/openvswitch.h>
48#include <linux/rculist.h> 49#include <linux/rculist.h>
49#include <linux/dmi.h> 50#include <linux/dmi.h>
@@ -56,38 +57,59 @@
56#include "flow.h" 57#include "flow.h"
57#include "vport-internal_dev.h" 58#include "vport-internal_dev.h"
58 59
59/**
60 * struct ovs_net - Per net-namespace data for ovs.
61 * @dps: List of datapaths to enable dumping them all out.
62 * Protected by genl_mutex.
63 */
64struct ovs_net {
65 struct list_head dps;
66};
67
68static int ovs_net_id __read_mostly;
69 60
70#define REHASH_FLOW_INTERVAL (10 * 60 * HZ) 61#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
71static void rehash_flow_table(struct work_struct *work); 62static void rehash_flow_table(struct work_struct *work);
72static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); 63static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
73 64
65int ovs_net_id __read_mostly;
66
67static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
68 struct genl_multicast_group *grp)
69{
70 genl_notify(skb, genl_info_net(info), info->snd_portid,
71 grp->id, info->nlhdr, GFP_KERNEL);
72}
73
74/** 74/**
75 * DOC: Locking: 75 * DOC: Locking:
76 * 76 *
77 * Writes to device state (add/remove datapath, port, set operations on vports, 77 * All writes, whether to device state (add/remove datapath, port, set
78 * etc.) are protected by RTNL. 78 * operations on vports, etc.) or to other state (flow table
79 * 79 * modifications, miscellaneous datapath parameters, etc.), are protected
80 * Writes to other state (flow table modifications, set miscellaneous datapath 80 * by ovs_lock.
81 * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside
82 * genl_mutex.
83 * 81 *
84 * Reads are protected by RCU. 82 * Reads are protected by RCU.
85 * 83 *
86 * There are a few special cases (mostly stats) that have their own 84 * There are a few special cases (mostly stats) that have their own
87 * synchronization but they nest under all of above and don't interact with 85 * synchronization but they nest under all of above and don't interact with
88 * each other. 86 * each other.
87 *
88 * The RTNL lock nests inside ovs_mutex.
89 */ 89 */
90 90
91static DEFINE_MUTEX(ovs_mutex);
92
93void ovs_lock(void)
94{
95 mutex_lock(&ovs_mutex);
96}
97
98void ovs_unlock(void)
99{
100 mutex_unlock(&ovs_mutex);
101}
102
103#ifdef CONFIG_LOCKDEP
104int lockdep_ovsl_is_held(void)
105{
106 if (debug_locks)
107 return lockdep_is_held(&ovs_mutex);
108 else
109 return 1;
110}
111#endif
112
91static struct vport *new_vport(const struct vport_parms *); 113static struct vport *new_vport(const struct vport_parms *);
92static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, 114static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
93 const struct dp_upcall_info *); 115 const struct dp_upcall_info *);
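
ovsl_dereference() and ASSERT_OVSL(), used throughout the rest of this diff, are the ovs_mutex counterparts of the old genl/RTNL helpers, built on the lockdep_ovsl_is_held() hook above. They live in the companion datapath.h hunk (not shown here); roughly:

	#define ovsl_dereference(p)					\
		rcu_dereference_protected(p, lockdep_ovsl_is_held())
	#define ASSERT_OVSL()	WARN_ON(unlikely(!lockdep_ovsl_is_held()))
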
@@ -95,7 +117,7 @@ static int queue_userspace_packet(struct net *, int dp_ifindex,
95 struct sk_buff *, 117 struct sk_buff *,
96 const struct dp_upcall_info *); 118 const struct dp_upcall_info *);
97 119
98/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ 120/* Must be called with rcu_read_lock or ovs_mutex. */
99static struct datapath *get_dp(struct net *net, int dp_ifindex) 121static struct datapath *get_dp(struct net *net, int dp_ifindex)
100{ 122{
101 struct datapath *dp = NULL; 123 struct datapath *dp = NULL;
@@ -113,10 +135,10 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)
113 return dp; 135 return dp;
114} 136}
115 137
116/* Must be called with rcu_read_lock or RTNL lock. */ 138/* Must be called with rcu_read_lock or ovs_mutex. */
117const char *ovs_dp_name(const struct datapath *dp) 139const char *ovs_dp_name(const struct datapath *dp)
118{ 140{
119 struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL); 141 struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
120 return vport->ops->get_name(vport); 142 return vport->ops->get_name(vport);
121} 143}
122 144
@@ -168,7 +190,7 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
168 return NULL; 190 return NULL;
169} 191}
170 192
171/* Called with RTNL lock and genl_lock. */ 193/* Called with ovs_mutex. */
172static struct vport *new_vport(const struct vport_parms *parms) 194static struct vport *new_vport(const struct vport_parms *parms)
173{ 195{
174 struct vport *vport; 196 struct vport *vport;
@@ -180,14 +202,12 @@ static struct vport *new_vport(const struct vport_parms *parms)
180 202
181 hlist_add_head_rcu(&vport->dp_hash_node, head); 203 hlist_add_head_rcu(&vport->dp_hash_node, head);
182 } 204 }
183
184 return vport; 205 return vport;
185} 206}
186 207
187/* Called with RTNL lock. */
188void ovs_dp_detach_port(struct vport *p) 208void ovs_dp_detach_port(struct vport *p)
189{ 209{
190 ASSERT_RTNL(); 210 ASSERT_OVSL();
191 211
192 /* First drop references to device. */ 212 /* First drop references to device. */
193 hlist_del_rcu(&p->dp_hash_node); 213 hlist_del_rcu(&p->dp_hash_node);
@@ -337,6 +357,35 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
337 return err; 357 return err;
338} 358}
339 359
360static size_t key_attr_size(void)
361{
362 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
363 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
364 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
365 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
366 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
367 + nla_total_size(4) /* OVS_KEY_ATTR_8021Q */
368 + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
369 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
370 + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
371 + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
372 + nla_total_size(28); /* OVS_KEY_ATTR_ND */
373}
374
375static size_t upcall_msg_size(const struct sk_buff *skb,
376 const struct nlattr *userdata)
377{
378 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
379 + nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */
380 + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
381
382 /* OVS_PACKET_ATTR_USERDATA */
383 if (userdata)
384 size += NLA_ALIGN(userdata->nla_len);
385
386 return size;
387}
388
340static int queue_userspace_packet(struct net *net, int dp_ifindex, 389static int queue_userspace_packet(struct net *net, int dp_ifindex,
341 struct sk_buff *skb, 390 struct sk_buff *skb,
342 const struct dp_upcall_info *upcall_info) 391 const struct dp_upcall_info *upcall_info)
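
key_attr_size() and upcall_msg_size() replace the old hand-rolled length computation (removed further down) so the upcall skb is allocated at exactly the needed size. The arithmetic works because nla_total_size() already accounts for the attribute header and padding:

	/* nla_total_size(n) == NLA_ALIGN(NLA_HDRLEN + n), so summing it
	 * per attribute yields a safe upper bound for genlmsg_new(). */
	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
		    + nla_total_size(skb->len)	       /* OVS_PACKET_ATTR_PACKET */
		    + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
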
@@ -345,7 +394,6 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
345 struct sk_buff *nskb = NULL; 394 struct sk_buff *nskb = NULL;
346 struct sk_buff *user_skb; /* to be queued to userspace */ 395 struct sk_buff *user_skb; /* to be queued to userspace */
347 struct nlattr *nla; 396 struct nlattr *nla;
348 unsigned int len;
349 int err; 397 int err;
350 398
351 if (vlan_tx_tag_present(skb)) { 399 if (vlan_tx_tag_present(skb)) {
@@ -353,7 +401,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
353 if (!nskb) 401 if (!nskb)
354 return -ENOMEM; 402 return -ENOMEM;
355 403
356 nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb)); 404 nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb));
357 if (!nskb) 405 if (!nskb)
358 return -ENOMEM; 406 return -ENOMEM;
359 407
@@ -366,13 +414,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
366 goto out; 414 goto out;
367 } 415 }
368 416
369 len = sizeof(struct ovs_header); 417 user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC);
370 len += nla_total_size(skb->len);
371 len += nla_total_size(FLOW_BUFSIZE);
372 if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
373 len += nla_total_size(8);
374
375 user_skb = genlmsg_new(len, GFP_ATOMIC);
376 if (!user_skb) { 418 if (!user_skb) {
377 err = -ENOMEM; 419 err = -ENOMEM;
378 goto out; 420 goto out;
@@ -387,8 +429,9 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
387 nla_nest_end(user_skb, nla); 429 nla_nest_end(user_skb, nla);
388 430
389 if (upcall_info->userdata) 431 if (upcall_info->userdata)
390 nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, 432 __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
391 nla_get_u64(upcall_info->userdata)); 433 nla_len(upcall_info->userdata),
434 nla_data(upcall_info->userdata));
392 435
393 nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); 436 nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
394 437
@@ -402,13 +445,13 @@ out:
402 return err; 445 return err;
403} 446}
404 447
405/* Called with genl_mutex. */ 448/* Called with ovs_mutex. */
406static int flush_flows(struct datapath *dp) 449static int flush_flows(struct datapath *dp)
407{ 450{
408 struct flow_table *old_table; 451 struct flow_table *old_table;
409 struct flow_table *new_table; 452 struct flow_table *new_table;
410 453
411 old_table = genl_dereference(dp->table); 454 old_table = ovsl_dereference(dp->table);
412 new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); 455 new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
413 if (!new_table) 456 if (!new_table)
414 return -ENOMEM; 457 return -ENOMEM;
@@ -544,7 +587,7 @@ static int validate_userspace(const struct nlattr *attr)
544{ 587{
545 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { 588 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
546 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, 589 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
547 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 }, 590 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
548 }; 591 };
549 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; 592 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
550 int error; 593 int error;
@@ -661,8 +704,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
661 704
662 err = -EINVAL; 705 err = -EINVAL;
663 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || 706 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
664 !a[OVS_PACKET_ATTR_ACTIONS] || 707 !a[OVS_PACKET_ATTR_ACTIONS])
665 nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
666 goto err; 708 goto err;
667 709
668 len = nla_len(a[OVS_PACKET_ATTR_PACKET]); 710 len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
@@ -672,7 +714,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
672 goto err; 714 goto err;
673 skb_reserve(packet, NET_IP_ALIGN); 715 skb_reserve(packet, NET_IP_ALIGN);
674 716
675 memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len); 717 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
676 718
677 skb_reset_mac_header(packet); 719 skb_reset_mac_header(packet);
678 eth = eth_hdr(packet); 720 eth = eth_hdr(packet);
@@ -680,7 +722,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
680 /* Normally, setting the skb 'protocol' field would be handled by a 722 /* Normally, setting the skb 'protocol' field would be handled by a
681 * call to eth_type_trans(), but it assumes there's a sending 723 * call to eth_type_trans(), but it assumes there's a sending
682 * device, which we may not have. */ 724 * device, which we may not have. */
683 if (ntohs(eth->h_proto) >= 1536) 725 if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
684 packet->protocol = eth->h_proto; 726 packet->protocol = eth->h_proto;
685 else 727 else
686 packet->protocol = htons(ETH_P_802_2); 728 packet->protocol = htons(ETH_P_802_2);
@@ -743,7 +785,7 @@ err:
743} 785}
744 786
745static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { 787static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
746 [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC }, 788 [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
747 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, 789 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
748 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, 790 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
749}; 791};
@@ -759,7 +801,7 @@ static struct genl_ops dp_packet_genl_ops[] = {
759static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) 801static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
760{ 802{
761 int i; 803 int i;
762 struct flow_table *table = genl_dereference(dp->table); 804 struct flow_table *table = ovsl_dereference(dp->table);
763 805
764 stats->n_flows = ovs_flow_tbl_count(table); 806 stats->n_flows = ovs_flow_tbl_count(table);
765 807
@@ -801,7 +843,17 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = {
801 .name = OVS_FLOW_MCGROUP 843 .name = OVS_FLOW_MCGROUP
802}; 844};
803 845
804/* Called with genl_lock. */ 846static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
847{
848 return NLMSG_ALIGN(sizeof(struct ovs_header))
849 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
850 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
851 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
852 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
853 + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
854}
855
856/* Called with ovs_mutex. */
805static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, 857static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
806 struct sk_buff *skb, u32 portid, 858 struct sk_buff *skb, u32 portid,
807 u32 seq, u32 flags, u8 cmd) 859 u32 seq, u32 flags, u8 cmd)
@@ -815,8 +867,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
815 u8 tcp_flags; 867 u8 tcp_flags;
816 int err; 868 int err;
817 869
818 sf_acts = rcu_dereference_protected(flow->sf_acts, 870 sf_acts = ovsl_dereference(flow->sf_acts);
819 lockdep_genl_is_held());
820 871
821 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); 872 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
822 if (!ovs_header) 873 if (!ovs_header)
@@ -879,25 +930,10 @@ error:
879static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) 930static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
880{ 931{
881 const struct sw_flow_actions *sf_acts; 932 const struct sw_flow_actions *sf_acts;
882 int len;
883 933
884 sf_acts = rcu_dereference_protected(flow->sf_acts, 934 sf_acts = ovsl_dereference(flow->sf_acts);
885 lockdep_genl_is_held());
886 935
887 /* OVS_FLOW_ATTR_KEY */ 936 return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL);
888 len = nla_total_size(FLOW_BUFSIZE);
889 /* OVS_FLOW_ATTR_ACTIONS */
890 len += nla_total_size(sf_acts->actions_len);
891 /* OVS_FLOW_ATTR_STATS */
892 len += nla_total_size(sizeof(struct ovs_flow_stats));
893 /* OVS_FLOW_ATTR_TCP_FLAGS */
894 len += nla_total_size(1);
895 /* OVS_FLOW_ATTR_USED */
896 len += nla_total_size(8);
897
898 len += NLMSG_ALIGN(sizeof(struct ovs_header));
899
900 return genlmsg_new(len, GFP_KERNEL);
901} 937}
902 938
903static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, 939static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
@@ -946,12 +982,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
946 goto error; 982 goto error;
947 } 983 }
948 984
985 ovs_lock();
949 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 986 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
950 error = -ENODEV; 987 error = -ENODEV;
951 if (!dp) 988 if (!dp)
952 goto error; 989 goto err_unlock_ovs;
953 990
954 table = genl_dereference(dp->table); 991 table = ovsl_dereference(dp->table);
955 flow = ovs_flow_tbl_lookup(table, &key, key_len); 992 flow = ovs_flow_tbl_lookup(table, &key, key_len);
956 if (!flow) { 993 if (!flow) {
957 struct sw_flow_actions *acts; 994 struct sw_flow_actions *acts;
@@ -959,7 +996,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
959 /* Bail out if we're not allowed to create a new flow. */ 996 /* Bail out if we're not allowed to create a new flow. */
960 error = -ENOENT; 997 error = -ENOENT;
961 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) 998 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
962 goto error; 999 goto err_unlock_ovs;
963 1000
964 /* Expand table, if necessary, to make room. */ 1001 /* Expand table, if necessary, to make room. */
965 if (ovs_flow_tbl_need_to_expand(table)) { 1002 if (ovs_flow_tbl_need_to_expand(table)) {
@@ -969,7 +1006,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
969 if (!IS_ERR(new_table)) { 1006 if (!IS_ERR(new_table)) {
970 rcu_assign_pointer(dp->table, new_table); 1007 rcu_assign_pointer(dp->table, new_table);
971 ovs_flow_tbl_deferred_destroy(table); 1008 ovs_flow_tbl_deferred_destroy(table);
972 table = genl_dereference(dp->table); 1009 table = ovsl_dereference(dp->table);
973 } 1010 }
974 } 1011 }
975 1012
@@ -977,7 +1014,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
977 flow = ovs_flow_alloc(); 1014 flow = ovs_flow_alloc();
978 if (IS_ERR(flow)) { 1015 if (IS_ERR(flow)) {
979 error = PTR_ERR(flow); 1016 error = PTR_ERR(flow);
980 goto error; 1017 goto err_unlock_ovs;
981 } 1018 }
982 flow->key = key; 1019 flow->key = key;
983 clear_stats(flow); 1020 clear_stats(flow);
@@ -1010,11 +1047,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1010 error = -EEXIST; 1047 error = -EEXIST;
1011 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && 1048 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
1012 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) 1049 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1013 goto error; 1050 goto err_unlock_ovs;
1014 1051
1015 /* Update actions. */ 1052 /* Update actions. */
1016 old_acts = rcu_dereference_protected(flow->sf_acts, 1053 old_acts = ovsl_dereference(flow->sf_acts);
1017 lockdep_genl_is_held());
1018 acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; 1054 acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
1019 if (acts_attrs && 1055 if (acts_attrs &&
1020 (old_acts->actions_len != nla_len(acts_attrs) || 1056 (old_acts->actions_len != nla_len(acts_attrs) ||
@@ -1025,7 +1061,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1025 new_acts = ovs_flow_actions_alloc(acts_attrs); 1061 new_acts = ovs_flow_actions_alloc(acts_attrs);
1026 error = PTR_ERR(new_acts); 1062 error = PTR_ERR(new_acts);
1027 if (IS_ERR(new_acts)) 1063 if (IS_ERR(new_acts))
1028 goto error; 1064 goto err_unlock_ovs;
1029 1065
1030 rcu_assign_pointer(flow->sf_acts, new_acts); 1066 rcu_assign_pointer(flow->sf_acts, new_acts);
1031 ovs_flow_deferred_free_acts(old_acts); 1067 ovs_flow_deferred_free_acts(old_acts);
@@ -1041,11 +1077,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1041 spin_unlock_bh(&flow->lock); 1077 spin_unlock_bh(&flow->lock);
1042 } 1078 }
1043 } 1079 }
1080 ovs_unlock();
1044 1081
1045 if (!IS_ERR(reply)) 1082 if (!IS_ERR(reply))
1046 genl_notify(reply, genl_info_net(info), info->snd_portid, 1083 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
1047 ovs_dp_flow_multicast_group.id, info->nlhdr,
1048 GFP_KERNEL);
1049 else 1084 else
1050 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 1085 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1051 ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); 1086 ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
@@ -1053,6 +1088,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1053 1088
1054error_free_flow: 1089error_free_flow:
1055 ovs_flow_free(flow); 1090 ovs_flow_free(flow);
1091err_unlock_ovs:
1092 ovs_unlock();
1056error: 1093error:
1057 return error; 1094 return error;
1058} 1095}
@@ -1075,21 +1112,32 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1075 if (err) 1112 if (err)
1076 return err; 1113 return err;
1077 1114
1115 ovs_lock();
1078 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1116 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1079 if (!dp) 1117 if (!dp) {
1080 return -ENODEV; 1118 err = -ENODEV;
1119 goto unlock;
1120 }
1081 1121
1082 table = genl_dereference(dp->table); 1122 table = ovsl_dereference(dp->table);
1083 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1123 flow = ovs_flow_tbl_lookup(table, &key, key_len);
1084 if (!flow) 1124 if (!flow) {
1085 return -ENOENT; 1125 err = -ENOENT;
1126 goto unlock;
1127 }
1086 1128
1087 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 1129 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1088 info->snd_seq, OVS_FLOW_CMD_NEW); 1130 info->snd_seq, OVS_FLOW_CMD_NEW);
1089 if (IS_ERR(reply)) 1131 if (IS_ERR(reply)) {
1090 return PTR_ERR(reply); 1132 err = PTR_ERR(reply);
1133 goto unlock;
1134 }
1091 1135
1136 ovs_unlock();
1092 return genlmsg_reply(reply, info); 1137 return genlmsg_reply(reply, info);
1138unlock:
1139 ovs_unlock();
1140 return err;
1093} 1141}
1094 1142
1095static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) 1143static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
@@ -1104,25 +1152,33 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1104 int err; 1152 int err;
1105 int key_len; 1153 int key_len;
1106 1154
1155 ovs_lock();
1107 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1156 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1108 if (!dp) 1157 if (!dp) {
1109 return -ENODEV; 1158 err = -ENODEV;
1110 1159 goto unlock;
1111 if (!a[OVS_FLOW_ATTR_KEY]) 1160 }
1112 return flush_flows(dp);
1113 1161
1162 if (!a[OVS_FLOW_ATTR_KEY]) {
1163 err = flush_flows(dp);
1164 goto unlock;
1165 }
1114 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1166 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1115 if (err) 1167 if (err)
1116 return err; 1168 goto unlock;
1117 1169
1118 table = genl_dereference(dp->table); 1170 table = ovsl_dereference(dp->table);
1119 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1171 flow = ovs_flow_tbl_lookup(table, &key, key_len);
1120 if (!flow) 1172 if (!flow) {
1121 return -ENOENT; 1173 err = -ENOENT;
1174 goto unlock;
1175 }
1122 1176
1123 reply = ovs_flow_cmd_alloc_info(flow); 1177 reply = ovs_flow_cmd_alloc_info(flow);
1124 if (!reply) 1178 if (!reply) {
1125 return -ENOMEM; 1179 err = -ENOMEM;
1180 goto unlock;
1181 }
1126 1182
1127 ovs_flow_tbl_remove(table, flow); 1183 ovs_flow_tbl_remove(table, flow);
1128 1184
@@ -1131,10 +1187,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1131 BUG_ON(err < 0); 1187 BUG_ON(err < 0);
1132 1188
1133 ovs_flow_deferred_free(flow); 1189 ovs_flow_deferred_free(flow);
1190 ovs_unlock();
1134 1191
1135 genl_notify(reply, genl_info_net(info), info->snd_portid, 1192 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
1136 ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
1137 return 0; 1193 return 0;
1194unlock:
1195 ovs_unlock();
1196 return err;
1138} 1197}
1139 1198
1140static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1199static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1143,11 +1202,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1143 struct datapath *dp; 1202 struct datapath *dp;
1144 struct flow_table *table; 1203 struct flow_table *table;
1145 1204
1205 ovs_lock();
1146 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1206 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1147 if (!dp) 1207 if (!dp) {
1208 ovs_unlock();
1148 return -ENODEV; 1209 return -ENODEV;
1210 }
1149 1211
1150 table = genl_dereference(dp->table); 1212 table = ovsl_dereference(dp->table);
1151 1213
1152 for (;;) { 1214 for (;;) {
1153 struct sw_flow *flow; 1215 struct sw_flow *flow;
@@ -1168,6 +1230,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1168 cb->args[0] = bucket; 1230 cb->args[0] = bucket;
1169 cb->args[1] = obj; 1231 cb->args[1] = obj;
1170 } 1232 }
1233 ovs_unlock();
1171 return skb->len; 1234 return skb->len;
1172} 1235}
1173 1236
@@ -1213,6 +1276,16 @@ static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
1213 .name = OVS_DATAPATH_MCGROUP 1276 .name = OVS_DATAPATH_MCGROUP
1214}; 1277};
1215 1278
1279static size_t ovs_dp_cmd_msg_size(void)
1280{
1281 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1282
1283 msgsize += nla_total_size(IFNAMSIZ);
1284 msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
1285
1286 return msgsize;
1287}
1288
1216static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, 1289static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1217 u32 portid, u32 seq, u32 flags, u8 cmd) 1290 u32 portid, u32 seq, u32 flags, u8 cmd)
1218{ 1291{
@@ -1251,7 +1324,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
1251 struct sk_buff *skb; 1324 struct sk_buff *skb;
1252 int retval; 1325 int retval;
1253 1326
1254 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1327 skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1255 if (!skb) 1328 if (!skb)
1256 return ERR_PTR(-ENOMEM); 1329 return ERR_PTR(-ENOMEM);
1257 1330
@@ -1263,7 +1336,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
1263 return skb; 1336 return skb;
1264} 1337}
1265 1338
1266/* Called with genl_mutex and optionally with RTNL lock also. */ 1339/* Called with ovs_mutex. */
1267static struct datapath *lookup_datapath(struct net *net, 1340static struct datapath *lookup_datapath(struct net *net,
1268 struct ovs_header *ovs_header, 1341 struct ovs_header *ovs_header,
1269 struct nlattr *a[OVS_DP_ATTR_MAX + 1]) 1342 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
@@ -1297,12 +1370,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1297 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) 1370 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1298 goto err; 1371 goto err;
1299 1372
1300 rtnl_lock(); 1373 ovs_lock();
1301 1374
1302 err = -ENOMEM; 1375 err = -ENOMEM;
1303 dp = kzalloc(sizeof(*dp), GFP_KERNEL); 1376 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1304 if (dp == NULL) 1377 if (dp == NULL)
1305 goto err_unlock_rtnl; 1378 goto err_unlock_ovs;
1306 1379
1307 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); 1380 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
1308 1381
@@ -1353,37 +1426,34 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1353 1426
1354 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); 1427 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1355 list_add_tail(&dp->list_node, &ovs_net->dps); 1428 list_add_tail(&dp->list_node, &ovs_net->dps);
1356 rtnl_unlock();
1357 1429
1358 genl_notify(reply, genl_info_net(info), info->snd_portid, 1430 ovs_unlock();
1359 ovs_dp_datapath_multicast_group.id, info->nlhdr, 1431
1360 GFP_KERNEL); 1432 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1361 return 0; 1433 return 0;
1362 1434
1363err_destroy_local_port: 1435err_destroy_local_port:
1364 ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); 1436 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1365err_destroy_ports_array: 1437err_destroy_ports_array:
1366 kfree(dp->ports); 1438 kfree(dp->ports);
1367err_destroy_percpu: 1439err_destroy_percpu:
1368 free_percpu(dp->stats_percpu); 1440 free_percpu(dp->stats_percpu);
1369err_destroy_table: 1441err_destroy_table:
1370 ovs_flow_tbl_destroy(genl_dereference(dp->table)); 1442 ovs_flow_tbl_destroy(ovsl_dereference(dp->table));
1371err_free_dp: 1443err_free_dp:
1372 release_net(ovs_dp_get_net(dp)); 1444 release_net(ovs_dp_get_net(dp));
1373 kfree(dp); 1445 kfree(dp);
1374err_unlock_rtnl: 1446err_unlock_ovs:
1375 rtnl_unlock(); 1447 ovs_unlock();
1376err: 1448err:
1377 return err; 1449 return err;
1378} 1450}
1379 1451
1380/* Called with genl_mutex. */ 1452/* Called with ovs_mutex. */
1381static void __dp_destroy(struct datapath *dp) 1453static void __dp_destroy(struct datapath *dp)
1382{ 1454{
1383 int i; 1455 int i;
1384 1456
1385 rtnl_lock();
1386
1387 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { 1457 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1388 struct vport *vport; 1458 struct vport *vport;
1389 struct hlist_node *n; 1459 struct hlist_node *n;
@@ -1394,14 +1464,11 @@ static void __dp_destroy(struct datapath *dp)
1394 } 1464 }
1395 1465
1396 list_del(&dp->list_node); 1466 list_del(&dp->list_node);
1397 ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
1398 1467
1399 /* rtnl_unlock() will wait until all the references to devices that 1468 /* OVSP_LOCAL is the datapath's internal port. We need to make sure
1400 * are pending unregistration have been dropped. We do it here to 1469 * all ports in the datapath are destroyed before freeing the datapath.
1401 * ensure that any internal devices (which contain DP pointers) are
1402 * fully destroyed before freeing the datapath.
1403 */ 1470 */
1404 rtnl_unlock(); 1471 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1405 1472
1406 call_rcu(&dp->rcu, destroy_dp_rcu); 1473 call_rcu(&dp->rcu, destroy_dp_rcu);
1407} 1474}
@@ -1412,24 +1479,27 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1412 struct datapath *dp; 1479 struct datapath *dp;
1413 int err; 1480 int err;
1414 1481
1482 ovs_lock();
1415 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1483 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1416 err = PTR_ERR(dp); 1484 err = PTR_ERR(dp);
1417 if (IS_ERR(dp)) 1485 if (IS_ERR(dp))
1418 return err; 1486 goto unlock;
1419 1487
1420 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1488 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1421 info->snd_seq, OVS_DP_CMD_DEL); 1489 info->snd_seq, OVS_DP_CMD_DEL);
1422 err = PTR_ERR(reply); 1490 err = PTR_ERR(reply);
1423 if (IS_ERR(reply)) 1491 if (IS_ERR(reply))
1424 return err; 1492 goto unlock;
1425 1493
1426 __dp_destroy(dp); 1494 __dp_destroy(dp);
1495 ovs_unlock();
1427 1496
1428 genl_notify(reply, genl_info_net(info), info->snd_portid, 1497 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1429 ovs_dp_datapath_multicast_group.id, info->nlhdr,
1430 GFP_KERNEL);
1431 1498
1432 return 0; 1499 return 0;
1500unlock:
1501 ovs_unlock();
1502 return err;
1433} 1503}
1434 1504
1435static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) 1505static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
@@ -1438,9 +1508,11 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1438 struct datapath *dp; 1508 struct datapath *dp;
1439 int err; 1509 int err;
1440 1510
1511 ovs_lock();
1441 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1512 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1513 err = PTR_ERR(dp);
1442 if (IS_ERR(dp)) 1514 if (IS_ERR(dp))
1443 return PTR_ERR(dp); 1515 goto unlock;
1444 1516
1445 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1517 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1446 info->snd_seq, OVS_DP_CMD_NEW); 1518 info->snd_seq, OVS_DP_CMD_NEW);
@@ -1448,31 +1520,45 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1448 err = PTR_ERR(reply); 1520 err = PTR_ERR(reply);
1449 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 1521 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1450 ovs_dp_datapath_multicast_group.id, err); 1522 ovs_dp_datapath_multicast_group.id, err);
1451 return 0; 1523 err = 0;
1524 goto unlock;
1452 } 1525 }
1453 1526
1454 genl_notify(reply, genl_info_net(info), info->snd_portid, 1527 ovs_unlock();
1455 ovs_dp_datapath_multicast_group.id, info->nlhdr, 1528 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1456 GFP_KERNEL);
1457 1529
1458 return 0; 1530 return 0;
1531unlock:
1532 ovs_unlock();
1533 return err;
1459} 1534}
1460 1535
1461static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) 1536static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1462{ 1537{
1463 struct sk_buff *reply; 1538 struct sk_buff *reply;
1464 struct datapath *dp; 1539 struct datapath *dp;
1540 int err;
1465 1541
1542 ovs_lock();
1466 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1543 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1467 if (IS_ERR(dp)) 1544 if (IS_ERR(dp)) {
1468 return PTR_ERR(dp); 1545 err = PTR_ERR(dp);
1546 goto unlock;
1547 }
1469 1548
1470 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1549 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1471 info->snd_seq, OVS_DP_CMD_NEW); 1550 info->snd_seq, OVS_DP_CMD_NEW);
1472 if (IS_ERR(reply)) 1551 if (IS_ERR(reply)) {
1473 return PTR_ERR(reply); 1552 err = PTR_ERR(reply);
1553 goto unlock;
1554 }
1474 1555
1556 ovs_unlock();
1475 return genlmsg_reply(reply, info); 1557 return genlmsg_reply(reply, info);
1558
1559unlock:
1560 ovs_unlock();
1561 return err;
1476} 1562}
1477 1563
1478static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1564static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1482,6 +1568,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1482 int skip = cb->args[0]; 1568 int skip = cb->args[0];
1483 int i = 0; 1569 int i = 0;
1484 1570
1571 ovs_lock();
1485 list_for_each_entry(dp, &ovs_net->dps, list_node) { 1572 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1486 if (i >= skip && 1573 if (i >= skip &&
1487 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, 1574 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
@@ -1490,6 +1577,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1490 break; 1577 break;
1491 i++; 1578 i++;
1492 } 1579 }
1580 ovs_unlock();
1493 1581
1494 cb->args[0] = i; 1582 cb->args[0] = i;
1495 1583
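
Editor's note: the dump path above now holds ovs_lock() across the whole list
walk and relies on the classic netlink dump cursor: cb->args[0] records how
many datapaths have already been emitted, so the next invocation of
ovs_dp_cmd_dump() skips them and resumes where the previous message filled up.
A minimal userspace sketch of that resumable-cursor pattern (all names are
illustrative, not kernel APIs):

/* cb->args[0] plays the role of 'cursor' here. */
#include <stdio.h>

#define N_ITEMS   10
#define BATCH_MAX 4     /* stands in for "the skb is full" */

/* Emit up to BATCH_MAX items starting at *cursor; update *cursor so a
 * later call resumes where this one stopped. Returns items emitted. */
static int dump_batch(const int *items, int n, int *cursor)
{
        int skip = *cursor;
        int emitted = 0;
        int i;

        for (i = 0; i < n; i++) {
                if (i < skip)
                        continue;       /* already dumped last time */
                if (emitted == BATCH_MAX)
                        break;          /* "message full", stop early */
                printf("item %d\n", items[i]);
                emitted++;
        }
        *cursor = i;                    /* resume point, like cb->args[0] */
        return emitted;
}

int main(void)
{
        int items[N_ITEMS] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
        int cursor = 0;

        while (dump_batch(items, N_ITEMS, &cursor) > 0)
                printf("-- next dump call --\n");
        return 0;
}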
@@ -1542,7 +1630,7 @@ struct genl_multicast_group ovs_dp_vport_multicast_group = {
1542 .name = OVS_VPORT_MCGROUP 1630 .name = OVS_VPORT_MCGROUP
1543}; 1631};
1544 1632
1545/* Called with RTNL lock or RCU read lock. */ 1633/* Called with ovs_mutex or RCU read lock. */
1546static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, 1634static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1547 u32 portid, u32 seq, u32 flags, u8 cmd) 1635 u32 portid, u32 seq, u32 flags, u8 cmd)
1548{ 1636{
@@ -1581,7 +1669,7 @@ error:
1581 return err; 1669 return err;
1582} 1670}
1583 1671
1584/* Called with RTNL lock or RCU read lock. */ 1672/* Called with ovs_mutex or RCU read lock. */
1585struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, 1673struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1586 u32 seq, u8 cmd) 1674 u32 seq, u8 cmd)
1587{ 1675{
@@ -1598,7 +1686,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1598 return skb; 1686 return skb;
1599} 1687}
1600 1688
1601/* Called with RTNL lock or RCU read lock. */ 1689/* Called with ovs_mutex or RCU read lock. */
1602static struct vport *lookup_vport(struct net *net, 1690static struct vport *lookup_vport(struct net *net,
1603 struct ovs_header *ovs_header, 1691 struct ovs_header *ovs_header,
1604 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) 1692 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
@@ -1624,9 +1712,9 @@ static struct vport *lookup_vport(struct net *net,
1624 if (!dp) 1712 if (!dp)
1625 return ERR_PTR(-ENODEV); 1713 return ERR_PTR(-ENODEV);
1626 1714
1627 vport = ovs_vport_rtnl_rcu(dp, port_no); 1715 vport = ovs_vport_ovsl_rcu(dp, port_no);
1628 if (!vport) 1716 if (!vport)
1629 return ERR_PTR(-ENOENT); 1717 return ERR_PTR(-ENODEV);
1630 return vport; 1718 return vport;
1631 } else 1719 } else
1632 return ERR_PTR(-EINVAL); 1720 return ERR_PTR(-EINVAL);
@@ -1648,7 +1736,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1648 !a[OVS_VPORT_ATTR_UPCALL_PID]) 1736 !a[OVS_VPORT_ATTR_UPCALL_PID])
1649 goto exit; 1737 goto exit;
1650 1738
1651 rtnl_lock(); 1739 ovs_lock();
1652 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1740 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1653 err = -ENODEV; 1741 err = -ENODEV;
1654 if (!dp) 1742 if (!dp)
@@ -1661,7 +1749,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1661 if (port_no >= DP_MAX_PORTS) 1749 if (port_no >= DP_MAX_PORTS)
1662 goto exit_unlock; 1750 goto exit_unlock;
1663 1751
1664 vport = ovs_vport_rtnl_rcu(dp, port_no); 1752 vport = ovs_vport_ovsl(dp, port_no);
1665 err = -EBUSY; 1753 err = -EBUSY;
1666 if (vport) 1754 if (vport)
1667 goto exit_unlock; 1755 goto exit_unlock;
@@ -1671,7 +1759,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1671 err = -EFBIG; 1759 err = -EFBIG;
1672 goto exit_unlock; 1760 goto exit_unlock;
1673 } 1761 }
1674 vport = ovs_vport_rtnl(dp, port_no); 1762 vport = ovs_vport_ovsl(dp, port_no);
1675 if (!vport) 1763 if (!vport)
1676 break; 1764 break;
1677 } 1765 }
@@ -1697,11 +1785,11 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1697 ovs_dp_detach_port(vport); 1785 ovs_dp_detach_port(vport);
1698 goto exit_unlock; 1786 goto exit_unlock;
1699 } 1787 }
1700 genl_notify(reply, genl_info_net(info), info->snd_portid, 1788
1701 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); 1789 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1702 1790
1703exit_unlock: 1791exit_unlock:
1704 rtnl_unlock(); 1792 ovs_unlock();
1705exit: 1793exit:
1706 return err; 1794 return err;
1707} 1795}
@@ -1713,7 +1801,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1713 struct vport *vport; 1801 struct vport *vport;
1714 int err; 1802 int err;
1715 1803
1716 rtnl_lock(); 1804 ovs_lock();
1717 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 1805 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1718 err = PTR_ERR(vport); 1806 err = PTR_ERR(vport);
1719 if (IS_ERR(vport)) 1807 if (IS_ERR(vport))
@@ -1742,8 +1830,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1742 info->snd_seq, 0, OVS_VPORT_CMD_NEW); 1830 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
1743 BUG_ON(err < 0); 1831 BUG_ON(err < 0);
1744 1832
1745 genl_notify(reply, genl_info_net(info), info->snd_portid, 1833 ovs_unlock();
1746 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); 1834 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1835 return 0;
1747 1836
1748 rtnl_unlock(); 1837 rtnl_unlock();
1749 return 0; 1838 return 0;
@@ -1751,7 +1840,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1751exit_free: 1840exit_free:
1752 kfree_skb(reply); 1841 kfree_skb(reply);
1753exit_unlock: 1842exit_unlock:
1754 rtnl_unlock(); 1843 ovs_unlock();
1755 return err; 1844 return err;
1756} 1845}
1757 1846
@@ -1762,7 +1851,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1762 struct vport *vport; 1851 struct vport *vport;
1763 int err; 1852 int err;
1764 1853
1765 rtnl_lock(); 1854 ovs_lock();
1766 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 1855 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1767 err = PTR_ERR(vport); 1856 err = PTR_ERR(vport);
1768 if (IS_ERR(vport)) 1857 if (IS_ERR(vport))
@@ -1782,11 +1871,10 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1782 err = 0; 1871 err = 0;
1783 ovs_dp_detach_port(vport); 1872 ovs_dp_detach_port(vport);
1784 1873
1785 genl_notify(reply, genl_info_net(info), info->snd_portid, 1874 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1786 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1787 1875
1788exit_unlock: 1876exit_unlock:
1789 rtnl_unlock(); 1877 ovs_unlock();
1790 return err; 1878 return err;
1791} 1879}
1792 1880
@@ -1946,13 +2034,13 @@ static void rehash_flow_table(struct work_struct *work)
1946 struct datapath *dp; 2034 struct datapath *dp;
1947 struct net *net; 2035 struct net *net;
1948 2036
1949 genl_lock(); 2037 ovs_lock();
1950 rtnl_lock(); 2038 rtnl_lock();
1951 for_each_net(net) { 2039 for_each_net(net) {
1952 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2040 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1953 2041
1954 list_for_each_entry(dp, &ovs_net->dps, list_node) { 2042 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1955 struct flow_table *old_table = genl_dereference(dp->table); 2043 struct flow_table *old_table = ovsl_dereference(dp->table);
1956 struct flow_table *new_table; 2044 struct flow_table *new_table;
1957 2045
1958 new_table = ovs_flow_tbl_rehash(old_table); 2046 new_table = ovs_flow_tbl_rehash(old_table);
@@ -1963,8 +2051,7 @@ static void rehash_flow_table(struct work_struct *work)
1963 } 2051 }
1964 } 2052 }
1965 rtnl_unlock(); 2053 rtnl_unlock();
1966 genl_unlock(); 2054 ovs_unlock();
1967
1968 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); 2055 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
1969} 2056}
1970 2057
@@ -1973,18 +2060,21 @@ static int __net_init ovs_init_net(struct net *net)
1973 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2060 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1974 2061
1975 INIT_LIST_HEAD(&ovs_net->dps); 2062 INIT_LIST_HEAD(&ovs_net->dps);
2063 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
1976 return 0; 2064 return 0;
1977} 2065}
1978 2066
1979static void __net_exit ovs_exit_net(struct net *net) 2067static void __net_exit ovs_exit_net(struct net *net)
1980{ 2068{
1981 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1982 struct datapath *dp, *dp_next; 2069 struct datapath *dp, *dp_next;
2070 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1983 2071
1984 genl_lock(); 2072 ovs_lock();
1985 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) 2073 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
1986 __dp_destroy(dp); 2074 __dp_destroy(dp);
1987 genl_unlock(); 2075 ovs_unlock();
2076
2077 cancel_work_sync(&ovs_net->dp_notify_work);
1988} 2078}
1989 2079
1990static struct pernet_operations ovs_net_ops = { 2080static struct pernet_operations ovs_net_ops = {
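
Editor's note: throughout ovs_dp_cmd_{del,set,get}() and the vport commands
the commit replaces early "return err" statements with "goto unlock", so every
exit path drops ovs_mutex exactly once and replies are sent only after the
lock is released. (One leftover: the "rtnl_unlock(); return 0;" pair retained
in ovs_vport_cmd_set() above is unreachable after the conversion.) A minimal
sketch of the single-exit locking discipline, with a pthread mutex standing in
for ovs_mutex and made-up lookup()/build_reply() helpers:

#include <errno.h>
#include <pthread.h>
#include <stddef.h>

static pthread_mutex_t ovs_mutex_demo = PTHREAD_MUTEX_INITIALIZER;

static void *lookup(int id)         { return id == 1 ? (void *)1 : NULL; }
static void *build_reply(void *obj) { return obj; }

static int cmd_get(int id)
{
        void *obj, *reply;
        int err;

        pthread_mutex_lock(&ovs_mutex_demo);

        obj = lookup(id);
        if (!obj) {
                err = -ENODEV;
                goto unlock;            /* every failure path unlocks */
        }

        reply = build_reply(obj);
        if (!reply) {
                err = -ENOMEM;
                goto unlock;
        }

        pthread_mutex_unlock(&ovs_mutex_demo);
        /* ... send the reply outside the lock, as ovs_dp_cmd_get() does */
        return 0;

unlock:
        pthread_mutex_unlock(&ovs_mutex_demo);
        return err;
}

int main(void)
{
        return cmd_get(1) || cmd_get(2) != -ENODEV;
}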
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 031dfbf37c93..16b840695216 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -57,10 +57,9 @@ struct dp_stats_percpu {
57 * struct datapath - datapath for flow-based packet switching 57 * struct datapath - datapath for flow-based packet switching
58 * @rcu: RCU callback head for deferred destruction. 58 * @rcu: RCU callback head for deferred destruction.
59 * @list_node: Element in global 'dps' list. 59 * @list_node: Element in global 'dps' list.
60 * @n_flows: Number of flows currently in flow table. 60 * @table: Current flow table. Protected by ovs_mutex and RCU.
61 * @table: Current flow table. Protected by genl_lock and RCU.
62 * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by 61 * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by
63 * RTNL and RCU. 62 * ovs_mutex and RCU.
64 * @stats_percpu: Per-CPU datapath statistics. 63 * @stats_percpu: Per-CPU datapath statistics.
65 * @net: Reference to net namespace. 64 * @net: Reference to net namespace.
66 * 65 *
@@ -86,26 +85,6 @@ struct datapath {
86#endif 85#endif
87}; 86};
88 87
89struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
90
91static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
92{
93 WARN_ON_ONCE(!rcu_read_lock_held());
94 return ovs_lookup_vport(dp, port_no);
95}
96
97static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no)
98{
99 WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked());
100 return ovs_lookup_vport(dp, port_no);
101}
102
103static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no)
104{
105 ASSERT_RTNL();
106 return ovs_lookup_vport(dp, port_no);
107}
108
109/** 88/**
110 * struct ovs_skb_cb - OVS data in skb CB 89 * struct ovs_skb_cb - OVS data in skb CB
111 * @flow: The flow associated with this packet. May be %NULL if no flow. 90 * @flow: The flow associated with this packet. May be %NULL if no flow.
@@ -119,7 +98,7 @@ struct ovs_skb_cb {
119 * struct dp_upcall - metadata to include with a packet to send to userspace 98 * struct dp_upcall - metadata to include with a packet to send to userspace
120 * @cmd: One of %OVS_PACKET_CMD_*. 99 * @cmd: One of %OVS_PACKET_CMD_*.
121 * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull. 100 * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull.
122 * @userdata: If nonnull, its u64 value is extracted and passed to userspace as 101 * @userdata: If nonnull, its variable-length value is passed to userspace as
123 * %OVS_PACKET_ATTR_USERDATA. 102 * %OVS_PACKET_ATTR_USERDATA.
124 * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no 103 * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no
125 * packet is sent and the packet is accounted in the datapath's @n_lost 104 * packet is sent and the packet is accounted in the datapath's @n_lost
@@ -132,6 +111,30 @@ struct dp_upcall_info {
132 u32 portid; 111 u32 portid;
133}; 112};
134 113
114/**
115 * struct ovs_net - Per net-namespace data for ovs.
116 * @dps: List of datapaths to enable dumping them all out.
117 * Protected by ovs_mutex.

118 */
119struct ovs_net {
120 struct list_head dps;
121 struct work_struct dp_notify_work;
122};
123
124extern int ovs_net_id;
125void ovs_lock(void);
126void ovs_unlock(void);
127
128#ifdef CONFIG_LOCKDEP
129int lockdep_ovsl_is_held(void);
130#else
131#define lockdep_ovsl_is_held() 1
132#endif
133
134#define ASSERT_OVSL() WARN_ON(unlikely(!lockdep_ovsl_is_held()))
135#define ovsl_dereference(p) \
136 rcu_dereference_protected(p, lockdep_ovsl_is_held())
137
135static inline struct net *ovs_dp_get_net(struct datapath *dp) 138static inline struct net *ovs_dp_get_net(struct datapath *dp)
136{ 139{
137 return read_pnet(&dp->net); 140 return read_pnet(&dp->net);
@@ -142,6 +145,26 @@ static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)
142 write_pnet(&dp->net, net); 145 write_pnet(&dp->net, net);
143} 146}
144 147
148struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
149
150static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
151{
152 WARN_ON_ONCE(!rcu_read_lock_held());
153 return ovs_lookup_vport(dp, port_no);
154}
155
156static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no)
157{
158 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
159 return ovs_lookup_vport(dp, port_no);
160}
161
162static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no)
163{
164 ASSERT_OVSL();
165 return ovs_lookup_vport(dp, port_no);
166}
167
145extern struct notifier_block ovs_dp_device_notifier; 168extern struct notifier_block ovs_dp_device_notifier;
146extern struct genl_multicast_group ovs_dp_vport_multicast_group; 169extern struct genl_multicast_group ovs_dp_vport_multicast_group;
147 170
@@ -155,4 +178,5 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
155 u8 cmd); 178 u8 cmd);
156 179
157int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb); 180int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
181void ovs_dp_notify_wq(struct work_struct *work);
158#endif /* datapath.h */ 182#endif /* datapath.h */
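
Editor's note: the new ovsl_dereference() wraps rcu_dereference_protected()
with lockdep_ovsl_is_held(), i.e. an RCU-published pointer may be read without
rcu_read_lock() provided ovs_mutex is held, and ASSERT_OVSL() documents the
same requirement for plain code paths. A userspace analogue of that idea, with
an explicit "held" flag standing in for lockdep (illustrative only, not the
kernel implementation):

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t ovs_mutex_demo = PTHREAD_MUTEX_INITIALIZER;
static __thread bool ovs_mutex_held;    /* lockdep stand-in */

static void ovs_lock_demo(void)
{
        pthread_mutex_lock(&ovs_mutex_demo);
        ovs_mutex_held = true;
}

static void ovs_unlock_demo(void)
{
        ovs_mutex_held = false;
        pthread_mutex_unlock(&ovs_mutex_demo);
}

#define ASSERT_OVSL_DEMO()      assert(ovs_mutex_held)
/* "Protected" dereference: legal only while the mutex is held. */
#define ovsl_deref_demo(p)      (ASSERT_OVSL_DEMO(), (p))

struct table { int n_flows; };

int main(void)
{
        struct table tbl = { .n_flows = 0 };
        struct table *shared = &tbl;

        ovs_lock_demo();
        ovsl_deref_demo(shared)->n_flows++;  /* ok: caller holds the lock */
        ovs_unlock_demo();

        /* ovsl_deref_demo(shared)->n_flows++;  would trip the assert */
        return 0;
}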
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 5558350e0d33..ef4feec6cd84 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -18,46 +18,78 @@
18 18
19#include <linux/netdevice.h> 19#include <linux/netdevice.h>
20#include <net/genetlink.h> 20#include <net/genetlink.h>
21#include <net/netns/generic.h>
21 22
22#include "datapath.h" 23#include "datapath.h"
23#include "vport-internal_dev.h" 24#include "vport-internal_dev.h"
24#include "vport-netdev.h" 25#include "vport-netdev.h"
25 26
27static void dp_detach_port_notify(struct vport *vport)
28{
29 struct sk_buff *notify;
30 struct datapath *dp;
31
32 dp = vport->dp;
33 notify = ovs_vport_cmd_build_info(vport, 0, 0,
34 OVS_VPORT_CMD_DEL);
35 ovs_dp_detach_port(vport);
36 if (IS_ERR(notify)) {
37 netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
38 ovs_dp_vport_multicast_group.id,
39 PTR_ERR(notify));
40 return;
41 }
42
43 genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
44 ovs_dp_vport_multicast_group.id,
45 GFP_KERNEL);
46}
47
48void ovs_dp_notify_wq(struct work_struct *work)
49{
50 struct ovs_net *ovs_net = container_of(work, struct ovs_net, dp_notify_work);
51 struct datapath *dp;
52
53 ovs_lock();
54 list_for_each_entry(dp, &ovs_net->dps, list_node) {
55 int i;
56
57 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
58 struct vport *vport;
59 struct hlist_node *n;
60
61 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
62 struct netdev_vport *netdev_vport;
63
64 if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
65 continue;
66
67 netdev_vport = netdev_vport_priv(vport);
68 if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED ||
69 netdev_vport->dev->reg_state == NETREG_UNREGISTERING)
70 dp_detach_port_notify(vport);
71 }
72 }
73 }
74 ovs_unlock();
75}
76
26static int dp_device_event(struct notifier_block *unused, unsigned long event, 77static int dp_device_event(struct notifier_block *unused, unsigned long event,
27 void *ptr) 78 void *ptr)
28{ 79{
80 struct ovs_net *ovs_net;
29 struct net_device *dev = ptr; 81 struct net_device *dev = ptr;
30 struct vport *vport; 82 struct vport *vport = NULL;
31 83
32 if (ovs_is_internal_dev(dev)) 84 if (!ovs_is_internal_dev(dev))
33 vport = ovs_internal_dev_get_vport(dev);
34 else
35 vport = ovs_netdev_get_vport(dev); 85 vport = ovs_netdev_get_vport(dev);
36 86
37 if (!vport) 87 if (!vport)
38 return NOTIFY_DONE; 88 return NOTIFY_DONE;
39 89
40 switch (event) { 90 if (event == NETDEV_UNREGISTER) {
41 case NETDEV_UNREGISTER: 91 ovs_net = net_generic(dev_net(dev), ovs_net_id);
42 if (!ovs_is_internal_dev(dev)) { 92 queue_work(system_wq, &ovs_net->dp_notify_work);
43 struct sk_buff *notify;
44 struct datapath *dp = vport->dp;
45
46 notify = ovs_vport_cmd_build_info(vport, 0, 0,
47 OVS_VPORT_CMD_DEL);
48 ovs_dp_detach_port(vport);
49 if (IS_ERR(notify)) {
50 netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
51 ovs_dp_vport_multicast_group.id,
52 PTR_ERR(notify));
53 break;
54 }
55
56 genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
57 ovs_dp_vport_multicast_group.id,
58 GFP_KERNEL);
59 }
60 break;
61 } 93 }
62 94
63 return NOTIFY_DONE; 95 return NOTIFY_DONE;
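
Editor's note: observe the shape of the new notifier. dp_device_event() runs
under RTNL, so instead of taking ovs_mutex there (which would presumably
invert the ovs_mutex-then-RTNL ordering the rest of the commit establishes,
e.g. in internal_dev_create()) it merely queues dp_notify_work; the worker
then takes ovs_mutex and sweeps all datapaths for ports whose netdev is going
away. A rough pthread sketch of that defer-to-worker pattern, with a condition
variable standing in for the kernel workqueue:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  kick = PTHREAD_COND_INITIALIZER;
static bool work_pending, done;

static void *notify_worker(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&lock);
        while (!done || work_pending) {
                while (!work_pending && !done)
                        pthread_cond_wait(&kick, &lock);
                if (work_pending) {
                        work_pending = false;
                        /* here the real worker takes ovs_mutex and detaches
                         * ports whose device is unregistering */
                        printf("worker: sweeping datapaths for dead ports\n");
                }
        }
        pthread_mutex_unlock(&lock);
        return NULL;
}

static void netdev_unregister_event(void)
{
        /* notifier context: just queue the work and return */
        pthread_mutex_lock(&lock);
        work_pending = true;
        pthread_cond_signal(&kick);
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, notify_worker, NULL);
        netdev_unregister_event();

        pthread_mutex_lock(&lock);
        done = true;
        pthread_cond_signal(&kick);
        pthread_mutex_unlock(&lock);
        pthread_join(t, NULL);
        return 0;
}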
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 67a2b783fe70..b15321a2228c 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -211,7 +211,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions)
211 return ERR_PTR(-ENOMEM); 211 return ERR_PTR(-ENOMEM);
212 212
213 sfa->actions_len = actions_len; 213 sfa->actions_len = actions_len;
214 memcpy(sfa->actions, nla_data(actions), actions_len); 214 nla_memcpy(sfa->actions, actions, actions_len);
215 return sfa; 215 return sfa;
216} 216}
217 217
@@ -466,7 +466,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
466 proto = *(__be16 *) skb->data; 466 proto = *(__be16 *) skb->data;
467 __skb_pull(skb, sizeof(__be16)); 467 __skb_pull(skb, sizeof(__be16));
468 468
469 if (ntohs(proto) >= 1536) 469 if (ntohs(proto) >= ETH_P_802_3_MIN)
470 return proto; 470 return proto;
471 471
472 if (skb->len < sizeof(struct llc_snap_hdr)) 472 if (skb->len < sizeof(struct llc_snap_hdr))
@@ -483,7 +483,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
483 483
484 __skb_pull(skb, sizeof(struct llc_snap_hdr)); 484 __skb_pull(skb, sizeof(struct llc_snap_hdr));
485 485
486 if (ntohs(llc->ethertype) >= 1536) 486 if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN)
487 return llc->ethertype; 487 return llc->ethertype;
488 488
489 return htons(ETH_P_802_2); 489 return htons(ETH_P_802_2);
@@ -1038,7 +1038,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
1038 1038
1039 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { 1039 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
1040 swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 1040 swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1041 if (ntohs(swkey->eth.type) < 1536) 1041 if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN)
1042 return -EINVAL; 1042 return -EINVAL;
1043 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1043 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1044 } else { 1044 } else {
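
Editor's note: ETH_P_802_3_MIN gives the magic 1536 a name. The 13th/14th
octet of an Ethernet frame is an EtherType if it is >= 0x0600 and an 802.3
length field otherwise, which is exactly the test parse_ethertype() applies
twice above. A tiny self-contained illustration:

#include <stdint.h>
#include <stdio.h>

#define ETH_P_802_3_MIN_DEMO 0x0600    /* 1536 */

static int is_ethertype(uint16_t type_or_len)
{
        return type_or_len >= ETH_P_802_3_MIN_DEMO;
}

int main(void)
{
        printf("0x0800 (IPv4): %s\n",
               is_ethertype(0x0800) ? "EtherType" : "802.3 length");
        printf("0x05DC (1500): %s\n",
               is_ethertype(0x05DC) ? "EtherType" : "802.3 length");
        return 0;
}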
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index a7bb60ff3b5b..0875fde65b9c 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -138,27 +138,6 @@ int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
138void ovs_flow_used(struct sw_flow *, struct sk_buff *); 138void ovs_flow_used(struct sw_flow *, struct sk_buff *);
139u64 ovs_flow_used_time(unsigned long flow_jiffies); 139u64 ovs_flow_used_time(unsigned long flow_jiffies);
140 140
141/* Upper bound on the length of a nlattr-formatted flow key. The longest
142 * nlattr-formatted flow key would be:
143 *
144 * struct pad nl hdr total
145 * ------ --- ------ -----
146 * OVS_KEY_ATTR_PRIORITY 4 -- 4 8
147 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8
148 * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8
149 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16
150 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype)
151 * OVS_KEY_ATTR_8021Q 4 -- 4 8
152 * OVS_KEY_ATTR_ENCAP 0 -- 4 4 (VLAN encapsulation)
153 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (inner VLAN ethertype)
154 * OVS_KEY_ATTR_IPV6 40 -- 4 44
155 * OVS_KEY_ATTR_ICMPV6 2 2 4 8
156 * OVS_KEY_ATTR_ND 28 -- 4 32
157 * -------------------------------------------------
158 * total 152
159 */
160#define FLOW_BUFSIZE 152
161
162int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); 141int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
163int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, 142int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
164 const struct nlattr *); 143 const struct nlattr *);
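
Editor's note: the FLOW_BUFSIZE table removed above is straightforward
netlink arithmetic: each attribute costs a 4-byte header plus its payload
rounded up to a multiple of 4, so e.g. OVS_KEY_ATTR_ETHERNET (12-byte payload)
costs 16 bytes. A small sketch that recomputes the 152-byte bound from the
payload sizes listed in the deleted comment (the macros mirror the uapi
definitions; the payload list is copied from the table):

#include <stdio.h>

#define NLA_ALIGNTO        4
#define NLA_ALIGN(len)     (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN         NLA_ALIGN(4)
#define NLA_TOTAL(payload) (NLA_HDRLEN + NLA_ALIGN(payload))

int main(void)
{
        /* payload sizes from the table the commit removes */
        int payloads[] = {4, 4, 4, 12, 2, 4, 0, 2, 40, 2, 28};
        int i, total = 0;

        for (i = 0; i < (int)(sizeof(payloads) / sizeof(payloads[0])); i++)
                total += NLA_TOTAL(payloads[i]);

        printf("upper bound on flow key: %d bytes\n", total);  /* 152 */
        return 0;
}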
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 0531de6c7a4a..73682de8dc69 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -63,16 +63,6 @@ static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netde
63 return stats; 63 return stats;
64} 64}
65 65
66static int internal_dev_mac_addr(struct net_device *dev, void *p)
67{
68 struct sockaddr *addr = p;
69
70 if (!is_valid_ether_addr(addr->sa_data))
71 return -EADDRNOTAVAIL;
72 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
73 return 0;
74}
75
76/* Called with rcu_read_lock_bh. */ 66/* Called with rcu_read_lock_bh. */
77static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) 67static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
78{ 68{
@@ -126,7 +116,7 @@ static const struct net_device_ops internal_dev_netdev_ops = {
126 .ndo_open = internal_dev_open, 116 .ndo_open = internal_dev_open,
127 .ndo_stop = internal_dev_stop, 117 .ndo_stop = internal_dev_stop,
128 .ndo_start_xmit = internal_dev_xmit, 118 .ndo_start_xmit = internal_dev_xmit,
129 .ndo_set_mac_address = internal_dev_mac_addr, 119 .ndo_set_mac_address = eth_mac_addr,
130 .ndo_change_mtu = internal_dev_change_mtu, 120 .ndo_change_mtu = internal_dev_change_mtu,
131 .ndo_get_stats64 = internal_dev_get_stats, 121 .ndo_get_stats64 = internal_dev_get_stats,
132}; 122};
@@ -138,6 +128,7 @@ static void do_setup(struct net_device *netdev)
138 netdev->netdev_ops = &internal_dev_netdev_ops; 128 netdev->netdev_ops = &internal_dev_netdev_ops;
139 129
140 netdev->priv_flags &= ~IFF_TX_SKB_SHARING; 130 netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
131 netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
141 netdev->destructor = internal_dev_destructor; 132 netdev->destructor = internal_dev_destructor;
142 SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops); 133 SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops);
143 netdev->tx_queue_len = 0; 134 netdev->tx_queue_len = 0;
@@ -146,7 +137,7 @@ static void do_setup(struct net_device *netdev)
146 NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO; 137 NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO;
147 138
148 netdev->vlan_features = netdev->features; 139 netdev->vlan_features = netdev->features;
149 netdev->features |= NETIF_F_HW_VLAN_TX; 140 netdev->features |= NETIF_F_HW_VLAN_CTAG_TX;
150 netdev->hw_features = netdev->features & ~NETIF_F_LLTX; 141 netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
151 eth_hw_addr_random(netdev); 142 eth_hw_addr_random(netdev);
152} 143}
@@ -182,16 +173,19 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
182 if (vport->port_no == OVSP_LOCAL) 173 if (vport->port_no == OVSP_LOCAL)
183 netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL; 174 netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL;
184 175
176 rtnl_lock();
185 err = register_netdevice(netdev_vport->dev); 177 err = register_netdevice(netdev_vport->dev);
186 if (err) 178 if (err)
187 goto error_free_netdev; 179 goto error_free_netdev;
188 180
189 dev_set_promiscuity(netdev_vport->dev, 1); 181 dev_set_promiscuity(netdev_vport->dev, 1);
182 rtnl_unlock();
190 netif_start_queue(netdev_vport->dev); 183 netif_start_queue(netdev_vport->dev);
191 184
192 return vport; 185 return vport;
193 186
194error_free_netdev: 187error_free_netdev:
188 rtnl_unlock();
195 free_netdev(netdev_vport->dev); 189 free_netdev(netdev_vport->dev);
196error_free_vport: 190error_free_vport:
197 ovs_vport_free(vport); 191 ovs_vport_free(vport);
@@ -204,10 +198,13 @@ static void internal_dev_destroy(struct vport *vport)
204 struct netdev_vport *netdev_vport = netdev_vport_priv(vport); 198 struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
205 199
206 netif_stop_queue(netdev_vport->dev); 200 netif_stop_queue(netdev_vport->dev);
201 rtnl_lock();
207 dev_set_promiscuity(netdev_vport->dev, -1); 202 dev_set_promiscuity(netdev_vport->dev, -1);
208 203
209 /* unregister_netdevice() waits for an RCU grace period. */ 204 /* unregister_netdevice() waits for an RCU grace period. */
210 unregister_netdevice(netdev_vport->dev); 205 unregister_netdevice(netdev_vport->dev);
206
207 rtnl_unlock();
211} 208}
212 209
213static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) 210static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 2130d61c384a..40a89ae8e19f 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -100,16 +100,20 @@ static struct vport *netdev_create(const struct vport_parms *parms)
100 goto error_put; 100 goto error_put;
101 } 101 }
102 102
103 rtnl_lock();
103 err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook, 104 err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
104 vport); 105 vport);
105 if (err) 106 if (err)
106 goto error_put; 107 goto error_unlock;
107 108
108 dev_set_promiscuity(netdev_vport->dev, 1); 109 dev_set_promiscuity(netdev_vport->dev, 1);
109 netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH; 110 netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
111 rtnl_unlock();
110 112
111 return vport; 113 return vport;
112 114
115error_unlock:
116 rtnl_unlock();
113error_put: 117error_put:
114 dev_put(netdev_vport->dev); 118 dev_put(netdev_vport->dev);
115error_free_vport: 119error_free_vport:
@@ -131,9 +135,11 @@ static void netdev_destroy(struct vport *vport)
131{ 135{
132 struct netdev_vport *netdev_vport = netdev_vport_priv(vport); 136 struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
133 137
138 rtnl_lock();
134 netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; 139 netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
135 netdev_rx_handler_unregister(netdev_vport->dev); 140 netdev_rx_handler_unregister(netdev_vport->dev);
136 dev_set_promiscuity(netdev_vport->dev, -1); 141 dev_set_promiscuity(netdev_vport->dev, -1);
142 rtnl_unlock();
137 143
138 call_rcu(&netdev_vport->rcu, free_port_rcu); 144 call_rcu(&netdev_vport->rcu, free_port_rcu);
139} 145}
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index f6b8132ce4cb..720623190eaa 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -40,7 +40,7 @@ static const struct vport_ops *vport_ops_list[] = {
40 &ovs_internal_vport_ops, 40 &ovs_internal_vport_ops,
41}; 41};
42 42
43/* Protected by RCU read lock for reading, RTNL lock for writing. */ 43/* Protected by RCU read lock for reading, ovs_mutex for writing. */
44static struct hlist_head *dev_table; 44static struct hlist_head *dev_table;
45#define VPORT_HASH_BUCKETS 1024 45#define VPORT_HASH_BUCKETS 1024
46 46
@@ -80,7 +80,7 @@ static struct hlist_head *hash_bucket(struct net *net, const char *name)
80 * 80 *
81 * @name: name of port to find 81 * @name: name of port to find
82 * 82 *
83 * Must be called with RTNL or RCU read lock. 83 * Must be called with ovs_mutex or RCU read lock.
84 */ 84 */
85struct vport *ovs_vport_locate(struct net *net, const char *name) 85struct vport *ovs_vport_locate(struct net *net, const char *name)
86{ 86{
@@ -128,7 +128,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
128 vport->ops = ops; 128 vport->ops = ops;
129 INIT_HLIST_NODE(&vport->dp_hash_node); 129 INIT_HLIST_NODE(&vport->dp_hash_node);
130 130
131 vport->percpu_stats = alloc_percpu(struct vport_percpu_stats); 131 vport->percpu_stats = alloc_percpu(struct pcpu_tstats);
132 if (!vport->percpu_stats) { 132 if (!vport->percpu_stats) {
133 kfree(vport); 133 kfree(vport);
134 return ERR_PTR(-ENOMEM); 134 return ERR_PTR(-ENOMEM);
@@ -161,7 +161,7 @@ void ovs_vport_free(struct vport *vport)
161 * @parms: Information about new vport. 161 * @parms: Information about new vport.
162 * 162 *
163 * Creates a new vport with the specified configuration (which is dependent on 163 * Creates a new vport with the specified configuration (which is dependent on
164 * device type). RTNL lock must be held. 164 * device type). ovs_mutex must be held.
165 */ 165 */
166struct vport *ovs_vport_add(const struct vport_parms *parms) 166struct vport *ovs_vport_add(const struct vport_parms *parms)
167{ 167{
@@ -169,8 +169,6 @@ struct vport *ovs_vport_add(const struct vport_parms *parms)
169 int err = 0; 169 int err = 0;
170 int i; 170 int i;
171 171
172 ASSERT_RTNL();
173
174 for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) { 172 for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
175 if (vport_ops_list[i]->type == parms->type) { 173 if (vport_ops_list[i]->type == parms->type) {
176 struct hlist_head *bucket; 174 struct hlist_head *bucket;
@@ -201,12 +199,10 @@ out:
201 * @port: New configuration. 199 * @port: New configuration.
202 * 200 *
203 * Modifies an existing device with the specified configuration (which is 201 * Modifies an existing device with the specified configuration (which is
204 * dependent on device type). RTNL lock must be held. 202 * dependent on device type). ovs_mutex must be held.
205 */ 203 */
206int ovs_vport_set_options(struct vport *vport, struct nlattr *options) 204int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
207{ 205{
208 ASSERT_RTNL();
209
210 if (!vport->ops->set_options) 206 if (!vport->ops->set_options)
211 return -EOPNOTSUPP; 207 return -EOPNOTSUPP;
212 return vport->ops->set_options(vport, options); 208 return vport->ops->set_options(vport, options);
@@ -218,11 +214,11 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
218 * @vport: vport to delete. 214 * @vport: vport to delete.
219 * 215 *
220 * Detaches @vport from its datapath and destroys it. It is possible to fail 216 * Detaches @vport from its datapath and destroys it. It is possible to fail
221 * for reasons such as lack of memory. RTNL lock must be held. 217 * for reasons such as lack of memory. ovs_mutex must be held.
222 */ 218 */
223void ovs_vport_del(struct vport *vport) 219void ovs_vport_del(struct vport *vport)
224{ 220{
225 ASSERT_RTNL(); 221 ASSERT_OVSL();
226 222
227 hlist_del_rcu(&vport->hash_node); 223 hlist_del_rcu(&vport->hash_node);
228 224
@@ -237,7 +233,7 @@ void ovs_vport_del(struct vport *vport)
237 * 233 *
238 * Retrieves transmit, receive, and error stats for the given device. 234 * Retrieves transmit, receive, and error stats for the given device.
239 * 235 *
240 * Must be called with RTNL lock or rcu_read_lock. 236 * Must be called with ovs_mutex or rcu_read_lock.
241 */ 237 */
242void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) 238void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
243{ 239{
@@ -264,16 +260,16 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
264 spin_unlock_bh(&vport->stats_lock); 260 spin_unlock_bh(&vport->stats_lock);
265 261
266 for_each_possible_cpu(i) { 262 for_each_possible_cpu(i) {
267 const struct vport_percpu_stats *percpu_stats; 263 const struct pcpu_tstats *percpu_stats;
268 struct vport_percpu_stats local_stats; 264 struct pcpu_tstats local_stats;
269 unsigned int start; 265 unsigned int start;
270 266
271 percpu_stats = per_cpu_ptr(vport->percpu_stats, i); 267 percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
272 268
273 do { 269 do {
274 start = u64_stats_fetch_begin_bh(&percpu_stats->sync); 270 start = u64_stats_fetch_begin_bh(&percpu_stats->syncp);
275 local_stats = *percpu_stats; 271 local_stats = *percpu_stats;
276 } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start)); 272 } while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start));
277 273
278 stats->rx_bytes += local_stats.rx_bytes; 274 stats->rx_bytes += local_stats.rx_bytes;
279 stats->rx_packets += local_stats.rx_packets; 275 stats->rx_packets += local_stats.rx_packets;
@@ -296,22 +292,24 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
296 * negative error code if a real error occurred. If an error occurs, @skb is 292 * negative error code if a real error occurred. If an error occurs, @skb is
297 * left unmodified. 293 * left unmodified.
298 * 294 *
299 * Must be called with RTNL lock or rcu_read_lock. 295 * Must be called with ovs_mutex or rcu_read_lock.
300 */ 296 */
301int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) 297int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
302{ 298{
303 struct nlattr *nla; 299 struct nlattr *nla;
300 int err;
301
302 if (!vport->ops->get_options)
303 return 0;
304 304
305 nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS); 305 nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS);
306 if (!nla) 306 if (!nla)
307 return -EMSGSIZE; 307 return -EMSGSIZE;
308 308
309 if (vport->ops->get_options) { 309 err = vport->ops->get_options(vport, skb);
310 int err = vport->ops->get_options(vport, skb); 310 if (err) {
311 if (err) { 311 nla_nest_cancel(skb, nla);
312 nla_nest_cancel(skb, nla); 312 return err;
313 return err;
314 }
315 } 313 }
316 314
317 nla_nest_end(skb, nla); 315 nla_nest_end(skb, nla);
@@ -329,13 +327,13 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
329 */ 327 */
330void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) 328void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
331{ 329{
332 struct vport_percpu_stats *stats; 330 struct pcpu_tstats *stats;
333 331
334 stats = this_cpu_ptr(vport->percpu_stats); 332 stats = this_cpu_ptr(vport->percpu_stats);
335 u64_stats_update_begin(&stats->sync); 333 u64_stats_update_begin(&stats->syncp);
336 stats->rx_packets++; 334 stats->rx_packets++;
337 stats->rx_bytes += skb->len; 335 stats->rx_bytes += skb->len;
338 u64_stats_update_end(&stats->sync); 336 u64_stats_update_end(&stats->syncp);
339 337
340 ovs_dp_process_received_packet(vport, skb); 338 ovs_dp_process_received_packet(vport, skb);
341} 339}
@@ -346,7 +344,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
346 * @vport: vport on which to send the packet 344 * @vport: vport on which to send the packet
347 * @skb: skb to send 345 * @skb: skb to send
348 * 346 *
349 * Sends the given packet and returns the length of data sent. Either RTNL 347 * Sends the given packet and returns the length of data sent. Either ovs
350 * lock or rcu_read_lock must be held. 348 * lock or rcu_read_lock must be held.
351 */ 349 */
352int ovs_vport_send(struct vport *vport, struct sk_buff *skb) 350int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
@@ -354,14 +352,14 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
354 int sent = vport->ops->send(vport, skb); 352 int sent = vport->ops->send(vport, skb);
355 353
356 if (likely(sent)) { 354 if (likely(sent)) {
357 struct vport_percpu_stats *stats; 355 struct pcpu_tstats *stats;
358 356
359 stats = this_cpu_ptr(vport->percpu_stats); 357 stats = this_cpu_ptr(vport->percpu_stats);
360 358
361 u64_stats_update_begin(&stats->sync); 359 u64_stats_update_begin(&stats->syncp);
362 stats->tx_packets++; 360 stats->tx_packets++;
363 stats->tx_bytes += sent; 361 stats->tx_bytes += sent;
364 u64_stats_update_end(&stats->sync); 362 u64_stats_update_end(&stats->syncp);
365 } 363 }
366 return sent; 364 return sent;
367} 365}
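
Editor's note: the switch from the private vport_percpu_stats to the generic
pcpu_tstats keeps the same consistency scheme: a u64_stats_sync sequence
counter (renamed sync -> syncp here) lets ovs_vport_get_stats() snapshot
multi-word counters without blocking the writers. A userspace analogue of
that fetch_begin/retry loop, purely illustrative (the kernel variants also
handle per-arch barriers and 32-bit splits):

#include <stdint.h>
#include <stdio.h>

struct tstats {
        uint64_t rx_packets;
        uint64_t rx_bytes;
        unsigned int seq;               /* even = stable, odd = in update */
};

static void stats_update(struct tstats *s, uint64_t bytes)
{
        __atomic_fetch_add(&s->seq, 1, __ATOMIC_RELEASE);   /* now odd */
        s->rx_packets++;
        s->rx_bytes += bytes;
        __atomic_fetch_add(&s->seq, 1, __ATOMIC_RELEASE);   /* even again */
}

static struct tstats stats_snapshot(const struct tstats *s)
{
        struct tstats snap;
        unsigned int start;

        do {
                while ((start = __atomic_load_n(&s->seq,
                                                __ATOMIC_ACQUIRE)) & 1)
                        ;               /* writer mid-update: spin */
                snap = *s;
        } while (__atomic_load_n(&s->seq, __ATOMIC_ACQUIRE) != start);
        return snap;
}

int main(void)
{
        struct tstats s = {0};
        struct tstats snap;

        stats_update(&s, 1500);
        stats_update(&s, 60);
        snap = stats_snapshot(&s);
        printf("%llu packets, %llu bytes\n",
               (unsigned long long)snap.rx_packets,
               (unsigned long long)snap.rx_bytes);
        return 0;
}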
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 3f7961ea3c56..7ba08c30b853 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -19,6 +19,7 @@
19#ifndef VPORT_H 19#ifndef VPORT_H
20#define VPORT_H 1 20#define VPORT_H 1
21 21
22#include <linux/if_tunnel.h>
22#include <linux/list.h> 23#include <linux/list.h>
23#include <linux/netlink.h> 24#include <linux/netlink.h>
24#include <linux/openvswitch.h> 25#include <linux/openvswitch.h>
@@ -50,14 +51,6 @@ int ovs_vport_send(struct vport *, struct sk_buff *);
50 51
51/* The following definitions are for implementers of vport devices: */ 52/* The following definitions are for implementers of vport devices: */
52 53
53struct vport_percpu_stats {
54 u64 rx_bytes;
55 u64 rx_packets;
56 u64 tx_bytes;
57 u64 tx_packets;
58 struct u64_stats_sync sync;
59};
60
61struct vport_err_stats { 54struct vport_err_stats {
62 u64 rx_dropped; 55 u64 rx_dropped;
63 u64 rx_errors; 56 u64 rx_errors;
@@ -68,10 +61,10 @@ struct vport_err_stats {
68/** 61/**
69 * struct vport - one port within a datapath 62 * struct vport - one port within a datapath
70 * @rcu: RCU callback head for deferred destruction. 63 * @rcu: RCU callback head for deferred destruction.
71 * @port_no: Index into @dp's @ports array.
72 * @dp: Datapath to which this port belongs. 64 * @dp: Datapath to which this port belongs.
73 * @upcall_portid: The Netlink port to use for packets received on this port that 65 * @upcall_portid: The Netlink port to use for packets received on this port that
74 * miss the flow table. 66 * miss the flow table.
67 * @port_no: Index into @dp's @ports array.
75 * @hash_node: Element in @dev_table hash table in vport.c. 68 * @hash_node: Element in @dev_table hash table in vport.c.
76 * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. 69 * @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
77 * @ops: Class structure. 70 * @ops: Class structure.
@@ -81,15 +74,15 @@ struct vport_err_stats {
81 */ 74 */
82struct vport { 75struct vport {
83 struct rcu_head rcu; 76 struct rcu_head rcu;
84 u16 port_no;
85 struct datapath *dp; 77 struct datapath *dp;
86 u32 upcall_portid; 78 u32 upcall_portid;
79 u16 port_no;
87 80
88 struct hlist_node hash_node; 81 struct hlist_node hash_node;
89 struct hlist_node dp_hash_node; 82 struct hlist_node dp_hash_node;
90 const struct vport_ops *ops; 83 const struct vport_ops *ops;
91 84
92 struct vport_percpu_stats __percpu *percpu_stats; 85 struct pcpu_tstats __percpu *percpu_stats;
93 86
94 spinlock_t stats_lock; 87 spinlock_t stats_lock;
95 struct vport_err_stats err_stats; 88 struct vport_err_stats err_stats;
@@ -138,14 +131,14 @@ struct vport_parms {
138struct vport_ops { 131struct vport_ops {
139 enum ovs_vport_type type; 132 enum ovs_vport_type type;
140 133
141 /* Called with RTNL lock. */ 134 /* Called with ovs_mutex. */
142 struct vport *(*create)(const struct vport_parms *); 135 struct vport *(*create)(const struct vport_parms *);
143 void (*destroy)(struct vport *); 136 void (*destroy)(struct vport *);
144 137
145 int (*set_options)(struct vport *, struct nlattr *); 138 int (*set_options)(struct vport *, struct nlattr *);
146 int (*get_options)(const struct vport *, struct sk_buff *); 139 int (*get_options)(const struct vport *, struct sk_buff *);
147 140
148 /* Called with rcu_read_lock or RTNL lock. */ 141 /* Called with rcu_read_lock or ovs_mutex. */
149 const char *(*get_name)(const struct vport *); 142 const char *(*get_name)(const struct vport *);
150 void (*get_config)(const struct vport *, void *); 143 void (*get_config)(const struct vport *, void *);
151 int (*get_ifindex)(const struct vport *); 144 int (*get_ifindex)(const struct vport *);
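
Editor's note: the struct vport shuffle above (u16 port_no moved below the
u32 upcall_portid, with the kernel-doc lines reordered to match) is a packing
fix: grouping the u32 and u16 lets them share one 8-byte slot on 64-bit
builds instead of the u16 dragging six bytes of padding in front of the
pointer. A sketch that makes the saving visible (rcu_head approximated by two
pointers; sizes assume an LP64 ABI):

#include <stddef.h>
#include <stdio.h>

struct rcu_head_demo { void *next, *func; };

struct vport_old {
        struct rcu_head_demo rcu;
        unsigned short port_no;         /* 6 bytes of padding follow */
        void *dp;
        unsigned int upcall_portid;
};

struct vport_new {
        struct rcu_head_demo rcu;
        void *dp;
        unsigned int upcall_portid;
        unsigned short port_no;         /* shares the u32's 8-byte slot */
};

int main(void)
{
        printf("old: %zu bytes (port_no at %zu)\n",
               sizeof(struct vport_old), offsetof(struct vport_old, port_no));
        printf("new: %zu bytes (port_no at %zu)\n",
               sizeof(struct vport_new), offsetof(struct vport_new, port_no));
        return 0;
}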
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 1d6793dbfbae..7e387ff64465 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -158,10 +158,16 @@ struct packet_mreq_max {
158 unsigned char mr_address[MAX_ADDR_LEN]; 158 unsigned char mr_address[MAX_ADDR_LEN];
159}; 159};
160 160
161union tpacket_uhdr {
162 struct tpacket_hdr *h1;
163 struct tpacket2_hdr *h2;
164 struct tpacket3_hdr *h3;
165 void *raw;
166};
167
161static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, 168static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
162 int closing, int tx_ring); 169 int closing, int tx_ring);
163 170
164
165#define V3_ALIGNMENT (8) 171#define V3_ALIGNMENT (8)
166 172
167#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT)) 173#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))
@@ -181,6 +187,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
181 187
182struct packet_sock; 188struct packet_sock;
183static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); 189static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
190static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
191 struct packet_type *pt, struct net_device *orig_dev);
184 192
185static void *packet_previous_frame(struct packet_sock *po, 193static void *packet_previous_frame(struct packet_sock *po,
186 struct packet_ring_buffer *rb, 194 struct packet_ring_buffer *rb,
@@ -288,11 +296,7 @@ static inline __pure struct page *pgv_to_page(void *addr)
288 296
289static void __packet_set_status(struct packet_sock *po, void *frame, int status) 297static void __packet_set_status(struct packet_sock *po, void *frame, int status)
290{ 298{
291 union { 299 union tpacket_uhdr h;
292 struct tpacket_hdr *h1;
293 struct tpacket2_hdr *h2;
294 void *raw;
295 } h;
296 300
297 h.raw = frame; 301 h.raw = frame;
298 switch (po->tp_version) { 302 switch (po->tp_version) {
@@ -315,11 +319,7 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
315 319
316static int __packet_get_status(struct packet_sock *po, void *frame) 320static int __packet_get_status(struct packet_sock *po, void *frame)
317{ 321{
318 union { 322 union tpacket_uhdr h;
319 struct tpacket_hdr *h1;
320 struct tpacket2_hdr *h2;
321 void *raw;
322 } h;
323 323
324 smp_rmb(); 324 smp_rmb();
325 325
@@ -345,11 +345,7 @@ static void *packet_lookup_frame(struct packet_sock *po,
345 int status) 345 int status)
346{ 346{
347 unsigned int pg_vec_pos, frame_offset; 347 unsigned int pg_vec_pos, frame_offset;
348 union { 348 union tpacket_uhdr h;
349 struct tpacket_hdr *h1;
350 struct tpacket2_hdr *h2;
351 void *raw;
352 } h;
353 349
354 pg_vec_pos = position / rb->frames_per_block; 350 pg_vec_pos = position / rb->frames_per_block;
355 frame_offset = position % rb->frames_per_block; 351 frame_offset = position % rb->frames_per_block;
@@ -973,11 +969,11 @@ static void *packet_current_rx_frame(struct packet_sock *po,
973 969
974static void *prb_lookup_block(struct packet_sock *po, 970static void *prb_lookup_block(struct packet_sock *po,
975 struct packet_ring_buffer *rb, 971 struct packet_ring_buffer *rb,
976 unsigned int previous, 972 unsigned int idx,
977 int status) 973 int status)
978{ 974{
979 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); 975 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
980 struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous); 976 struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
981 977
982 if (status != BLOCK_STATUS(pbd)) 978 if (status != BLOCK_STATUS(pbd))
983 return NULL; 979 return NULL;
@@ -1041,6 +1037,29 @@ static void packet_increment_head(struct packet_ring_buffer *buff)
1041 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; 1037 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
1042} 1038}
1043 1039
1040static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
1041{
1042 struct sock *sk = &po->sk;
1043 bool has_room;
1044
1045 if (po->prot_hook.func != tpacket_rcv)
1046 return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize)
1047 <= sk->sk_rcvbuf;
1048
1049 spin_lock(&sk->sk_receive_queue.lock);
1050 if (po->tp_version == TPACKET_V3)
1051 has_room = prb_lookup_block(po, &po->rx_ring,
1052 po->rx_ring.prb_bdqc.kactive_blk_num,
1053 TP_STATUS_KERNEL);
1054 else
1055 has_room = packet_lookup_frame(po, &po->rx_ring,
1056 po->rx_ring.head,
1057 TP_STATUS_KERNEL);
1058 spin_unlock(&sk->sk_receive_queue.lock);
1059
1060 return has_room;
1061}
1062
1044static void packet_sock_destruct(struct sock *sk) 1063static void packet_sock_destruct(struct sock *sk)
1045{ 1064{
1046 skb_queue_purge(&sk->sk_error_queue); 1065 skb_queue_purge(&sk->sk_error_queue);
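
Editor's note: for sockets not using a ring (prot_hook.func != tpacket_rcv),
packet_rcv_has_room() above falls back to plain receive-buffer accounting:
the packet fits if the memory already queued plus this skb's truesize stays
within sk_rcvbuf. A trivial sketch of that check (numbers illustrative):

#include <stdbool.h>
#include <stdio.h>

static bool rcv_has_room(unsigned int rmem_alloc, unsigned int truesize,
                         unsigned int rcvbuf)
{
        return rmem_alloc + truesize <= rcvbuf;
}

int main(void)
{
        printf("%d\n", rcv_has_room(200000, 2048, 212992));  /* 1: fits */
        printf("%d\n", rcv_has_room(212000, 2048, 212992));  /* 0: full */
        return 0;
}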
@@ -1066,16 +1085,16 @@ static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
1066 return x; 1085 return x;
1067} 1086}
1068 1087
1069static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) 1088static unsigned int fanout_demux_hash(struct packet_fanout *f,
1089 struct sk_buff *skb,
1090 unsigned int num)
1070{ 1091{
1071 u32 idx, hash = skb->rxhash; 1092 return (((u64)skb->rxhash) * num) >> 32;
1072
1073 idx = ((u64)hash * num) >> 32;
1074
1075 return f->arr[idx];
1076} 1093}
1077 1094
1078static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) 1095static unsigned int fanout_demux_lb(struct packet_fanout *f,
1096 struct sk_buff *skb,
1097 unsigned int num)
1079{ 1098{
1080 int cur, old; 1099 int cur, old;
1081 1100
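
Editor's note: fanout_demux_hash() now returns an index instead of a socket,
computed as ((u64)rxhash * num) >> 32: a multiply-shift that maps a uniform
32-bit hash onto [0, num) without a division. A quick demonstration of the
mapping (the golden-ratio multiplier is just an illustrative way to generate
spread-out hash values):

#include <stdint.h>
#include <stdio.h>

static unsigned int demux_hash(uint32_t rxhash, unsigned int num)
{
        return (unsigned int)(((uint64_t)rxhash * num) >> 32);
}

int main(void)
{
        unsigned int counts[4] = {0};
        unsigned int i;
        uint32_t h;

        for (h = 0; h < 4096; h++)      /* crude stand-in for skb rxhashes */
                counts[demux_hash(h * 0x9E3779B9u, 4)]++;

        for (i = 0; i < 4; i++)
                printf("socket %u: %u packets\n", i, counts[i]);
        return 0;
}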
@@ -1083,14 +1102,40 @@ static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb
1083 while ((old = atomic_cmpxchg(&f->rr_cur, cur, 1102 while ((old = atomic_cmpxchg(&f->rr_cur, cur,
1084 fanout_rr_next(f, num))) != cur) 1103 fanout_rr_next(f, num))) != cur)
1085 cur = old; 1104 cur = old;
1086 return f->arr[cur]; 1105 return cur;
1106}
1107
1108static unsigned int fanout_demux_cpu(struct packet_fanout *f,
1109 struct sk_buff *skb,
1110 unsigned int num)
1111{
1112 return smp_processor_id() % num;
1087} 1113}
1088 1114
1089static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) 1115static unsigned int fanout_demux_rollover(struct packet_fanout *f,
1116 struct sk_buff *skb,
1117 unsigned int idx, unsigned int skip,
1118 unsigned int num)
1090{ 1119{
1091 unsigned int cpu = smp_processor_id(); 1120 unsigned int i, j;
1092 1121
1093 return f->arr[cpu % num]; 1122 i = j = min_t(int, f->next[idx], num - 1);
1123 do {
1124 if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) {
1125 if (i != j)
1126 f->next[idx] = i;
1127 return i;
1128 }
1129 if (++i == num)
1130 i = 0;
1131 } while (i != j);
1132
1133 return idx;
1134}
1135
1136static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
1137{
1138 return f->flags & (flag >> 8);
1094} 1139}
1095 1140
1096static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, 1141static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
@@ -1099,7 +1144,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1099 struct packet_fanout *f = pt->af_packet_priv; 1144 struct packet_fanout *f = pt->af_packet_priv;
1100 unsigned int num = f->num_members; 1145 unsigned int num = f->num_members;
1101 struct packet_sock *po; 1146 struct packet_sock *po;
1102 struct sock *sk; 1147 unsigned int idx;
1103 1148
1104 if (!net_eq(dev_net(dev), read_pnet(&f->net)) || 1149 if (!net_eq(dev_net(dev), read_pnet(&f->net)) ||
1105 !num) { 1150 !num) {
@@ -1110,23 +1155,31 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1110 switch (f->type) { 1155 switch (f->type) {
1111 case PACKET_FANOUT_HASH: 1156 case PACKET_FANOUT_HASH:
1112 default: 1157 default:
1113 if (f->defrag) { 1158 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
1114 skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); 1159 skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET);
1115 if (!skb) 1160 if (!skb)
1116 return 0; 1161 return 0;
1117 } 1162 }
1118 skb_get_rxhash(skb); 1163 skb_get_rxhash(skb);
1119 sk = fanout_demux_hash(f, skb, num); 1164 idx = fanout_demux_hash(f, skb, num);
1120 break; 1165 break;
1121 case PACKET_FANOUT_LB: 1166 case PACKET_FANOUT_LB:
1122 sk = fanout_demux_lb(f, skb, num); 1167 idx = fanout_demux_lb(f, skb, num);
1123 break; 1168 break;
1124 case PACKET_FANOUT_CPU: 1169 case PACKET_FANOUT_CPU:
1125 sk = fanout_demux_cpu(f, skb, num); 1170 idx = fanout_demux_cpu(f, skb, num);
1171 break;
1172 case PACKET_FANOUT_ROLLOVER:
1173 idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num);
1126 break; 1174 break;
1127 } 1175 }
1128 1176
1129 po = pkt_sk(sk); 1177 po = pkt_sk(f->arr[idx]);
1178 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) &&
1179 unlikely(!packet_rcv_has_room(po, skb))) {
1180 idx = fanout_demux_rollover(f, skb, idx, idx, num);
1181 po = pkt_sk(f->arr[idx]);
1182 }
1130 1183
1131 return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); 1184 return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
1132} 1185}
@@ -1175,10 +1228,13 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1175 struct packet_sock *po = pkt_sk(sk); 1228 struct packet_sock *po = pkt_sk(sk);
1176 struct packet_fanout *f, *match; 1229 struct packet_fanout *f, *match;
1177 u8 type = type_flags & 0xff; 1230 u8 type = type_flags & 0xff;
1178 u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0; 1231 u8 flags = type_flags >> 8;
1179 int err; 1232 int err;
1180 1233
1181 switch (type) { 1234 switch (type) {
1235 case PACKET_FANOUT_ROLLOVER:
1236 if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
1237 return -EINVAL;
1182 case PACKET_FANOUT_HASH: 1238 case PACKET_FANOUT_HASH:
1183 case PACKET_FANOUT_LB: 1239 case PACKET_FANOUT_LB:
1184 case PACKET_FANOUT_CPU: 1240 case PACKET_FANOUT_CPU:
@@ -1203,7 +1259,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1203 } 1259 }
1204 } 1260 }
1205 err = -EINVAL; 1261 err = -EINVAL;
1206 if (match && match->defrag != defrag) 1262 if (match && match->flags != flags)
1207 goto out; 1263 goto out;
1208 if (!match) { 1264 if (!match) {
1209 err = -ENOMEM; 1265 err = -ENOMEM;
@@ -1213,7 +1269,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1213 write_pnet(&match->net, sock_net(sk)); 1269 write_pnet(&match->net, sock_net(sk));
1214 match->id = id; 1270 match->id = id;
1215 match->type = type; 1271 match->type = type;
1216 match->defrag = defrag; 1272 match->flags = flags;
1217 atomic_set(&match->rr_cur, 0); 1273 atomic_set(&match->rr_cur, 0);
1218 INIT_LIST_HEAD(&match->list); 1274 INIT_LIST_HEAD(&match->list);
1219 spin_lock_init(&match->lock); 1275 spin_lock_init(&match->lock);
@@ -1443,13 +1499,14 @@ retry:
1443 skb->dev = dev; 1499 skb->dev = dev;
1444 skb->priority = sk->sk_priority; 1500 skb->priority = sk->sk_priority;
1445 skb->mark = sk->sk_mark; 1501 skb->mark = sk->sk_mark;
1446 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); 1502
1447 if (err < 0) 1503 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
1448 goto out_unlock;
1449 1504
1450 if (unlikely(extra_len == 4)) 1505 if (unlikely(extra_len == 4))
1451 skb->no_fcs = 1; 1506 skb->no_fcs = 1;
1452 1507
1508 skb_probe_transport_header(skb, 0);
1509
1453 dev_queue_xmit(skb); 1510 dev_queue_xmit(skb);
1454 rcu_read_unlock(); 1511 rcu_read_unlock();
1455 return len; 1512 return len;
@@ -1600,27 +1657,40 @@ drop:
1600 return 0; 1657 return 0;
1601} 1658}
1602 1659
1660static void tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
1661 unsigned int flags)
1662{
1663 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
1664
1665 if (shhwtstamps) {
1666 if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) &&
1667 ktime_to_timespec_cond(shhwtstamps->syststamp, ts))
1668 return;
1669 if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
1670 ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
1671 return;
1672 }
1673
1674 if (ktime_to_timespec_cond(skb->tstamp, ts))
1675 return;
1676
1677 getnstimeofday(ts);
1678}
1679
1603static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, 1680static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1604 struct packet_type *pt, struct net_device *orig_dev) 1681 struct packet_type *pt, struct net_device *orig_dev)
1605{ 1682{
1606 struct sock *sk; 1683 struct sock *sk;
1607 struct packet_sock *po; 1684 struct packet_sock *po;
1608 struct sockaddr_ll *sll; 1685 struct sockaddr_ll *sll;
1609 union { 1686 union tpacket_uhdr h;
1610 struct tpacket_hdr *h1;
1611 struct tpacket2_hdr *h2;
1612 struct tpacket3_hdr *h3;
1613 void *raw;
1614 } h;
1615 u8 *skb_head = skb->data; 1687 u8 *skb_head = skb->data;
1616 int skb_len = skb->len; 1688 int skb_len = skb->len;
1617 unsigned int snaplen, res; 1689 unsigned int snaplen, res;
1618 unsigned long status = TP_STATUS_USER; 1690 unsigned long status = TP_STATUS_USER;
1619 unsigned short macoff, netoff, hdrlen; 1691 unsigned short macoff, netoff, hdrlen;
1620 struct sk_buff *copy_skb = NULL; 1692 struct sk_buff *copy_skb = NULL;
1621 struct timeval tv;
1622 struct timespec ts; 1693 struct timespec ts;
1623 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
1624 1694
1625 if (skb->pkt_type == PACKET_LOOPBACK) 1695 if (skb->pkt_type == PACKET_LOOPBACK)
1626 goto drop; 1696 goto drop;
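
Editor's note: tpacket_get_timestamp() folds the three copies of the old
cascade into one helper: prefer the synthesized hardware stamp, then the raw
hardware stamp (each only when its SOF_TIMESTAMPING flag is set), then the
skb software stamp, and finally read the clock. A simplified sketch of that
first-populated-source-wins cascade, with plain nanosecond integers standing
in for ktime_t and the flag checks omitted:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static bool ts_cond(long long ktime_ns, struct timespec *ts)
{
        if (!ktime_ns)
                return false;           /* source not populated */
        ts->tv_sec = ktime_ns / 1000000000LL;
        ts->tv_nsec = ktime_ns % 1000000000LL;
        return true;
}

static void get_timestamp(long long sys_hw, long long raw_hw,
                          long long sw, struct timespec *ts)
{
        if (ts_cond(sys_hw, ts))
                return;
        if (ts_cond(raw_hw, ts))
                return;
        if (ts_cond(sw, ts))
                return;
        clock_gettime(CLOCK_REALTIME, ts);  /* like getnstimeofday() */
}

int main(void)
{
        struct timespec ts;

        get_timestamp(0, 0, 1234567890123456789LL, &ts);
        printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
        return 0;
}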
@@ -1703,6 +1773,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1703 spin_unlock(&sk->sk_receive_queue.lock); 1773 spin_unlock(&sk->sk_receive_queue.lock);
1704 1774
1705 skb_copy_bits(skb, 0, h.raw + macoff, snaplen); 1775 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
1776 tpacket_get_timestamp(skb, &ts, po->tp_tstamp);
1706 1777
1707 switch (po->tp_version) { 1778 switch (po->tp_version) {
1708 case TPACKET_V1: 1779 case TPACKET_V1:
@@ -1710,18 +1781,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1710 h.h1->tp_snaplen = snaplen; 1781 h.h1->tp_snaplen = snaplen;
1711 h.h1->tp_mac = macoff; 1782 h.h1->tp_mac = macoff;
1712 h.h1->tp_net = netoff; 1783 h.h1->tp_net = netoff;
1713 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) 1784 h.h1->tp_sec = ts.tv_sec;
1714 && shhwtstamps->syststamp.tv64) 1785 h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
1715 tv = ktime_to_timeval(shhwtstamps->syststamp);
1716 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
1717 && shhwtstamps->hwtstamp.tv64)
1718 tv = ktime_to_timeval(shhwtstamps->hwtstamp);
1719 else if (skb->tstamp.tv64)
1720 tv = ktime_to_timeval(skb->tstamp);
1721 else
1722 do_gettimeofday(&tv);
1723 h.h1->tp_sec = tv.tv_sec;
1724 h.h1->tp_usec = tv.tv_usec;
1725 hdrlen = sizeof(*h.h1); 1786 hdrlen = sizeof(*h.h1);
1726 break; 1787 break;
1727 case TPACKET_V2: 1788 case TPACKET_V2:
@@ -1729,16 +1790,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1729 h.h2->tp_snaplen = snaplen; 1790 h.h2->tp_snaplen = snaplen;
1730 h.h2->tp_mac = macoff; 1791 h.h2->tp_mac = macoff;
1731 h.h2->tp_net = netoff; 1792 h.h2->tp_net = netoff;
1732 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
1733 && shhwtstamps->syststamp.tv64)
1734 ts = ktime_to_timespec(shhwtstamps->syststamp);
1735 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
1736 && shhwtstamps->hwtstamp.tv64)
1737 ts = ktime_to_timespec(shhwtstamps->hwtstamp);
1738 else if (skb->tstamp.tv64)
1739 ts = ktime_to_timespec(skb->tstamp);
1740 else
1741 getnstimeofday(&ts);
1742 h.h2->tp_sec = ts.tv_sec; 1793 h.h2->tp_sec = ts.tv_sec;
1743 h.h2->tp_nsec = ts.tv_nsec; 1794 h.h2->tp_nsec = ts.tv_nsec;
1744 if (vlan_tx_tag_present(skb)) { 1795 if (vlan_tx_tag_present(skb)) {
@@ -1759,16 +1810,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1759 h.h3->tp_snaplen = snaplen; 1810 h.h3->tp_snaplen = snaplen;
1760 h.h3->tp_mac = macoff; 1811 h.h3->tp_mac = macoff;
1761 h.h3->tp_net = netoff; 1812 h.h3->tp_net = netoff;
1762 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
1763 && shhwtstamps->syststamp.tv64)
1764 ts = ktime_to_timespec(shhwtstamps->syststamp);
1765 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
1766 && shhwtstamps->hwtstamp.tv64)
1767 ts = ktime_to_timespec(shhwtstamps->hwtstamp);
1768 else if (skb->tstamp.tv64)
1769 ts = ktime_to_timespec(skb->tstamp);
1770 else
1771 getnstimeofday(&ts);
1772 h.h3->tp_sec = ts.tv_sec; 1813 h.h3->tp_sec = ts.tv_sec;
1773 h.h3->tp_nsec = ts.tv_nsec; 1814 h.h3->tp_nsec = ts.tv_nsec;
1774 hdrlen = sizeof(*h.h3); 1815 hdrlen = sizeof(*h.h3);
@@ -1846,11 +1887,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
1846 void *frame, struct net_device *dev, int size_max, 1887 void *frame, struct net_device *dev, int size_max,
1847 __be16 proto, unsigned char *addr, int hlen) 1888 __be16 proto, unsigned char *addr, int hlen)
1848{ 1889{
1849 union { 1890 union tpacket_uhdr ph;
1850 struct tpacket_hdr *h1;
1851 struct tpacket2_hdr *h2;
1852 void *raw;
1853 } ph;
1854 int to_write, offset, len, tp_len, nr_frags, len_max; 1891 int to_write, offset, len, tp_len, nr_frags, len_max;
1855 struct socket *sock = po->sk.sk_socket; 1892 struct socket *sock = po->sk.sk_socket;
1856 struct page *page; 1893 struct page *page;
@@ -1880,6 +1917,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
1880 1917
1881 skb_reserve(skb, hlen); 1918 skb_reserve(skb, hlen);
1882 skb_reset_network_header(skb); 1919 skb_reset_network_header(skb);
1920 skb_probe_transport_header(skb, 0);
1883 1921
1884 if (po->tp_tx_has_off) { 1922 if (po->tp_tx_has_off) {
1885 int off_min, off_max, off; 1923 int off_min, off_max, off;
@@ -2247,9 +2285,8 @@ static int packet_snd(struct socket *sock,
2247 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); 2285 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
2248 if (err) 2286 if (err)
2249 goto out_free; 2287 goto out_free;
2250 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); 2288
2251 if (err < 0) 2289 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
2252 goto out_free;
2253 2290
2254 if (!gso_type && (len > dev->mtu + reserve + extra_len)) { 2291 if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
2255 /* Earlier code assumed this would be a VLAN pkt, 2292 /* Earlier code assumed this would be a VLAN pkt,
@@ -2289,6 +2326,8 @@ static int packet_snd(struct socket *sock,
2289 len += vnet_hdr_len; 2326 len += vnet_hdr_len;
2290 } 2327 }
2291 2328
2329 skb_probe_transport_header(skb, reserve);
2330
2292 if (unlikely(extra_len == 4)) 2331 if (unlikely(extra_len == 4))
2293 skb->no_fcs = 1; 2332 skb->no_fcs = 1;
2294 2333
@@ -3240,7 +3279,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
3240 case PACKET_FANOUT: 3279 case PACKET_FANOUT:
3241 val = (po->fanout ? 3280 val = (po->fanout ?
3242 ((u32)po->fanout->id | 3281 ((u32)po->fanout->id |
3243 ((u32)po->fanout->type << 16)) : 3282 ((u32)po->fanout->type << 16) |
3283 ((u32)po->fanout->flags << 24)) :
3244 0); 3284 0);
3245 break; 3285 break;
3246 case PACKET_TX_HAS_OFF: 3286 case PACKET_TX_HAS_OFF:
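
The af_packet.c hunks above fold three near-identical timestamp-selection blocks (one per TPACKET version) into a single tpacket_get_timestamp() helper with a fixed fallback order: system hardware timestamp, raw hardware timestamp, software timestamp, current time. A minimal standalone sketch of that ordering, using simplified stand-in types rather than kernel structures:

    /* Fallback order modeled on tpacket_get_timestamp(); the types and
     * flag names are simplified stand-ins, not kernel definitions. */
    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>

    #define TS_SYS_HARDWARE (1 << 0)
    #define TS_RAW_HARDWARE (1 << 1)

    struct fake_skb {
        struct timespec sys_hw, raw_hw, sw;
        bool has_sys_hw, has_raw_hw, has_sw;
    };

    static struct timespec pick_timestamp(const struct fake_skb *skb,
                                          unsigned int flags)
    {
        struct timespec now;

        if ((flags & TS_SYS_HARDWARE) && skb->has_sys_hw)
            return skb->sys_hw;
        if ((flags & TS_RAW_HARDWARE) && skb->has_raw_hw)
            return skb->raw_hw;
        if (skb->has_sw)
            return skb->sw;
        clock_gettime(CLOCK_REALTIME, &now);  /* getnstimeofday() analogue */
        return now;
    }

    int main(void)
    {
        struct fake_skb skb = { .sw = { 1234, 500 }, .has_sw = true };
        struct timespec ts = pick_timestamp(&skb, TS_RAW_HARDWARE);

        printf("%ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);
        return 0;
    }

With no hardware timestamp present, the request for TS_RAW_HARDWARE falls through to the software timestamp, exactly the degradation the helper encodes once instead of three times.
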
diff --git a/net/packet/internal.h b/net/packet/internal.h
index e84cab8cb7a9..e891f025a1b9 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -77,10 +77,11 @@ struct packet_fanout {
77 unsigned int num_members; 77 unsigned int num_members;
78 u16 id; 78 u16 id;
79 u8 type; 79 u8 type;
80 u8 defrag; 80 u8 flags;
81 atomic_t rr_cur; 81 atomic_t rr_cur;
82 struct list_head list; 82 struct list_head list;
83 struct sock *arr[PACKET_FANOUT_MAX]; 83 struct sock *arr[PACKET_FANOUT_MAX];
84 int next[PACKET_FANOUT_MAX];
84 spinlock_t lock; 85 spinlock_t lock;
85 atomic_t sk_ref; 86 atomic_t sk_ref;
86 struct packet_type prot_hook ____cacheline_aligned_in_smp; 87 struct packet_type prot_hook ____cacheline_aligned_in_smp;
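
With the defrag byte widened into a general flags byte, the PACKET_FANOUT getsockopt in the af_packet.c hunk now packs three fields into one 32-bit value: group id in bits 0-15, balancing type in bits 16-23, flags in bits 24-31. A small userspace-style sketch of decoding that layout (the packed value here is made up for illustration):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t val = 0x01020042;            /* example packed value */
        unsigned id    = val & 0xffff;        /* fanout group id      */
        unsigned type  = (val >> 16) & 0xff;  /* balancing mode       */
        unsigned flags = (val >> 24) & 0xff;  /* flag bits            */

        printf("id=%u type=%u flags=0x%02x\n", id, type, flags);
        return 0;
    }
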
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 0193630d3061..dc15f4300808 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -61,7 +61,7 @@ static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = {
61 [IFA_LOCAL] = { .type = NLA_U8 }, 61 [IFA_LOCAL] = { .type = NLA_U8 },
62}; 62};
63 63
64static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) 64static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
65{ 65{
66 struct net *net = sock_net(skb->sk); 66 struct net *net = sock_net(skb->sk);
67 struct nlattr *tb[IFA_MAX+1]; 67 struct nlattr *tb[IFA_MAX+1];
@@ -224,7 +224,7 @@ static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = {
224 [RTA_OIF] = { .type = NLA_U32 }, 224 [RTA_OIF] = { .type = NLA_U32 },
225}; 225};
226 226
227static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) 227static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
228{ 228{
229 struct net *net = sock_net(skb->sk); 229 struct net *net = sock_net(skb->sk);
230 struct nlattr *tb[RTA_MAX+1]; 230 struct nlattr *tb[RTA_MAX+1];
diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c
index 4b5ab21ecb24..d11ac79246e4 100644
--- a/net/rfkill/rfkill-regulator.c
+++ b/net/rfkill/rfkill-regulator.c
@@ -51,7 +51,7 @@ static int rfkill_regulator_set_block(void *data, bool blocked)
51 return 0; 51 return 0;
52} 52}
53 53
54struct rfkill_ops rfkill_regulator_ops = { 54static struct rfkill_ops rfkill_regulator_ops = {
55 .set_block = rfkill_regulator_set_block, 55 .set_block = rfkill_regulator_set_block,
56}; 56};
57 57
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 8579c4bb20c9..fd7072827a40 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -982,7 +982,7 @@ done:
982 return ret; 982 return ret;
983} 983}
984 984
985static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 985static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
986{ 986{
987 struct net *net = sock_net(skb->sk); 987 struct net *net = sock_net(skb->sk);
988 struct nlattr *tca[TCA_ACT_MAX + 1]; 988 struct nlattr *tca[TCA_ACT_MAX + 1];
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 08fa1e8a4ca4..3a4c0caa1f7d 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -166,15 +166,17 @@ static int tcf_csum_ipv4_igmp(struct sk_buff *skb,
166 return 1; 166 return 1;
167} 167}
168 168
169static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h, 169static int tcf_csum_ipv6_icmp(struct sk_buff *skb,
170 unsigned int ihl, unsigned int ipl) 170 unsigned int ihl, unsigned int ipl)
171{ 171{
172 struct icmp6hdr *icmp6h; 172 struct icmp6hdr *icmp6h;
173 const struct ipv6hdr *ip6h;
173 174
174 icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h)); 175 icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h));
175 if (icmp6h == NULL) 176 if (icmp6h == NULL)
176 return 0; 177 return 0;
177 178
179 ip6h = ipv6_hdr(skb);
178 icmp6h->icmp6_cksum = 0; 180 icmp6h->icmp6_cksum = 0;
179 skb->csum = csum_partial(icmp6h, ipl - ihl, 0); 181 skb->csum = csum_partial(icmp6h, ipl - ihl, 0);
180 icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 182 icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
@@ -186,15 +188,17 @@ static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h,
186 return 1; 188 return 1;
187} 189}
188 190
189static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph, 191static int tcf_csum_ipv4_tcp(struct sk_buff *skb,
190 unsigned int ihl, unsigned int ipl) 192 unsigned int ihl, unsigned int ipl)
191{ 193{
192 struct tcphdr *tcph; 194 struct tcphdr *tcph;
195 const struct iphdr *iph;
193 196
194 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); 197 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
195 if (tcph == NULL) 198 if (tcph == NULL)
196 return 0; 199 return 0;
197 200
201 iph = ip_hdr(skb);
198 tcph->check = 0; 202 tcph->check = 0;
199 skb->csum = csum_partial(tcph, ipl - ihl, 0); 203 skb->csum = csum_partial(tcph, ipl - ihl, 0);
200 tcph->check = tcp_v4_check(ipl - ihl, 204 tcph->check = tcp_v4_check(ipl - ihl,
@@ -205,15 +209,17 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph,
205 return 1; 209 return 1;
206} 210}
207 211
208static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h, 212static int tcf_csum_ipv6_tcp(struct sk_buff *skb,
209 unsigned int ihl, unsigned int ipl) 213 unsigned int ihl, unsigned int ipl)
210{ 214{
211 struct tcphdr *tcph; 215 struct tcphdr *tcph;
216 const struct ipv6hdr *ip6h;
212 217
213 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); 218 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
214 if (tcph == NULL) 219 if (tcph == NULL)
215 return 0; 220 return 0;
216 221
222 ip6h = ipv6_hdr(skb);
217 tcph->check = 0; 223 tcph->check = 0;
218 skb->csum = csum_partial(tcph, ipl - ihl, 0); 224 skb->csum = csum_partial(tcph, ipl - ihl, 0);
219 tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 225 tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
@@ -225,10 +231,11 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h,
225 return 1; 231 return 1;
226} 232}
227 233
228static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph, 234static int tcf_csum_ipv4_udp(struct sk_buff *skb,
229 unsigned int ihl, unsigned int ipl, int udplite) 235 unsigned int ihl, unsigned int ipl, int udplite)
230{ 236{
231 struct udphdr *udph; 237 struct udphdr *udph;
238 const struct iphdr *iph;
232 u16 ul; 239 u16 ul;
233 240
234 /* 241 /*
@@ -242,6 +249,7 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph,
242 if (udph == NULL) 249 if (udph == NULL)
243 return 0; 250 return 0;
244 251
252 iph = ip_hdr(skb);
245 ul = ntohs(udph->len); 253 ul = ntohs(udph->len);
246 254
247 if (udplite || udph->check) { 255 if (udplite || udph->check) {
@@ -276,10 +284,11 @@ ignore_obscure_skb:
276 return 1; 284 return 1;
277} 285}
278 286
279static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h, 287static int tcf_csum_ipv6_udp(struct sk_buff *skb,
280 unsigned int ihl, unsigned int ipl, int udplite) 288 unsigned int ihl, unsigned int ipl, int udplite)
281{ 289{
282 struct udphdr *udph; 290 struct udphdr *udph;
291 const struct ipv6hdr *ip6h;
283 u16 ul; 292 u16 ul;
284 293
285 /* 294 /*
@@ -293,6 +302,7 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h,
293 if (udph == NULL) 302 if (udph == NULL)
294 return 0; 303 return 0;
295 304
305 ip6h = ipv6_hdr(skb);
296 ul = ntohs(udph->len); 306 ul = ntohs(udph->len);
297 307
298 udph->check = 0; 308 udph->check = 0;
@@ -328,7 +338,7 @@ ignore_obscure_skb:
328 338
329static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) 339static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
330{ 340{
331 struct iphdr *iph; 341 const struct iphdr *iph;
332 int ntkoff; 342 int ntkoff;
333 343
334 ntkoff = skb_network_offset(skb); 344 ntkoff = skb_network_offset(skb);
@@ -353,19 +363,19 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
353 break; 363 break;
354 case IPPROTO_TCP: 364 case IPPROTO_TCP:
355 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) 365 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
356 if (!tcf_csum_ipv4_tcp(skb, iph, iph->ihl * 4, 366 if (!tcf_csum_ipv4_tcp(skb, iph->ihl * 4,
357 ntohs(iph->tot_len))) 367 ntohs(iph->tot_len)))
358 goto fail; 368 goto fail;
359 break; 369 break;
360 case IPPROTO_UDP: 370 case IPPROTO_UDP:
361 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) 371 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
362 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4, 372 if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4,
363 ntohs(iph->tot_len), 0)) 373 ntohs(iph->tot_len), 0))
364 goto fail; 374 goto fail;
365 break; 375 break;
366 case IPPROTO_UDPLITE: 376 case IPPROTO_UDPLITE:
367 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) 377 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
368 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4, 378 if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4,
369 ntohs(iph->tot_len), 1)) 379 ntohs(iph->tot_len), 1))
370 goto fail; 380 goto fail;
371 break; 381 break;
@@ -377,7 +387,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
377 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 387 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
378 goto fail; 388 goto fail;
379 389
380 ip_send_check(iph); 390 ip_send_check(ip_hdr(skb));
381 } 391 }
382 392
383 return 1; 393 return 1;
@@ -456,6 +466,7 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
456 ixhl = ipv6_optlen(ip6xh); 466 ixhl = ipv6_optlen(ip6xh);
457 if (!pskb_may_pull(skb, hl + ixhl + ntkoff)) 467 if (!pskb_may_pull(skb, hl + ixhl + ntkoff))
458 goto fail; 468 goto fail;
469 ip6xh = (void *)(skb_network_header(skb) + hl);
459 if ((nexthdr == NEXTHDR_HOP) && 470 if ((nexthdr == NEXTHDR_HOP) &&
460 !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl))) 471 !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl)))
461 goto fail; 472 goto fail;
@@ -464,25 +475,25 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
464 break; 475 break;
465 case IPPROTO_ICMPV6: 476 case IPPROTO_ICMPV6:
466 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) 477 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
467 if (!tcf_csum_ipv6_icmp(skb, ip6h, 478 if (!tcf_csum_ipv6_icmp(skb,
468 hl, pl + sizeof(*ip6h))) 479 hl, pl + sizeof(*ip6h)))
469 goto fail; 480 goto fail;
470 goto done; 481 goto done;
471 case IPPROTO_TCP: 482 case IPPROTO_TCP:
472 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) 483 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
473 if (!tcf_csum_ipv6_tcp(skb, ip6h, 484 if (!tcf_csum_ipv6_tcp(skb,
474 hl, pl + sizeof(*ip6h))) 485 hl, pl + sizeof(*ip6h)))
475 goto fail; 486 goto fail;
476 goto done; 487 goto done;
477 case IPPROTO_UDP: 488 case IPPROTO_UDP:
478 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) 489 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
479 if (!tcf_csum_ipv6_udp(skb, ip6h, hl, 490 if (!tcf_csum_ipv6_udp(skb, hl,
480 pl + sizeof(*ip6h), 0)) 491 pl + sizeof(*ip6h), 0))
481 goto fail; 492 goto fail;
482 goto done; 493 goto done;
483 case IPPROTO_UDPLITE: 494 case IPPROTO_UDPLITE:
484 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) 495 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
485 if (!tcf_csum_ipv6_udp(skb, ip6h, hl, 496 if (!tcf_csum_ipv6_udp(skb, hl,
486 pl + sizeof(*ip6h), 1)) 497 pl + sizeof(*ip6h), 1))
487 goto fail; 498 goto fail;
488 goto done; 499 goto done;
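
A pattern worth noting in the act_csum.c hunks: the header pointer arguments are dropped and re-derived from the skb (ip_hdr()/ipv6_hdr()) only after tcf_csum_skb_nextlayer(), because the pull/expand operations inside it may reallocate the packet data and leave any previously computed pointer dangling. A standalone sketch of the same revalidation discipline, with realloc() standing in for the skb operations:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct buf {
        unsigned char *data;
        size_t len;
    };

    /* May move b->data, like pskb_may_pull()/pskb_expand_head(). */
    static int buf_ensure(struct buf *b, size_t need)
    {
        if (b->len >= need)
            return 0;
        unsigned char *p = realloc(b->data, need);
        if (!p)
            return -1;
        memset(p + b->len, 0, need - b->len);
        b->data = p;
        b->len = need;
        return 0;
    }

    int main(void)
    {
        struct buf b = { calloc(16, 1), 16 };
        unsigned char *hdr;

        if (!b.data)
            return 1;
        hdr = b.data;                 /* pointer into the buffer       */
        if (buf_ensure(&b, 4096) < 0) /* may relocate the data ...     */
            return 1;
        hdr = b.data;                 /* ... so re-fetch it afterwards */
        hdr[0] = 0x45;
        puts("ok");
        free(b.data);
        return 0;
    }
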
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 964f5e4f4b8a..8e118af90973 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -22,7 +22,6 @@
22#include <linux/skbuff.h> 22#include <linux/skbuff.h>
23#include <linux/init.h> 23#include <linux/init.h>
24#include <linux/kmod.h> 24#include <linux/kmod.h>
25#include <linux/netlink.h>
26#include <linux/err.h> 25#include <linux/err.h>
27#include <linux/slab.h> 26#include <linux/slab.h>
28#include <net/net_namespace.h> 27#include <net/net_namespace.h>
@@ -118,7 +117,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
118 117
119/* Add/change/delete/get a filter node */ 118/* Add/change/delete/get a filter node */
120 119
121static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 120static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
122{ 121{
123 struct net *net = sock_net(skb->sk); 122 struct net *net = sock_net(skb->sk);
124 struct nlattr *tca[TCA_MAX + 1]; 123 struct nlattr *tca[TCA_MAX + 1];
@@ -141,7 +140,12 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
141 140
142 if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN)) 141 if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN))
143 return -EPERM; 142 return -EPERM;
143
144replay: 144replay:
145 err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
146 if (err < 0)
147 return err;
148
145 t = nlmsg_data(n); 149 t = nlmsg_data(n);
146 protocol = TC_H_MIN(t->tcm_info); 150 protocol = TC_H_MIN(t->tcm_info);
147 prio = TC_H_MAJ(t->tcm_info); 151 prio = TC_H_MAJ(t->tcm_info);
@@ -164,10 +168,6 @@ replay:
164 if (dev == NULL) 168 if (dev == NULL)
165 return -ENODEV; 169 return -ENODEV;
166 170
167 err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
168 if (err < 0)
169 return err;
170
171 /* Find qdisc */ 171 /* Find qdisc */
172 if (!parent) { 172 if (!parent) {
173 q = dev->qdisc; 173 q = dev->qdisc;
@@ -427,7 +427,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
427 const struct Qdisc_class_ops *cops; 427 const struct Qdisc_class_ops *cops;
428 struct tcf_dump_args arg; 428 struct tcf_dump_args arg;
429 429
430 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 430 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
431 return skb->len; 431 return skb->len;
432 dev = __dev_get_by_index(net, tcm->tcm_ifindex); 432 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
433 if (!dev) 433 if (!dev)
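
The cls_api.c change moves nlmsg_parse() to the very top of the handler, under the replay label, so the request is validated (and re-validated on replay) before any device lookup happens. The shape of that ordering, in a self-contained model where parse_request() and find_device() are illustrative stand-ins:

    #include <stdio.h>

    struct request {
        int ifindex;
        int attr;
    };

    /* Stands in for nlmsg_parse(): reject malformed input first. */
    static int parse_request(const char *raw, struct request *req)
    {
        if (!raw || sscanf(raw, "%d %d", &req->ifindex, &req->attr) != 2)
            return -1;
        return 0;
    }

    /* Stands in for __dev_get_by_index(). */
    static int find_device(int ifindex)
    {
        return ifindex == 1 ? 0 : -1;
    }

    static int handle(const char *raw)
    {
        struct request req;

        if (parse_request(raw, &req))   /* step 1: parse and validate */
            return -1;
        if (find_device(req.ifindex))   /* step 2: resolve references */
            return -1;
        return 0;                       /* step 3: act on the request */
    }

    int main(void)
    {
        printf("%d %d\n", handle("1 7"), handle("garbage"));
        return 0;
    }
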
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index aa36a8c8b33b..7881e2fccbc2 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -393,7 +393,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
393 return -EOPNOTSUPP; 393 return -EOPNOTSUPP;
394 394
395 if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) && 395 if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) &&
396 sk_user_ns(NETLINK_CB(in_skb).ssk) != &init_user_ns) 396 sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns)
397 return -EOPNOTSUPP; 397 return -EOPNOTSUPP;
398 } 398 }
399 399
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c297e2a8e2a1..2b935e7cfe7b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -971,13 +971,13 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
971 * Delete/get qdisc. 971 * Delete/get qdisc.
972 */ 972 */
973 973
974static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 974static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
975{ 975{
976 struct net *net = sock_net(skb->sk); 976 struct net *net = sock_net(skb->sk);
977 struct tcmsg *tcm = nlmsg_data(n); 977 struct tcmsg *tcm = nlmsg_data(n);
978 struct nlattr *tca[TCA_MAX + 1]; 978 struct nlattr *tca[TCA_MAX + 1];
979 struct net_device *dev; 979 struct net_device *dev;
980 u32 clid = tcm->tcm_parent; 980 u32 clid;
981 struct Qdisc *q = NULL; 981 struct Qdisc *q = NULL;
982 struct Qdisc *p = NULL; 982 struct Qdisc *p = NULL;
983 int err; 983 int err;
@@ -985,14 +985,15 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
985 if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN)) 985 if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN))
986 return -EPERM; 986 return -EPERM;
987 987
988 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
989 if (!dev)
990 return -ENODEV;
991
992 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 988 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
993 if (err < 0) 989 if (err < 0)
994 return err; 990 return err;
995 991
992 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
993 if (!dev)
994 return -ENODEV;
995
996 clid = tcm->tcm_parent;
996 if (clid) { 997 if (clid) {
997 if (clid != TC_H_ROOT) { 998 if (clid != TC_H_ROOT) {
998 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { 999 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
@@ -1038,7 +1039,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1038 * Create/change qdisc. 1039 * Create/change qdisc.
1039 */ 1040 */
1040 1041
1041static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 1042static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
1042{ 1043{
1043 struct net *net = sock_net(skb->sk); 1044 struct net *net = sock_net(skb->sk);
1044 struct tcmsg *tcm; 1045 struct tcmsg *tcm;
@@ -1053,6 +1054,10 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1053 1054
1054replay: 1055replay:
1055 /* Reinit, just in case something touches this. */ 1056 /* Reinit, just in case something touches this. */
1057 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1058 if (err < 0)
1059 return err;
1060
1056 tcm = nlmsg_data(n); 1061 tcm = nlmsg_data(n);
1057 clid = tcm->tcm_parent; 1062 clid = tcm->tcm_parent;
1058 q = p = NULL; 1063 q = p = NULL;
@@ -1061,9 +1066,6 @@ replay:
1061 if (!dev) 1066 if (!dev)
1062 return -ENODEV; 1067 return -ENODEV;
1063 1068
1064 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1065 if (err < 0)
1066 return err;
1067 1069
1068 if (clid) { 1070 if (clid) {
1069 if (clid != TC_H_ROOT) { 1071 if (clid != TC_H_ROOT) {
@@ -1372,7 +1374,7 @@ done:
1372 1374
1373 1375
1374 1376
1375static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 1377static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
1376{ 1378{
1377 struct net *net = sock_net(skb->sk); 1379 struct net *net = sock_net(skb->sk);
1378 struct tcmsg *tcm = nlmsg_data(n); 1380 struct tcmsg *tcm = nlmsg_data(n);
@@ -1382,22 +1384,22 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1382 const struct Qdisc_class_ops *cops; 1384 const struct Qdisc_class_ops *cops;
1383 unsigned long cl = 0; 1385 unsigned long cl = 0;
1384 unsigned long new_cl; 1386 unsigned long new_cl;
1385 u32 portid = tcm->tcm_parent; 1387 u32 portid;
1386 u32 clid = tcm->tcm_handle; 1388 u32 clid;
1387 u32 qid = TC_H_MAJ(clid); 1389 u32 qid;
1388 int err; 1390 int err;
1389 1391
1390 if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN)) 1392 if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN))
1391 return -EPERM; 1393 return -EPERM;
1392 1394
1393 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1394 if (!dev)
1395 return -ENODEV;
1396
1397 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 1395 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1398 if (err < 0) 1396 if (err < 0)
1399 return err; 1397 return err;
1400 1398
1399 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1400 if (!dev)
1401 return -ENODEV;
1402
1401 /* 1403 /*
1402 parent == TC_H_UNSPEC - unspecified parent. 1404 parent == TC_H_UNSPEC - unspecified parent.
1403 parent == TC_H_ROOT - class is root, which has no parent. 1405 parent == TC_H_ROOT - class is root, which has no parent.
@@ -1413,6 +1415,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1413 1415
1414 /* Step 1. Determine qdisc handle X:0 */ 1416 /* Step 1. Determine qdisc handle X:0 */
1415 1417
1418 portid = tcm->tcm_parent;
1419 clid = tcm->tcm_handle;
1420 qid = TC_H_MAJ(clid);
1421
1416 if (portid != TC_H_ROOT) { 1422 if (portid != TC_H_ROOT) {
1417 u32 qid1 = TC_H_MAJ(portid); 1423 u32 qid1 = TC_H_MAJ(portid);
1418 1424
@@ -1636,7 +1642,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1636 struct net_device *dev; 1642 struct net_device *dev;
1637 int t, s_t; 1643 int t, s_t;
1638 1644
1639 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 1645 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1640 return 0; 1646 return 0;
1641 dev = dev_get_by_index(net, tcm->tcm_ifindex); 1647 dev = dev_get_by_index(net, tcm->tcm_ifindex);
1642 if (!dev) 1648 if (!dev)
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 571f1d211f4d..79b1876b6cd2 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -981,6 +981,7 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
981 [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) }, 981 [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) },
982 [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, 982 [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
983 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, 983 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
984 [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
984}; 985};
985 986
986static void htb_work_func(struct work_struct *work) 987static void htb_work_func(struct work_struct *work)
@@ -994,7 +995,7 @@ static void htb_work_func(struct work_struct *work)
994static int htb_init(struct Qdisc *sch, struct nlattr *opt) 995static int htb_init(struct Qdisc *sch, struct nlattr *opt)
995{ 996{
996 struct htb_sched *q = qdisc_priv(sch); 997 struct htb_sched *q = qdisc_priv(sch);
997 struct nlattr *tb[TCA_HTB_INIT + 1]; 998 struct nlattr *tb[TCA_HTB_MAX + 1];
998 struct tc_htb_glob *gopt; 999 struct tc_htb_glob *gopt;
999 int err; 1000 int err;
1000 int i; 1001 int i;
@@ -1002,20 +1003,16 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
1002 if (!opt) 1003 if (!opt)
1003 return -EINVAL; 1004 return -EINVAL;
1004 1005
1005 err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy); 1006 err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
1006 if (err < 0) 1007 if (err < 0)
1007 return err; 1008 return err;
1008 1009
1009 if (tb[TCA_HTB_INIT] == NULL) { 1010 if (!tb[TCA_HTB_INIT])
1010 pr_err("HTB: hey probably you have bad tc tool ?\n");
1011 return -EINVAL; 1011 return -EINVAL;
1012 } 1012
1013 gopt = nla_data(tb[TCA_HTB_INIT]); 1013 gopt = nla_data(tb[TCA_HTB_INIT]);
1014 if (gopt->version != HTB_VER >> 16) { 1014 if (gopt->version != HTB_VER >> 16)
1015 pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
1016 HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
1017 return -EINVAL; 1015 return -EINVAL;
1018 }
1019 1016
1020 err = qdisc_class_hash_init(&q->clhash); 1017 err = qdisc_class_hash_init(&q->clhash);
1021 if (err < 0) 1018 if (err < 0)
@@ -1027,10 +1024,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
1027 INIT_WORK(&q->work, htb_work_func); 1024 INIT_WORK(&q->work, htb_work_func);
1028 skb_queue_head_init(&q->direct_queue); 1025 skb_queue_head_init(&q->direct_queue);
1029 1026
1030 q->direct_qlen = qdisc_dev(sch)->tx_queue_len; 1027 if (tb[TCA_HTB_DIRECT_QLEN])
1031 if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ 1028 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1032 q->direct_qlen = 2; 1029 else {
1033 1030 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1031 if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
1032 q->direct_qlen = 2;
1033 }
1034 if ((q->rate2quantum = gopt->rate2quantum) < 1) 1034 if ((q->rate2quantum = gopt->rate2quantum) < 1)
1035 q->rate2quantum = 1; 1035 q->rate2quantum = 1;
1036 q->defcls = gopt->defcls; 1036 q->defcls = gopt->defcls;
@@ -1056,7 +1056,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1056 nest = nla_nest_start(skb, TCA_OPTIONS); 1056 nest = nla_nest_start(skb, TCA_OPTIONS);
1057 if (nest == NULL) 1057 if (nest == NULL)
1058 goto nla_put_failure; 1058 goto nla_put_failure;
1059 if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt)) 1059 if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1060 nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1060 goto nla_put_failure; 1061 goto nla_put_failure;
1061 nla_nest_end(skb, nest); 1062 nla_nest_end(skb, nest);
1062 1063
@@ -1311,7 +1312,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1311 struct htb_sched *q = qdisc_priv(sch); 1312 struct htb_sched *q = qdisc_priv(sch);
1312 struct htb_class *cl = (struct htb_class *)*arg, *parent; 1313 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1313 struct nlattr *opt = tca[TCA_OPTIONS]; 1314 struct nlattr *opt = tca[TCA_OPTIONS];
1314 struct nlattr *tb[__TCA_HTB_MAX]; 1315 struct nlattr *tb[TCA_HTB_MAX + 1];
1315 struct tc_htb_opt *hopt; 1316 struct tc_htb_opt *hopt;
1316 1317
1317 /* extract all subattrs from opt attr */ 1318 /* extract all subattrs from opt attr */
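
The sch_htb.c hunks add a TCA_HTB_DIRECT_QLEN attribute so userspace can set the direct queue length explicitly, while keeping the old tx_queue_len-derived default when the attribute is absent. A sketch of that optional-attribute-with-fallback logic; the types below are stand-ins, not netlink API:

    #include <stdbool.h>
    #include <stdio.h>

    struct opt_u32 {
        bool present;
        unsigned int value;
    };

    static unsigned int direct_qlen(struct opt_u32 attr,
                                    unsigned int tx_queue_len)
    {
        if (attr.present)
            return attr.value;      /* explicit value from userspace */
        /* legacy default: the device queue length, at least 2, since
         * some devices have a zero tx_queue_len */
        return tx_queue_len < 2 ? 2 : tx_queue_len;
    }

    int main(void)
    {
        struct opt_u32 unset = { false, 0 };
        struct opt_u32 set = { true, 100 };

        printf("%u %u %u\n",
               direct_qlen(unset, 0),    /* -> 2    */
               direct_qlen(unset, 1000), /* -> 1000 */
               direct_qlen(set, 1000));  /* -> 100  */
        return 0;
    }
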
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index d2709e2b7be6..423549a714e5 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -104,8 +104,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
104 104
105 /* Initialize the object handling fields. */ 105 /* Initialize the object handling fields. */
106 atomic_set(&asoc->base.refcnt, 1); 106 atomic_set(&asoc->base.refcnt, 1);
107 asoc->base.dead = 0; 107 asoc->base.dead = false;
108 asoc->base.malloced = 0;
109 108
110 /* Initialize the bind addr area. */ 109 /* Initialize the bind addr area. */
111 sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); 110 sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port);
@@ -371,7 +370,6 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,
371 if (!sctp_association_init(asoc, ep, sk, scope, gfp)) 370 if (!sctp_association_init(asoc, ep, sk, scope, gfp))
372 goto fail_init; 371 goto fail_init;
373 372
374 asoc->base.malloced = 1;
375 SCTP_DBG_OBJCNT_INC(assoc); 373 SCTP_DBG_OBJCNT_INC(assoc);
376 SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc); 374 SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc);
377 375
@@ -409,7 +407,7 @@ void sctp_association_free(struct sctp_association *asoc)
409 /* Mark as dead, so other users can know this structure is 407 /* Mark as dead, so other users can know this structure is
410 * going away. 408 * going away.
411 */ 409 */
412 asoc->base.dead = 1; 410 asoc->base.dead = true;
413 411
414 /* Dispose of any data lying around in the outqueue. */ 412 /* Dispose of any data lying around in the outqueue. */
415 sctp_outq_free(&asoc->outqueue); 413 sctp_outq_free(&asoc->outqueue);
@@ -484,10 +482,8 @@ static void sctp_association_destroy(struct sctp_association *asoc)
484 482
485 WARN_ON(atomic_read(&asoc->rmem_alloc)); 483 WARN_ON(atomic_read(&asoc->rmem_alloc));
486 484
487 if (asoc->base.malloced) { 485 kfree(asoc);
488 kfree(asoc); 486 SCTP_DBG_OBJCNT_DEC(assoc);
489 SCTP_DBG_OBJCNT_DEC(assoc);
490 }
491} 487}
492 488
493/* Change the primary destination address for the peer. */ 489/* Change the primary destination address for the peer. */
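
The sctp changes in this and the following files all delete a per-object malloced flag: since every instance now comes from a single heap-allocating constructor, the destructors can free unconditionally instead of branching on ownership. A minimal model of that simplification (sctp details omitted, names illustrative):

    #include <stdio.h>
    #include <stdlib.h>

    struct assoc {
        int dead;               /* set before teardown, as in the patch */
        /* ... payload ... */
    };

    /* Sole allocation path; nothing embeds this struct any more. */
    static struct assoc *assoc_new(void)
    {
        return calloc(1, sizeof(struct assoc));
    }

    static void assoc_destroy(struct assoc *a)
    {
        free(a);                /* no "if (a->malloced)" branch needed */
    }

    int main(void)
    {
        struct assoc *a = assoc_new();

        if (!a)
            return 1;
        a->dead = 1;
        assoc_destroy(a);
        puts("freed unconditionally");
        return 0;
    }
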
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index d886b3bf84f5..41145fe31813 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -131,8 +131,6 @@ int sctp_bind_addr_dup(struct sctp_bind_addr *dest,
131 */ 131 */
132void sctp_bind_addr_init(struct sctp_bind_addr *bp, __u16 port) 132void sctp_bind_addr_init(struct sctp_bind_addr *bp, __u16 port)
133{ 133{
134 bp->malloced = 0;
135
136 INIT_LIST_HEAD(&bp->address_list); 134 INIT_LIST_HEAD(&bp->address_list);
137 bp->port = port; 135 bp->port = port;
138} 136}
@@ -155,11 +153,6 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp)
155{ 153{
156 /* Empty the bind address list. */ 154 /* Empty the bind address list. */
157 sctp_bind_addr_clean(bp); 155 sctp_bind_addr_clean(bp);
158
159 if (bp->malloced) {
160 kfree(bp);
161 SCTP_DBG_OBJCNT_DEC(bind_addr);
162 }
163} 156}
164 157
165/* Add an address to the bind address list in the SCTP_bind_addr structure. */ 158/* Add an address to the bind address list in the SCTP_bind_addr structure. */
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 12ed45dbe75d..5fbd7bc6bb11 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -121,8 +121,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
121 121
122 /* Initialize the basic object fields. */ 122 /* Initialize the basic object fields. */
123 atomic_set(&ep->base.refcnt, 1); 123 atomic_set(&ep->base.refcnt, 1);
124 ep->base.dead = 0; 124 ep->base.dead = false;
125 ep->base.malloced = 1;
126 125
127 /* Create an input queue. */ 126 /* Create an input queue. */
128 sctp_inq_init(&ep->base.inqueue); 127 sctp_inq_init(&ep->base.inqueue);
@@ -198,7 +197,7 @@ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp)
198 goto fail; 197 goto fail;
199 if (!sctp_endpoint_init(ep, sk, gfp)) 198 if (!sctp_endpoint_init(ep, sk, gfp))
200 goto fail_init; 199 goto fail_init;
201 ep->base.malloced = 1; 200
202 SCTP_DBG_OBJCNT_INC(ep); 201 SCTP_DBG_OBJCNT_INC(ep);
203 return ep; 202 return ep;
204 203
@@ -234,7 +233,7 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep,
234 */ 233 */
235void sctp_endpoint_free(struct sctp_endpoint *ep) 234void sctp_endpoint_free(struct sctp_endpoint *ep)
236{ 235{
237 ep->base.dead = 1; 236 ep->base.dead = true;
238 237
239 ep->base.sk->sk_state = SCTP_SS_CLOSED; 238 ep->base.sk->sk_state = SCTP_SS_CLOSED;
240 239
@@ -279,11 +278,8 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
279 if (ep->base.sk) 278 if (ep->base.sk)
280 sock_put(ep->base.sk); 279 sock_put(ep->base.sk);
281 280
282 /* Finally, free up our memory. */ 281 kfree(ep);
283 if (ep->base.malloced) { 282 SCTP_DBG_OBJCNT_DEC(ep);
284 kfree(ep);
285 SCTP_DBG_OBJCNT_DEC(ep);
286 }
287} 283}
288 284
289/* Hold a reference to an endpoint. */ 285/* Hold a reference to an endpoint. */
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 2d5ad280de38..3221d073448c 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -58,8 +58,6 @@ void sctp_inq_init(struct sctp_inq *queue)
58 58
59 /* Create a task for delivering data. */ 59 /* Create a task for delivering data. */
60 INIT_WORK(&queue->immediate, NULL); 60 INIT_WORK(&queue->immediate, NULL);
61
62 queue->malloced = 0;
63} 61}
64 62
65/* Release the memory associated with an SCTP inqueue. */ 63/* Release the memory associated with an SCTP inqueue. */
@@ -80,11 +78,6 @@ void sctp_inq_free(struct sctp_inq *queue)
80 sctp_chunk_free(queue->in_progress); 78 sctp_chunk_free(queue->in_progress);
81 queue->in_progress = NULL; 79 queue->in_progress = NULL;
82 } 80 }
83
84 if (queue->malloced) {
85 /* Dump the master memory segment. */
86 kfree(queue);
87 }
88} 81}
89 82
90/* Put a new packet in an SCTP inqueue. 83/* Put a new packet in an SCTP inqueue.
diff --git a/net/sctp/output.c b/net/sctp/output.c
index f5200a2ad852..bbef4a7a9b56 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -136,7 +136,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet,
136 packet->overhead = overhead; 136 packet->overhead = overhead;
137 sctp_packet_reset(packet); 137 sctp_packet_reset(packet);
138 packet->vtag = 0; 138 packet->vtag = 0;
139 packet->malloced = 0; 139
140 return packet; 140 return packet;
141} 141}
142 142
@@ -151,9 +151,6 @@ void sctp_packet_free(struct sctp_packet *packet)
151 list_del_init(&chunk->list); 151 list_del_init(&chunk->list);
152 sctp_chunk_free(chunk); 152 sctp_chunk_free(chunk);
153 } 153 }
154
155 if (packet->malloced)
156 kfree(packet);
157} 154}
158 155
159/* This routine tries to append the chunk to the offered packet. If adding 156/* This routine tries to append the chunk to the offered packet. If adding
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 01dca753db16..32a4625fef77 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -217,8 +217,6 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
217 q->outstanding_bytes = 0; 217 q->outstanding_bytes = 0;
218 q->empty = 1; 218 q->empty = 1;
219 q->cork = 0; 219 q->cork = 0;
220
221 q->malloced = 0;
222 q->out_qlen = 0; 220 q->out_qlen = 0;
223} 221}
224 222
@@ -295,10 +293,6 @@ void sctp_outq_free(struct sctp_outq *q)
295{ 293{
296 /* Throw away leftover chunks. */ 294 /* Throw away leftover chunks. */
297 __sctp_outq_teardown(q); 295 __sctp_outq_teardown(q);
298
299 /* If we were kmalloc()'d, free the memory. */
300 if (q->malloced)
301 kfree(q);
302} 296}
303 297
304/* Put a new chunk in an sctp_outq. */ 298/* Put a new chunk in an sctp_outq. */
@@ -707,11 +701,10 @@ redo:
707/* Cork the outqueue so queued chunks are really queued. */ 701/* Cork the outqueue so queued chunks are really queued. */
708int sctp_outq_uncork(struct sctp_outq *q) 702int sctp_outq_uncork(struct sctp_outq *q)
709{ 703{
710 int error = 0;
711 if (q->cork) 704 if (q->cork)
712 q->cork = 0; 705 q->cork = 0;
713 error = sctp_outq_flush(q, 0); 706
714 return error; 707 return sctp_outq_flush(q, 0);
715} 708}
716 709
717 710
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index ab3bba8cb0a8..4e45ee35d0db 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -295,7 +295,8 @@ static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos)
295 seq_printf(seq, " ASSOC SOCK STY SST ST HBKT " 295 seq_printf(seq, " ASSOC SOCK STY SST ST HBKT "
296 "ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT " 296 "ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT "
297 "RPORT LADDRS <-> RADDRS " 297 "RPORT LADDRS <-> RADDRS "
298 "HBINT INS OUTS MAXRT T1X T2X RTXC\n"); 298 "HBINT INS OUTS MAXRT T1X T2X RTXC "
299 "wmema wmemq sndbuf rcvbuf\n");
299 300
300 return (void *)pos; 301 return (void *)pos;
301} 302}
@@ -349,11 +350,16 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
349 sctp_seq_dump_local_addrs(seq, epb); 350 sctp_seq_dump_local_addrs(seq, epb);
350 seq_printf(seq, "<-> "); 351 seq_printf(seq, "<-> ");
351 sctp_seq_dump_remote_addrs(seq, assoc); 352 sctp_seq_dump_remote_addrs(seq, assoc);
352 seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d ", 353 seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d "
354 "%8d %8d %8d %8d",
353 assoc->hbinterval, assoc->c.sinit_max_instreams, 355 assoc->hbinterval, assoc->c.sinit_max_instreams,
354 assoc->c.sinit_num_ostreams, assoc->max_retrans, 356 assoc->c.sinit_num_ostreams, assoc->max_retrans,
355 assoc->init_retries, assoc->shutdown_retries, 357 assoc->init_retries, assoc->shutdown_retries,
356 assoc->rtx_data_chunks); 358 assoc->rtx_data_chunks,
359 atomic_read(&sk->sk_wmem_alloc),
360 sk->sk_wmem_queued,
361 sk->sk_sndbuf,
362 sk->sk_rcvbuf);
357 seq_printf(seq, "\n"); 363 seq_printf(seq, "\n");
358 } 364 }
359 read_unlock(&head->lock); 365 read_unlock(&head->lock);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b9070736b8d9..f631c5ff4dbf 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1119,9 +1119,10 @@ static int __sctp_connect(struct sock* sk,
1119 /* Make sure the destination port is correctly set 1119 /* Make sure the destination port is correctly set
1120 * in all addresses. 1120 * in all addresses.
1121 */ 1121 */
1122 if (asoc && asoc->peer.port && asoc->peer.port != port) 1122 if (asoc && asoc->peer.port && asoc->peer.port != port) {
1123 err = -EINVAL;
1123 goto out_free; 1124 goto out_free;
1124 1125 }
1125 1126
1126 /* Check if there already is a matching association on the 1127 /* Check if there already is a matching association on the
1127 * endpoint (other than the one created here). 1128 * endpoint (other than the one created here).
@@ -6185,7 +6186,8 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
6185 6186
6186 /* Is there any exceptional events? */ 6187 /* Is there any exceptional events? */
6187 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 6188 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
6188 mask |= POLLERR; 6189 mask |= POLLERR |
6190 sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
6189 if (sk->sk_shutdown & RCV_SHUTDOWN) 6191 if (sk->sk_shutdown & RCV_SHUTDOWN)
6190 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 6192 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
6191 if (sk->sk_shutdown == SHUTDOWN_MASK) 6193 if (sk->sk_shutdown == SHUTDOWN_MASK)
diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c
index 825ea94415b3..da8603523808 100644
--- a/net/sctp/ssnmap.c
+++ b/net/sctp/ssnmap.c
@@ -74,7 +74,6 @@ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out,
74 if (!sctp_ssnmap_init(retval, in, out)) 74 if (!sctp_ssnmap_init(retval, in, out))
75 goto fail_map; 75 goto fail_map;
76 76
77 retval->malloced = 1;
78 SCTP_DBG_OBJCNT_INC(ssnmap); 77 SCTP_DBG_OBJCNT_INC(ssnmap);
79 78
80 return retval; 79 return retval;
@@ -118,14 +117,16 @@ void sctp_ssnmap_clear(struct sctp_ssnmap *map)
118/* Dispose of a ssnmap. */ 117/* Dispose of a ssnmap. */
119void sctp_ssnmap_free(struct sctp_ssnmap *map) 118void sctp_ssnmap_free(struct sctp_ssnmap *map)
120{ 119{
121 if (map && map->malloced) { 120 int size;
122 int size; 121
123 122 if (unlikely(!map))
124 size = sctp_ssnmap_size(map->in.len, map->out.len); 123 return;
125 if (size <= KMALLOC_MAX_SIZE) 124
126 kfree(map); 125 size = sctp_ssnmap_size(map->in.len, map->out.len);
127 else 126 if (size <= KMALLOC_MAX_SIZE)
128 free_pages((unsigned long)map, get_order(size)); 127 kfree(map);
129 SCTP_DBG_OBJCNT_DEC(ssnmap); 128 else
130 } 129 free_pages((unsigned long)map, get_order(size));
130
131 SCTP_DBG_OBJCNT_DEC(ssnmap);
131} 132}
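
sctp_ssnmap_free() above now frees unconditionally, but it must recompute the map size first because the allocator was chosen by size: slab for small maps, page allocator for large ones. A standalone sketch of keeping the alloc and free paths in agreement; malloc() stands in for both kernel allocators and the threshold constant is illustrative:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define SMALL_MAX 4096      /* stand-in for KMALLOC_MAX_SIZE */

    struct ssnmap {
        unsigned short in_len, out_len;
    };

    /* Alloc and free must agree on this size computation. */
    static size_t map_size(unsigned short in, unsigned short out)
    {
        return sizeof(struct ssnmap)
               + (size_t)(in + out) * sizeof(unsigned short);
    }

    static struct ssnmap *map_new(unsigned short in, unsigned short out)
    {
        struct ssnmap *m = malloc(map_size(in, out));

        if (m) {
            memset(m, 0, map_size(in, out));
            m->in_len = in;
            m->out_len = out;
        }
        return m;
    }

    static void map_free(struct ssnmap *m)
    {
        size_t size;

        if (!m)
            return;             /* mirrors the new NULL check */
        size = map_size(m->in_len, m->out_len);
        /* the kernel picks kfree() or free_pages() based on this size */
        printf("freeing %zu bytes via %s\n", size,
               size <= SMALL_MAX ? "kfree" : "free_pages");
        free(m);
    }

    int main(void)
    {
        map_free(map_new(16, 16));
        return 0;
    }
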
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index fafd2a461ba0..098f1d5f769e 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -123,7 +123,6 @@ struct sctp_transport *sctp_transport_new(struct net *net,
123 if (!sctp_transport_init(net, transport, addr, gfp)) 123 if (!sctp_transport_init(net, transport, addr, gfp))
124 goto fail_init; 124 goto fail_init;
125 125
126 transport->malloced = 1;
127 SCTP_DBG_OBJCNT_INC(transport); 126 SCTP_DBG_OBJCNT_INC(transport);
128 127
129 return transport; 128 return transport;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 0fd5b3d2df03..04e3d470f877 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -68,7 +68,6 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq,
68 skb_queue_head_init(&ulpq->reasm); 68 skb_queue_head_init(&ulpq->reasm);
69 skb_queue_head_init(&ulpq->lobby); 69 skb_queue_head_init(&ulpq->lobby);
70 ulpq->pd_mode = 0; 70 ulpq->pd_mode = 0;
71 ulpq->malloced = 0;
72 71
73 return ulpq; 72 return ulpq;
74} 73}
@@ -96,8 +95,6 @@ void sctp_ulpq_flush(struct sctp_ulpq *ulpq)
96void sctp_ulpq_free(struct sctp_ulpq *ulpq) 95void sctp_ulpq_free(struct sctp_ulpq *ulpq)
97{ 96{
98 sctp_ulpq_flush(ulpq); 97 sctp_ulpq_flush(ulpq);
99 if (ulpq->malloced)
100 kfree(ulpq);
101} 98}
102 99
103/* Process an incoming DATA chunk. */ 100/* Process an incoming DATA chunk. */
diff --git a/net/socket.c b/net/socket.c
index 88f759adf3af..280283f03ccc 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -600,7 +600,7 @@ void sock_release(struct socket *sock)
600} 600}
601EXPORT_SYMBOL(sock_release); 601EXPORT_SYMBOL(sock_release);
602 602
603int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) 603void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
604{ 604{
605 *tx_flags = 0; 605 *tx_flags = 0;
606 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) 606 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
@@ -609,7 +609,6 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
609 *tx_flags |= SKBTX_SW_TSTAMP; 609 *tx_flags |= SKBTX_SW_TSTAMP;
610 if (sock_flag(sk, SOCK_WIFI_STATUS)) 610 if (sock_flag(sk, SOCK_WIFI_STATUS))
611 *tx_flags |= SKBTX_WIFI_STATUS; 611 *tx_flags |= SKBTX_WIFI_STATUS;
612 return 0;
613} 612}
614EXPORT_SYMBOL(sock_tx_timestamp); 613EXPORT_SYMBOL(sock_tx_timestamp);
615 614
@@ -682,16 +681,6 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
682} 681}
683EXPORT_SYMBOL(kernel_sendmsg); 682EXPORT_SYMBOL(kernel_sendmsg);
684 683
685static int ktime2ts(ktime_t kt, struct timespec *ts)
686{
687 if (kt.tv64) {
688 *ts = ktime_to_timespec(kt);
689 return 1;
690 } else {
691 return 0;
692 }
693}
694
695/* 684/*
696 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) 685 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
697 */ 686 */
@@ -724,17 +713,15 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
724 713
725 714
726 memset(ts, 0, sizeof(ts)); 715 memset(ts, 0, sizeof(ts));
727 if (skb->tstamp.tv64 && 716 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) &&
728 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) { 717 ktime_to_timespec_cond(skb->tstamp, ts + 0))
729 skb_get_timestampns(skb, ts + 0);
730 empty = 0; 718 empty = 0;
731 }
732 if (shhwtstamps) { 719 if (shhwtstamps) {
733 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && 720 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
734 ktime2ts(shhwtstamps->syststamp, ts + 1)) 721 ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1))
735 empty = 0; 722 empty = 0;
736 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && 723 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
737 ktime2ts(shhwtstamps->hwtstamp, ts + 2)) 724 ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2))
738 empty = 0; 725 empty = 0;
739 } 726 }
740 if (!empty) 727 if (!empty)
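
The socket.c hunk replaces the private ktime2ts() helper with ktime_to_timespec_cond(), which converts only when the timestamp is actually set and reports whether it wrote the output. A standalone sketch of that convert-if-set idiom, with ktime reduced to a signed nanosecond count:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <time.h>

    /* Convert only when ns is non-zero; report whether *ts was written. */
    static bool ns_to_timespec_cond(int64_t ns, struct timespec *ts)
    {
        if (!ns)
            return false;       /* unset timestamp: leave *ts alone */
        ts->tv_sec = ns / 1000000000;
        ts->tv_nsec = ns % 1000000000;
        return true;
    }

    int main(void)
    {
        struct timespec ts[3];
        bool empty = true;

        memset(ts, 0, sizeof(ts));
        /* mirrors __sock_recv_timestamp(): each source fills one slot */
        if (ns_to_timespec_cond(1500000000LL, ts + 0))
            empty = false;
        if (ns_to_timespec_cond(0, ts + 1))   /* unset: slot skipped */
            empty = false;
        printf("empty=%d ts0=%ld.%09ld\n", empty,
               (long)ts[0].tv_sec, ts[0].tv_nsec);
        return 0;
    }
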
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 4f99600a5fed..c890848f9d56 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -31,3 +31,10 @@ config TIPC_PORTS
31 31
32 Setting this to a smaller value saves some memory, 32 Setting this to a smaller value saves some memory,
33 setting it to higher allows for more ports. 33 setting it to higher allows for more ports.
34
35config TIPC_MEDIA_IB
36 bool "InfiniBand media type support"
37 depends on TIPC && INFINIBAND_IPOIB
38 help
39 Saying Y here will enable support for running TIPC on
40 IP-over-InfiniBand devices.
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 6cd55d671d3a..4df8e02d9008 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -9,3 +9,5 @@ tipc-y += addr.o bcast.o bearer.o config.o \
9 name_distr.o subscr.o name_table.o net.o \ 9 name_distr.o subscr.o name_table.o net.o \
10 netlink.o node.o node_subscr.o port.o ref.o \ 10 netlink.o node.o node_subscr.o port.o ref.o \
11 socket.o log.o eth_media.o 11 socket.o log.o eth_media.o
12
13tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 2655c9f4ecad..25e159c2feb4 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -620,10 +620,10 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
620 continue; /* bearer pair doesn't add anything */ 620 continue; /* bearer pair doesn't add anything */
621 621
622 if (!tipc_bearer_blocked(p)) 622 if (!tipc_bearer_blocked(p))
623 tipc_bearer_send(p, buf, &p->media->bcast_addr); 623 tipc_bearer_send(p, buf, &p->bcast_addr);
624 else if (s && !tipc_bearer_blocked(s)) 624 else if (s && !tipc_bearer_blocked(s))
625 /* unable to send on primary bearer */ 625 /* unable to send on primary bearer */
626 tipc_bearer_send(s, buf, &s->media->bcast_addr); 626 tipc_bearer_send(s, buf, &s->bcast_addr);
627 else 627 else
628 /* unable to send on either bearer */ 628 /* unable to send on either bearer */
629 continue; 629 continue;
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index aa62f93a9127..cb29ef7ba2f0 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -39,7 +39,7 @@
39#include "bearer.h" 39#include "bearer.h"
40#include "discover.h" 40#include "discover.h"
41 41
42#define MAX_ADDR_STR 32 42#define MAX_ADDR_STR 60
43 43
44static struct tipc_media *media_list[MAX_MEDIA]; 44static struct tipc_media *media_list[MAX_MEDIA];
45static u32 media_count; 45static u32 media_count;
@@ -89,9 +89,6 @@ int tipc_register_media(struct tipc_media *m_ptr)
89 89
90 if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME) 90 if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME)
91 goto exit; 91 goto exit;
92 if ((m_ptr->bcast_addr.media_id != m_ptr->type_id) ||
93 !m_ptr->bcast_addr.broadcast)
94 goto exit;
95 if (m_ptr->priority > TIPC_MAX_LINK_PRI) 92 if (m_ptr->priority > TIPC_MAX_LINK_PRI)
96 goto exit; 93 goto exit;
97 if ((m_ptr->tolerance < TIPC_MIN_LINK_TOL) || 94 if ((m_ptr->tolerance < TIPC_MIN_LINK_TOL) ||
@@ -407,7 +404,7 @@ restart:
407 INIT_LIST_HEAD(&b_ptr->links); 404 INIT_LIST_HEAD(&b_ptr->links);
408 spin_lock_init(&b_ptr->lock); 405 spin_lock_init(&b_ptr->lock);
409 406
410 res = tipc_disc_create(b_ptr, &m_ptr->bcast_addr, disc_domain); 407 res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr, disc_domain);
411 if (res) { 408 if (res) {
412 bearer_disable(b_ptr); 409 bearer_disable(b_ptr);
413 pr_warn("Bearer <%s> rejected, discovery object creation failed\n", 410 pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 39f1192d04bf..09c869adcfcf 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -56,6 +56,7 @@
56 * Identifiers of supported TIPC media types 56 * Identifiers of supported TIPC media types
57 */ 57 */
58#define TIPC_MEDIA_TYPE_ETH 1 58#define TIPC_MEDIA_TYPE_ETH 1
59#define TIPC_MEDIA_TYPE_IB 2
59 60
60/** 61/**
61 * struct tipc_media_addr - destination address used by TIPC bearers 62 * struct tipc_media_addr - destination address used by TIPC bearers
@@ -77,7 +78,6 @@ struct tipc_bearer;
77 * @enable_bearer: routine which enables a bearer 78 * @enable_bearer: routine which enables a bearer
78 * @disable_bearer: routine which disables a bearer 79 * @disable_bearer: routine which disables a bearer
79 * @addr2str: routine which converts media address to string 80 * @addr2str: routine which converts media address to string
80 * @str2addr: routine which converts media address from string
81 * @addr2msg: routine which converts media address to protocol message area 81 * @addr2msg: routine which converts media address to protocol message area
82 * @msg2addr: routine which converts media address from protocol message area 82 * @msg2addr: routine which converts media address from protocol message area
83 * @bcast_addr: media address used in broadcasting 83 * @bcast_addr: media address used in broadcasting
@@ -94,10 +94,9 @@ struct tipc_media {
94 int (*enable_bearer)(struct tipc_bearer *b_ptr); 94 int (*enable_bearer)(struct tipc_bearer *b_ptr);
95 void (*disable_bearer)(struct tipc_bearer *b_ptr); 95 void (*disable_bearer)(struct tipc_bearer *b_ptr);
96 int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size); 96 int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size);
97 int (*str2addr)(struct tipc_media_addr *a, char *str_buf);
98 int (*addr2msg)(struct tipc_media_addr *a, char *msg_area); 97 int (*addr2msg)(struct tipc_media_addr *a, char *msg_area);
99 int (*msg2addr)(struct tipc_media_addr *a, char *msg_area); 98 int (*msg2addr)(const struct tipc_bearer *b_ptr,
100 struct tipc_media_addr bcast_addr; 99 struct tipc_media_addr *a, char *msg_area);
101 u32 priority; 100 u32 priority;
102 u32 tolerance; 101 u32 tolerance;
103 u32 window; 102 u32 window;
@@ -136,6 +135,7 @@ struct tipc_bearer {
136 char name[TIPC_MAX_BEARER_NAME]; 135 char name[TIPC_MAX_BEARER_NAME];
137 spinlock_t lock; 136 spinlock_t lock;
138 struct tipc_media *media; 137 struct tipc_media *media;
138 struct tipc_media_addr bcast_addr;
139 u32 priority; 139 u32 priority;
140 u32 window; 140 u32 window;
141 u32 tolerance; 141 u32 tolerance;
@@ -175,6 +175,14 @@ int tipc_disable_bearer(const char *name);
175int tipc_eth_media_start(void); 175int tipc_eth_media_start(void);
176void tipc_eth_media_stop(void); 176void tipc_eth_media_stop(void);
177 177
178#ifdef CONFIG_TIPC_MEDIA_IB
179int tipc_ib_media_start(void);
180void tipc_ib_media_stop(void);
181#else
182static inline int tipc_ib_media_start(void) { return 0; }
183static inline void tipc_ib_media_stop(void) { return; }
184#endif
185
178int tipc_media_set_priority(const char *name, u32 new_value); 186int tipc_media_set_priority(const char *name, u32 new_value);
179int tipc_media_set_window(const char *name, u32 new_value); 187int tipc_media_set_window(const char *name, u32 new_value);
180void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); 188void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
diff --git a/net/tipc/core.c b/net/tipc/core.c
index fc05cecd7481..7ec2c1eb94f1 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -82,6 +82,7 @@ static void tipc_core_stop_net(void)
82{ 82{
83 tipc_net_stop(); 83 tipc_net_stop();
84 tipc_eth_media_stop(); 84 tipc_eth_media_stop();
85 tipc_ib_media_stop();
85} 86}
86 87
87/** 88/**
@@ -93,8 +94,15 @@ int tipc_core_start_net(unsigned long addr)
93 94
94 tipc_net_start(addr); 95 tipc_net_start(addr);
95 res = tipc_eth_media_start(); 96 res = tipc_eth_media_start();
96 if (res) 97 if (res < 0)
97 tipc_core_stop_net(); 98 goto err;
99 res = tipc_ib_media_start();
100 if (res < 0)
101 goto err;
102 return res;
103
104err:
105 tipc_core_stop_net();
98 return res; 106 return res;
99} 107}
100 108
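
tipc_core_start_net() above switches to a single error label so the Ethernet and the new InfiniBand media start-ups share one teardown path. The general shape of that goto-based unwinding, with illustrative step functions:

    #include <stdio.h>

    static int start_eth(void) { return 0; }
    static int start_ib(void)  { return -1; }   /* simulate a failure */
    static void stop_all(void) { puts("tearing everything down"); }

    static int start_net(void)
    {
        int res;

        res = start_eth();
        if (res < 0)
            goto err;
        res = start_ib();
        if (res < 0)
            goto err;
        return 0;

    err:
        stop_all();     /* one teardown path for every failure point */
        return res;
    }

    int main(void)
    {
        printf("start_net() = %d\n", start_net());
        return 0;
    }
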
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 1074b9587e81..eedff58d0387 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -129,7 +129,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)
129 int link_fully_up; 129 int link_fully_up;
130 130
131 media_addr.broadcast = 1; 131 media_addr.broadcast = 1;
132 b_ptr->media->msg2addr(&media_addr, msg_media_addr(msg)); 132 b_ptr->media->msg2addr(b_ptr, &media_addr, msg_media_addr(msg));
133 kfree_skb(buf); 133 kfree_skb(buf);
134 134
135 /* Ensure message from node is valid and communication is permitted */ 135 /* Ensure message from node is valid and communication is permitted */
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 2132c1ef2951..120a676a3360 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -77,12 +77,13 @@ static struct notifier_block notifier = {
77 * Media-dependent "value" field stores MAC address in first 6 bytes 77 * Media-dependent "value" field stores MAC address in first 6 bytes
78 * and zeroes out the remaining bytes. 78 * and zeroes out the remaining bytes.
79 */ 79 */
80static void eth_media_addr_set(struct tipc_media_addr *a, char *mac) 80static void eth_media_addr_set(const struct tipc_bearer *tb_ptr,
81 struct tipc_media_addr *a, char *mac)
81{ 82{
82 memcpy(a->value, mac, ETH_ALEN); 83 memcpy(a->value, mac, ETH_ALEN);
83 memset(a->value + ETH_ALEN, 0, sizeof(a->value) - ETH_ALEN); 84 memset(a->value + ETH_ALEN, 0, sizeof(a->value) - ETH_ALEN);
84 a->media_id = TIPC_MEDIA_TYPE_ETH; 85 a->media_id = TIPC_MEDIA_TYPE_ETH;
85 a->broadcast = !memcmp(mac, eth_media_info.bcast_addr.value, ETH_ALEN); 86 a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, ETH_ALEN);
86} 87}
87 88
88/** 89/**
@@ -110,6 +111,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
110 111
111 skb_reset_network_header(clone); 112 skb_reset_network_header(clone);
112 clone->dev = dev; 113 clone->dev = dev;
114 clone->protocol = htons(ETH_P_TIPC);
113 dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, 115 dev_hard_header(clone, dev, ETH_P_TIPC, dest->value,
114 dev->dev_addr, clone->len); 116 dev->dev_addr, clone->len);
115 dev_queue_xmit(clone); 117 dev_queue_xmit(clone);
@@ -201,9 +203,13 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
201 /* Associate TIPC bearer with Ethernet bearer */ 203 /* Associate TIPC bearer with Ethernet bearer */
202 eb_ptr->bearer = tb_ptr; 204 eb_ptr->bearer = tb_ptr;
203 tb_ptr->usr_handle = (void *)eb_ptr; 205 tb_ptr->usr_handle = (void *)eb_ptr;
206 memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value));
207 memcpy(tb_ptr->bcast_addr.value, dev->broadcast, ETH_ALEN);
208 tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_ETH;
209 tb_ptr->bcast_addr.broadcast = 1;
204 tb_ptr->mtu = dev->mtu; 210 tb_ptr->mtu = dev->mtu;
205 tb_ptr->blocked = 0; 211 tb_ptr->blocked = 0;
206 eth_media_addr_set(&tb_ptr->addr, (char *)dev->dev_addr); 212 eth_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr);
207 return 0; 213 return 0;
208} 214}
209 215
@@ -302,25 +308,6 @@ static int eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size)
302} 308}
303 309
304/** 310/**
305 * eth_str2addr - convert string to Ethernet address
306 */
307static int eth_str2addr(struct tipc_media_addr *a, char *str_buf)
308{
309 char mac[ETH_ALEN];
310 int r;
311
312 r = sscanf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x",
313 (u32 *)&mac[0], (u32 *)&mac[1], (u32 *)&mac[2],
314 (u32 *)&mac[3], (u32 *)&mac[4], (u32 *)&mac[5]);
315
316 if (r != ETH_ALEN)
317 return 1;
318
319 eth_media_addr_set(a, mac);
320 return 0;
321}
322
323/**
324 * eth_addr2msg - convert Ethernet address format to message header format 311 * eth_addr2msg - convert Ethernet address format to message header format
325 */ 312 */
326static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area) 313static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area)
@@ -334,12 +321,13 @@ static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area)
334/** 321/**
335 * eth_msg2addr - convert message header address format to Ethernet format 322 * eth_msg2addr - convert message header address format to Ethernet format
336 */ 323 */
337static int eth_msg2addr(struct tipc_media_addr *a, char *msg_area) 324static int eth_msg2addr(const struct tipc_bearer *tb_ptr,
325 struct tipc_media_addr *a, char *msg_area)
338{ 326{
339 if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH) 327 if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH)
340 return 1; 328 return 1;
341 329
342 eth_media_addr_set(a, msg_area + ETH_ADDR_OFFSET); 330 eth_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET);
343 return 0; 331 return 0;
344} 332}
345 333
@@ -351,11 +339,8 @@ static struct tipc_media eth_media_info = {
351 .enable_bearer = enable_bearer, 339 .enable_bearer = enable_bearer,
352 .disable_bearer = disable_bearer, 340 .disable_bearer = disable_bearer,
353 .addr2str = eth_addr2str, 341 .addr2str = eth_addr2str,
354 .str2addr = eth_str2addr,
355 .addr2msg = eth_addr2msg, 342 .addr2msg = eth_addr2msg,
356 .msg2addr = eth_msg2addr, 343 .msg2addr = eth_msg2addr,
357 .bcast_addr = { { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
358 TIPC_MEDIA_TYPE_ETH, 1 },
359 .priority = TIPC_DEF_LINK_PRI, 344 .priority = TIPC_DEF_LINK_PRI,
360 .tolerance = TIPC_DEF_LINK_TOL, 345 .tolerance = TIPC_DEF_LINK_TOL,
361 .window = TIPC_DEF_LINK_WIN, 346 .window = TIPC_DEF_LINK_WIN,
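
With this change the Ethernet media no longer carries a hard-coded broadcast address: enable_bearer() copies the device's own broadcast address into the bearer, and eth_media_addr_set() classifies addresses against that copy. A small stand-alone sketch of the idea, with simplified stand-in types (struct bearer and struct media_addr are illustrative, not the TIPC structures):

#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6
#define ADDR_SIZE 32

struct bearer {
        unsigned char bcast[ADDR_SIZE];
};

struct media_addr {
        unsigned char value[ADDR_SIZE];
        int broadcast;
};

static void media_addr_set(const struct bearer *b, struct media_addr *a,
                           const unsigned char *mac)
{
        memset(a->value, 0, sizeof(a->value));
        memcpy(a->value, mac, ETH_ALEN);
        /* classify against the bearer's copy, not a media-wide constant */
        a->broadcast = !memcmp(mac, b->bcast, ETH_ALEN);
}

int main(void)
{
        static const unsigned char dev_bcast[ETH_ALEN] =
                { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
        static const unsigned char ucast[ETH_ALEN] =
                { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
        struct bearer b;
        struct media_addr a;

        /* enable_bearer(): capture the device's broadcast address */
        memset(b.bcast, 0, sizeof(b.bcast));
        memcpy(b.bcast, dev_bcast, ETH_ALEN);

        media_addr_set(&b, &a, dev_bcast);
        printf("broadcast=%d\n", a.broadcast);   /* 1 */
        media_addr_set(&b, &a, ucast);
        printf("broadcast=%d\n", a.broadcast);   /* 0 */
        return 0;
}
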
diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c
new file mode 100644
index 000000000000..2a2864c25e15
--- /dev/null
+++ b/net/tipc/ib_media.c
@@ -0,0 +1,387 @@
1/*
2 * net/tipc/ib_media.c: InfiniBand bearer support for TIPC
3 *
4 * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
5 *
6 * Based on eth_media.c, which carries the following copyright notice:
7 *
8 * Copyright (c) 2001-2007, Ericsson AB
9 * Copyright (c) 2005-2008, 2011, Wind River Systems
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions are met:
14 *
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the names of the copyright holders nor the names of its
21 * contributors may be used to endorse or promote products derived from
22 * this software without specific prior written permission.
23 *
24 * Alternatively, this software may be distributed under the terms of the
25 * GNU General Public License ("GPL") version 2 as published by the Free
26 * Software Foundation.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41#include <linux/if_infiniband.h>
42#include "core.h"
43#include "bearer.h"
44
45#define MAX_IB_BEARERS MAX_BEARERS
46
47/**
48 * struct ib_bearer - InfiniBand bearer data structure
49 * @bearer: ptr to associated "generic" bearer structure
50 * @dev: ptr to associated InfiniBand network device
51 * @tipc_packet_type: used in binding TIPC to InfiniBand driver
52 * @cleanup: work item used when disabling bearer
53 */
54
55struct ib_bearer {
56 struct tipc_bearer *bearer;
57 struct net_device *dev;
58 struct packet_type tipc_packet_type;
59 struct work_struct setup;
60 struct work_struct cleanup;
61};
62
63static struct tipc_media ib_media_info;
64static struct ib_bearer ib_bearers[MAX_IB_BEARERS];
65static int ib_started;
66
67/**
68 * ib_media_addr_set - initialize InfiniBand media address structure
69 *
70 * Media-dependent "value" field stores the hardware address in the first
71 * INFINIBAND_ALEN (20) bytes; the remaining bytes are left unchanged.
72 */
73static void ib_media_addr_set(const struct tipc_bearer *tb_ptr,
74 struct tipc_media_addr *a, char *mac)
75{
76 BUILD_BUG_ON(sizeof(a->value) < INFINIBAND_ALEN);
77 memcpy(a->value, mac, INFINIBAND_ALEN);
78 a->media_id = TIPC_MEDIA_TYPE_IB;
79 a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, INFINIBAND_ALEN);
80}
81
82/**
83 * send_msg - send a TIPC message out over an InfiniBand interface
84 */
85static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
86 struct tipc_media_addr *dest)
87{
88 struct sk_buff *clone;
89 struct net_device *dev;
90 int delta;
91
92 clone = skb_clone(buf, GFP_ATOMIC);
93 if (!clone)
94 return 0;
95
96 dev = ((struct ib_bearer *)(tb_ptr->usr_handle))->dev;
97 delta = dev->hard_header_len - skb_headroom(buf);
98
99 if ((delta > 0) &&
100 pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
101 kfree_skb(clone);
102 return 0;
103 }
104
105 skb_reset_network_header(clone);
106 clone->dev = dev;
107 clone->protocol = htons(ETH_P_TIPC);
108 dev_hard_header(clone, dev, ETH_P_TIPC, dest->value,
109 dev->dev_addr, clone->len);
110 dev_queue_xmit(clone);
111 return 0;
112}
113
114/**
115 * recv_msg - handle incoming TIPC message from an InfiniBand interface
116 *
117 * Accepts only packets explicitly sent to this node, or broadcast packets;
118 * ignores packets sent using InfiniBand multicast, and traffic sent to other
119 * nodes (which can happen if interface is running in promiscuous mode).
120 */
121static int recv_msg(struct sk_buff *buf, struct net_device *dev,
122 struct packet_type *pt, struct net_device *orig_dev)
123{
124 struct ib_bearer *ib_ptr = (struct ib_bearer *)pt->af_packet_priv;
125
126 if (!net_eq(dev_net(dev), &init_net)) {
127 kfree_skb(buf);
128 return 0;
129 }
130
131 if (likely(ib_ptr->bearer)) {
132 if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
133 buf->next = NULL;
134 tipc_recv_msg(buf, ib_ptr->bearer);
135 return 0;
136 }
137 }
138 kfree_skb(buf);
139 return 0;
140}
141
142/**
143 * setup_bearer - setup association between InfiniBand bearer and interface
144 */
145static void setup_bearer(struct work_struct *work)
146{
147 struct ib_bearer *ib_ptr =
148 container_of(work, struct ib_bearer, setup);
149
150 dev_add_pack(&ib_ptr->tipc_packet_type);
151}
152
153/**
154 * enable_bearer - attach TIPC bearer to an InfiniBand interface
155 */
156static int enable_bearer(struct tipc_bearer *tb_ptr)
157{
158 struct net_device *dev = NULL;
159 struct net_device *pdev = NULL;
160 struct ib_bearer *ib_ptr = &ib_bearers[0];
161 struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS];
162 char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1;
163 int pending_dev = 0;
164
165 /* Find unused InfiniBand bearer structure */
166 while (ib_ptr->dev) {
167 if (!ib_ptr->bearer)
168 pending_dev++;
169 if (++ib_ptr == stop)
170 return pending_dev ? -EAGAIN : -EDQUOT;
171 }
172
173 /* Find device with specified name */
174 read_lock(&dev_base_lock);
175 for_each_netdev(&init_net, pdev) {
176 if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) {
177 dev = pdev;
178 dev_hold(dev);
179 break;
180 }
181 }
182 read_unlock(&dev_base_lock);
183 if (!dev)
184 return -ENODEV;
185
186 /* Create InfiniBand bearer for device */
187 ib_ptr->dev = dev;
188 ib_ptr->tipc_packet_type.type = htons(ETH_P_TIPC);
189 ib_ptr->tipc_packet_type.dev = dev;
190 ib_ptr->tipc_packet_type.func = recv_msg;
191 ib_ptr->tipc_packet_type.af_packet_priv = ib_ptr;
192 INIT_LIST_HEAD(&(ib_ptr->tipc_packet_type.list));
193 INIT_WORK(&ib_ptr->setup, setup_bearer);
194 schedule_work(&ib_ptr->setup);
195
196 /* Associate TIPC bearer with InfiniBand bearer */
197 ib_ptr->bearer = tb_ptr;
198 tb_ptr->usr_handle = (void *)ib_ptr;
199 memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value));
200 memcpy(tb_ptr->bcast_addr.value, dev->broadcast, INFINIBAND_ALEN);
201 tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_IB;
202 tb_ptr->bcast_addr.broadcast = 1;
203 tb_ptr->mtu = dev->mtu;
204 tb_ptr->blocked = 0;
205 ib_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr);
206 return 0;
207}
208
209/**
210 * cleanup_bearer - break association between InfiniBand bearer and interface
211 *
212 * This routine must be invoked from a work queue because it can sleep.
213 */
214static void cleanup_bearer(struct work_struct *work)
215{
216 struct ib_bearer *ib_ptr =
217 container_of(work, struct ib_bearer, cleanup);
218
219 dev_remove_pack(&ib_ptr->tipc_packet_type);
220 dev_put(ib_ptr->dev);
221 ib_ptr->dev = NULL;
222}
223
224/**
225 * disable_bearer - detach TIPC bearer from an InfiniBand interface
226 *
227 * Mark InfiniBand bearer as inactive so that incoming buffers are thrown away,
228 * then get worker thread to complete bearer cleanup. (Can't do cleanup
229 * here because cleanup code needs to sleep and caller holds spinlocks.)
230 */
231static void disable_bearer(struct tipc_bearer *tb_ptr)
232{
233 struct ib_bearer *ib_ptr = (struct ib_bearer *)tb_ptr->usr_handle;
234
235 ib_ptr->bearer = NULL;
236 INIT_WORK(&ib_ptr->cleanup, cleanup_bearer);
237 schedule_work(&ib_ptr->cleanup);
238}
239
240/**
241 * recv_notification - handle device updates from OS
242 *
243 * Change the state of the InfiniBand bearer (if any) associated with the
244 * specified device.
245 */
246static int recv_notification(struct notifier_block *nb, unsigned long evt,
247 void *dv)
248{
249 struct net_device *dev = (struct net_device *)dv;
250 struct ib_bearer *ib_ptr = &ib_bearers[0];
251 struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS];
252
253 if (!net_eq(dev_net(dev), &init_net))
254 return NOTIFY_DONE;
255
256 while (ib_ptr->dev != dev) {
257 if (++ib_ptr == stop)
258 return NOTIFY_DONE; /* couldn't find device */
259 }
260 if (!ib_ptr->bearer)
261 return NOTIFY_DONE; /* bearer had been disabled */
262
263 ib_ptr->bearer->mtu = dev->mtu;
264
265 switch (evt) {
266 case NETDEV_CHANGE:
267 if (netif_carrier_ok(dev))
268 tipc_continue(ib_ptr->bearer);
269 else
270 tipc_block_bearer(ib_ptr->bearer->name);
271 break;
272 case NETDEV_UP:
273 tipc_continue(ib_ptr->bearer);
274 break;
275 case NETDEV_DOWN:
276 tipc_block_bearer(ib_ptr->bearer->name);
277 break;
278 case NETDEV_CHANGEMTU:
279 case NETDEV_CHANGEADDR:
280 tipc_block_bearer(ib_ptr->bearer->name);
281 tipc_continue(ib_ptr->bearer);
282 break;
283 case NETDEV_UNREGISTER:
284 case NETDEV_CHANGENAME:
285 tipc_disable_bearer(ib_ptr->bearer->name);
286 break;
287 }
288 return NOTIFY_OK;
289}
290
291static struct notifier_block notifier = {
292 .notifier_call = recv_notification,
293 .priority = 0,
294};
295
296/**
297 * ib_addr2str - convert InfiniBand address to string
298 */
299static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size)
300{
301 if (str_size < 60) /* 60 = 19 * strlen("xx:") + sizeof("xx") */
302 return 1;
303
304 sprintf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:"
305 "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x",
306 a->value[0], a->value[1], a->value[2], a->value[3],
307 a->value[4], a->value[5], a->value[6], a->value[7],
308 a->value[8], a->value[9], a->value[10], a->value[11],
309 a->value[12], a->value[13], a->value[14], a->value[15],
310 a->value[16], a->value[17], a->value[18], a->value[19]);
311
312 return 0;
313}
314
315/**
316 * ib_addr2msg - convert InfiniBand address format to message header format
317 */
318static int ib_addr2msg(struct tipc_media_addr *a, char *msg_area)
319{
320 memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE);
321 msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_IB;
322 memcpy(msg_area, a->value, INFINIBAND_ALEN);
323 return 0;
324}
325
326/**
327 * ib_msg2addr - convert message header address format to InfiniBand format
328 */
329static int ib_msg2addr(const struct tipc_bearer *tb_ptr,
330 struct tipc_media_addr *a, char *msg_area)
331{
332 ib_media_addr_set(tb_ptr, a, msg_area);
333 return 0;
334}
335
336/*
337 * InfiniBand media registration info
338 */
339static struct tipc_media ib_media_info = {
340 .send_msg = send_msg,
341 .enable_bearer = enable_bearer,
342 .disable_bearer = disable_bearer,
343 .addr2str = ib_addr2str,
344 .addr2msg = ib_addr2msg,
345 .msg2addr = ib_msg2addr,
346 .priority = TIPC_DEF_LINK_PRI,
347 .tolerance = TIPC_DEF_LINK_TOL,
348 .window = TIPC_DEF_LINK_WIN,
349 .type_id = TIPC_MEDIA_TYPE_IB,
350 .name = "ib"
351};
352
353/**
354 * tipc_ib_media_start - activate InfiniBand bearer support
355 *
356 * Register InfiniBand media type with TIPC bearer code. Also register
357 * with OS for notifications about device state changes.
358 */
359int tipc_ib_media_start(void)
360{
361 int res;
362
363 if (ib_started)
364 return -EINVAL;
365
366 res = tipc_register_media(&ib_media_info);
367 if (res)
368 return res;
369
370 res = register_netdevice_notifier(&notifier);
371 if (!res)
372 ib_started = 1;
373 return res;
374}
375
376/**
377 * tipc_ib_media_stop - deactivate InfiniBand bearer support
378 */
379void tipc_ib_media_stop(void)
380{
381 if (!ib_started)
382 return;
383
384 flush_scheduled_work();
385 unregister_netdevice_notifier(&notifier);
386 ib_started = 0;
387}
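
ib_addr2str() above bounds its output buffer at 60 bytes: a 20-byte (INFINIBAND_ALEN) hardware address renders as 19 "xx:" groups plus a terminating "xx" and NUL, i.e. 19 * 3 + 3 = 60. A user-space sketch of the same formatting, using a bounded snprintf() loop instead of one long sprintf():

#include <stdio.h>

#define INFINIBAND_ALEN 20

static int ib_addr_to_str(const unsigned char *addr, char *buf, int size)
{
        int i, n = 0;

        if (size < INFINIBAND_ALEN * 3) /* 60 bytes, as above */
                return 1;
        for (i = 0; i < INFINIBAND_ALEN; i++)
                n += snprintf(buf + n, size - n, i ? ":%02x" : "%02x",
                              addr[i]);
        return 0;
}

int main(void)
{
        unsigned char addr[INFINIBAND_ALEN];
        char buf[60];
        int i;

        for (i = 0; i < INFINIBAND_ALEN; i++)
                addr[i] = i;
        if (!ib_addr_to_str(addr, buf, sizeof(buf)))
                printf("%s\n", buf);
        return 0;
}
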
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 6675914dc592..8bcd4985d0fb 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -44,7 +44,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
44 struct nlmsghdr *rep_nlh; 44 struct nlmsghdr *rep_nlh;
45 struct nlmsghdr *req_nlh = info->nlhdr; 45 struct nlmsghdr *req_nlh = info->nlhdr;
46 struct tipc_genlmsghdr *req_userhdr = info->userhdr; 46 struct tipc_genlmsghdr *req_userhdr = info->userhdr;
47 int hdr_space = NLMSG_SPACE(GENL_HDRLEN + TIPC_GENL_HDRLEN); 47 int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN);
48 u16 cmd; 48 u16 cmd;
49 49
50 if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN))) 50 if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN)))
@@ -53,8 +53,8 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
53 cmd = req_userhdr->cmd; 53 cmd = req_userhdr->cmd;
54 54
55 rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd, 55 rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd,
56 NLMSG_DATA(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, 56 nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN,
57 NLMSG_PAYLOAD(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), 57 nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN),
58 hdr_space); 58 hdr_space);
59 59
60 if (rep_buf) { 60 if (rep_buf) {
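
The switch from NLMSG_SPACE() to nlmsg_total_size() above is a move to the modern helper, not a size change: both expand to NLMSG_ALIGN(NLMSG_HDRLEN + payload). A quick user-space check against the uapi macros (the kernel-side helper is restated from its definition, since it is not exported to user space):

#include <stdio.h>
#include <linux/netlink.h>

/* restated from the kernel's net/netlink.h definition */
static unsigned int nlmsg_total_size(unsigned int payload)
{
        return NLMSG_ALIGN(NLMSG_HDRLEN + payload);
}

int main(void)
{
        unsigned int payload;

        for (payload = 0; payload <= 64; payload++)
                if (NLMSG_SPACE(payload) != nlmsg_total_size(payload))
                        printf("mismatch at %u\n", payload);
        printf("both give %u for a 20-byte payload\n",
               (unsigned int)NLMSG_SPACE(20));
        return 0;
}
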
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2db702d82e7d..9efe01113c5c 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1340,7 +1340,6 @@ static void unix_destruct_scm(struct sk_buff *skb)
1340 struct scm_cookie scm; 1340 struct scm_cookie scm;
1341 memset(&scm, 0, sizeof(scm)); 1341 memset(&scm, 0, sizeof(scm));
1342 scm.pid = UNIXCB(skb).pid; 1342 scm.pid = UNIXCB(skb).pid;
1343 scm.cred = UNIXCB(skb).cred;
1344 if (UNIXCB(skb).fp) 1343 if (UNIXCB(skb).fp)
1345 unix_detach_fds(&scm, skb); 1344 unix_detach_fds(&scm, skb);
1346 1345
@@ -1391,8 +1390,8 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
1391 int err = 0; 1390 int err = 0;
1392 1391
1393 UNIXCB(skb).pid = get_pid(scm->pid); 1392 UNIXCB(skb).pid = get_pid(scm->pid);
1394 if (scm->cred) 1393 UNIXCB(skb).uid = scm->creds.uid;
1395 UNIXCB(skb).cred = get_cred(scm->cred); 1394 UNIXCB(skb).gid = scm->creds.gid;
1396 UNIXCB(skb).fp = NULL; 1395 UNIXCB(skb).fp = NULL;
1397 if (scm->fp && send_fds) 1396 if (scm->fp && send_fds)
1398 err = unix_attach_fds(scm, skb); 1397 err = unix_attach_fds(scm, skb);
@@ -1409,13 +1408,13 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
1409static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, 1408static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1410 const struct sock *other) 1409 const struct sock *other)
1411{ 1410{
1412 if (UNIXCB(skb).cred) 1411 if (UNIXCB(skb).pid)
1413 return; 1412 return;
1414 if (test_bit(SOCK_PASSCRED, &sock->flags) || 1413 if (test_bit(SOCK_PASSCRED, &sock->flags) ||
1415 !other->sk_socket || 1414 !other->sk_socket ||
1416 test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { 1415 test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
1417 UNIXCB(skb).pid = get_pid(task_tgid(current)); 1416 UNIXCB(skb).pid = get_pid(task_tgid(current));
1418 UNIXCB(skb).cred = get_current_cred(); 1417 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1419 } 1418 }
1420} 1419}
1421 1420
@@ -1819,7 +1818,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1819 siocb->scm = &tmp_scm; 1818 siocb->scm = &tmp_scm;
1820 memset(&tmp_scm, 0, sizeof(tmp_scm)); 1819 memset(&tmp_scm, 0, sizeof(tmp_scm));
1821 } 1820 }
1822 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); 1821 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
1823 unix_set_secdata(siocb->scm, skb); 1822 unix_set_secdata(siocb->scm, skb);
1824 1823
1825 if (!(flags & MSG_PEEK)) { 1824 if (!(flags & MSG_PEEK)) {
@@ -1991,11 +1990,12 @@ again:
1991 if (check_creds) { 1990 if (check_creds) {
1992 /* Never glue messages from different writers */ 1991 /* Never glue messages from different writers */
1993 if ((UNIXCB(skb).pid != siocb->scm->pid) || 1992 if ((UNIXCB(skb).pid != siocb->scm->pid) ||
1994 (UNIXCB(skb).cred != siocb->scm->cred)) 1993 !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) ||
1994 !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid))
1995 break; 1995 break;
1996 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { 1996 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
1997 /* Copy credentials */ 1997 /* Copy credentials */
1998 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); 1998 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
1999 check_creds = 1; 1999 check_creds = 1;
2000 } 2000 }
2001 2001
@@ -2196,7 +2196,9 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2196 2196
2197 /* exceptional events? */ 2197 /* exceptional events? */
2198 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 2198 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2199 mask |= POLLERR; 2199 mask |= POLLERR |
2200 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2201
2200 if (sk->sk_shutdown & RCV_SHUTDOWN) 2202 if (sk->sk_shutdown & RCV_SHUTDOWN)
2201 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 2203 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2202 if (sk->sk_shutdown == SHUTDOWN_MASK) 2204 if (sk->sk_shutdown == SHUTDOWN_MASK)
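
The af_unix changes above stop pinning a full credential structure per skb and instead record only the sender's pid plus numeric uid/gid, comparing them field by field (uid_eq()/gid_eq() in the kernel). A user-space analogue of the "never glue messages from different writers" check, with plain ids standing in for kuid_t/kgid_t:

#include <stdio.h>
#include <stdbool.h>
#include <sys/types.h>

struct skb_creds {
        pid_t pid;
        uid_t uid;
        gid_t gid;
};

/* a stream read may only merge consecutive messages whose creds match */
static bool same_writer(const struct skb_creds *a, const struct skb_creds *b)
{
        return a->pid == b->pid && a->uid == b->uid && a->gid == b->gid;
}

int main(void)
{
        struct skb_creds m1 = { .pid = 100, .uid = 1000, .gid = 1000 };
        struct skb_creds m2 = { .pid = 100, .uid = 1000, .gid = 1000 };
        struct skb_creds m3 = { .pid = 101, .uid = 1000, .gid = 1000 };

        printf("m1/m2 glue ok: %d\n", same_writer(&m1, &m2));  /* 1 */
        printf("m1/m3 glue ok: %d\n", same_writer(&m1, &m3));  /* 0 */
        return 0;
}
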
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 5e04d3d96285..daff75200e25 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -123,6 +123,14 @@ static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
123 return err > 0 ? -err : err; 123 return err > 0 ? -err : err;
124} 124}
125 125
126static u32 vmci_transport_peer_rid(u32 peer_cid)
127{
128 if (VMADDR_CID_HYPERVISOR == peer_cid)
129 return VMCI_TRANSPORT_HYPERVISOR_PACKET_RID;
130
131 return VMCI_TRANSPORT_PACKET_RID;
132}
133
126static inline void 134static inline void
127vmci_transport_packet_init(struct vmci_transport_packet *pkt, 135vmci_transport_packet_init(struct vmci_transport_packet *pkt,
128 struct sockaddr_vm *src, 136 struct sockaddr_vm *src,
@@ -140,7 +148,7 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt,
140 pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY, 148 pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY,
141 VMCI_TRANSPORT_PACKET_RID); 149 VMCI_TRANSPORT_PACKET_RID);
142 pkt->dg.dst = vmci_make_handle(dst->svm_cid, 150 pkt->dg.dst = vmci_make_handle(dst->svm_cid,
143 VMCI_TRANSPORT_PACKET_RID); 151 vmci_transport_peer_rid(dst->svm_cid));
144 pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg); 152 pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg);
145 pkt->version = VMCI_TRANSPORT_PACKET_VERSION; 153 pkt->version = VMCI_TRANSPORT_PACKET_VERSION;
146 pkt->type = type; 154 pkt->type = type;
@@ -508,6 +516,9 @@ static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid)
508 516
509static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid) 517static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid)
510{ 518{
519 if (VMADDR_CID_HYPERVISOR == peer_cid)
520 return true;
521
511 if (vsock->cached_peer != peer_cid) { 522 if (vsock->cached_peer != peer_cid) {
512 vsock->cached_peer = peer_cid; 523 vsock->cached_peer = peer_cid;
513 if (!vmci_transport_is_trusted(vsock, peer_cid) && 524 if (!vmci_transport_is_trusted(vsock, peer_cid) &&
@@ -628,7 +639,6 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
628static bool vmci_transport_stream_allow(u32 cid, u32 port) 639static bool vmci_transport_stream_allow(u32 cid, u32 port)
629{ 640{
630 static const u32 non_socket_contexts[] = { 641 static const u32 non_socket_contexts[] = {
631 VMADDR_CID_HYPERVISOR,
632 VMADDR_CID_RESERVED, 642 VMADDR_CID_RESERVED,
633 }; 643 };
634 int i; 644 int i;
@@ -667,7 +677,7 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
667 */ 677 */
668 678
669 if (!vmci_transport_stream_allow(dg->src.context, -1) 679 if (!vmci_transport_stream_allow(dg->src.context, -1)
670 || VMCI_TRANSPORT_PACKET_RID != dg->src.resource) 680 || vmci_transport_peer_rid(dg->src.context) != dg->src.resource)
671 return VMCI_ERROR_NO_ACCESS; 681 return VMCI_ERROR_NO_ACCESS;
672 682
673 if (VMCI_DG_SIZE(dg) < sizeof(*pkt)) 683 if (VMCI_DG_SIZE(dg) < sizeof(*pkt))
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index 1bf991803ec0..fd88ea8924e4 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -28,6 +28,9 @@
28/* The resource ID on which control packets are sent. */ 28/* The resource ID on which control packets are sent. */
29#define VMCI_TRANSPORT_PACKET_RID 1 29#define VMCI_TRANSPORT_PACKET_RID 1
30 30
31/* The resource ID on which control packets are sent to the hypervisor. */
32#define VMCI_TRANSPORT_HYPERVISOR_PACKET_RID 15
33
31#define VSOCK_PROTO_INVALID 0 34#define VSOCK_PROTO_INVALID 0
32#define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0) 35#define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0)
33#define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY) 36#define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY)
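
The new vmci_transport_peer_rid() routes control packets to a dedicated resource ID when the peer is the hypervisor context and keeps the default otherwise. A stand-alone rendering of that selection (VMADDR_CID_HYPERVISOR is restated here as 0, its uapi value):

#include <stdio.h>

#define VMADDR_CID_HYPERVISOR                   0
#define VMCI_TRANSPORT_PACKET_RID               1
#define VMCI_TRANSPORT_HYPERVISOR_PACKET_RID    15

static unsigned int peer_rid(unsigned int peer_cid)
{
        if (peer_cid == VMADDR_CID_HYPERVISOR)
                return VMCI_TRANSPORT_HYPERVISOR_PACKET_RID;
        return VMCI_TRANSPORT_PACKET_RID;
}

int main(void)
{
        printf("hypervisor rid = %u\n", peer_rid(VMADDR_CID_HYPERVISOR)); /* 15 */
        printf("guest rid      = %u\n", peer_rid(3));                     /* 1 */
        return 0;
}
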
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index a4a14e8f55cc..324e8d851dc4 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -46,65 +46,3 @@ int cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
46 46
47 return err; 47 return err;
48} 48}
49
50void cfg80211_ch_switch_notify(struct net_device *dev,
51 struct cfg80211_chan_def *chandef)
52{
53 struct wireless_dev *wdev = dev->ieee80211_ptr;
54 struct wiphy *wiphy = wdev->wiphy;
55 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
56
57 trace_cfg80211_ch_switch_notify(dev, chandef);
58
59 wdev_lock(wdev);
60
61 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
62 wdev->iftype != NL80211_IFTYPE_P2P_GO))
63 goto out;
64
65 wdev->channel = chandef->chan;
66 nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL);
67out:
68 wdev_unlock(wdev);
69 return;
70}
71EXPORT_SYMBOL(cfg80211_ch_switch_notify);
72
73bool cfg80211_rx_spurious_frame(struct net_device *dev,
74 const u8 *addr, gfp_t gfp)
75{
76 struct wireless_dev *wdev = dev->ieee80211_ptr;
77 bool ret;
78
79 trace_cfg80211_rx_spurious_frame(dev, addr);
80
81 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
82 wdev->iftype != NL80211_IFTYPE_P2P_GO)) {
83 trace_cfg80211_return_bool(false);
84 return false;
85 }
86 ret = nl80211_unexpected_frame(dev, addr, gfp);
87 trace_cfg80211_return_bool(ret);
88 return ret;
89}
90EXPORT_SYMBOL(cfg80211_rx_spurious_frame);
91
92bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
93 const u8 *addr, gfp_t gfp)
94{
95 struct wireless_dev *wdev = dev->ieee80211_ptr;
96 bool ret;
97
98 trace_cfg80211_rx_unexpected_4addr_frame(dev, addr);
99
100 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
101 wdev->iftype != NL80211_IFTYPE_P2P_GO &&
102 wdev->iftype != NL80211_IFTYPE_AP_VLAN)) {
103 trace_cfg80211_return_bool(false);
104 return false;
105 }
106 ret = nl80211_unexpected_4addr_frame(dev, addr, gfp);
107 trace_cfg80211_return_bool(ret);
108 return ret;
109}
110EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 6ddf74f0ae1e..84c9ad7e1dca 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -842,6 +842,46 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
842 rdev->num_running_monitor_ifaces += num; 842 rdev->num_running_monitor_ifaces += num;
843} 843}
844 844
845void cfg80211_leave(struct cfg80211_registered_device *rdev,
846 struct wireless_dev *wdev)
847{
848 struct net_device *dev = wdev->netdev;
849
850 switch (wdev->iftype) {
851 case NL80211_IFTYPE_ADHOC:
852 cfg80211_leave_ibss(rdev, dev, true);
853 break;
854 case NL80211_IFTYPE_P2P_CLIENT:
855 case NL80211_IFTYPE_STATION:
856 mutex_lock(&rdev->sched_scan_mtx);
857 __cfg80211_stop_sched_scan(rdev, false);
858 mutex_unlock(&rdev->sched_scan_mtx);
859
860 wdev_lock(wdev);
861#ifdef CONFIG_CFG80211_WEXT
862 kfree(wdev->wext.ie);
863 wdev->wext.ie = NULL;
864 wdev->wext.ie_len = 0;
865 wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC;
866#endif
867 __cfg80211_disconnect(rdev, dev,
868 WLAN_REASON_DEAUTH_LEAVING, true);
869 cfg80211_mlme_down(rdev, dev);
870 wdev_unlock(wdev);
871 break;
872 case NL80211_IFTYPE_MESH_POINT:
873 cfg80211_leave_mesh(rdev, dev);
874 break;
875 case NL80211_IFTYPE_AP:
876 cfg80211_stop_ap(rdev, dev);
877 break;
878 default:
879 break;
880 }
881
882 wdev->beacon_interval = 0;
883}
884
845static int cfg80211_netdev_notifier_call(struct notifier_block *nb, 885static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
846 unsigned long state, 886 unsigned long state,
847 void *ndev) 887 void *ndev)
@@ -910,38 +950,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
910 dev->priv_flags |= IFF_DONT_BRIDGE; 950 dev->priv_flags |= IFF_DONT_BRIDGE;
911 break; 951 break;
912 case NETDEV_GOING_DOWN: 952 case NETDEV_GOING_DOWN:
913 switch (wdev->iftype) { 953 cfg80211_leave(rdev, wdev);
914 case NL80211_IFTYPE_ADHOC:
915 cfg80211_leave_ibss(rdev, dev, true);
916 break;
917 case NL80211_IFTYPE_P2P_CLIENT:
918 case NL80211_IFTYPE_STATION:
919 mutex_lock(&rdev->sched_scan_mtx);
920 __cfg80211_stop_sched_scan(rdev, false);
921 mutex_unlock(&rdev->sched_scan_mtx);
922
923 wdev_lock(wdev);
924#ifdef CONFIG_CFG80211_WEXT
925 kfree(wdev->wext.ie);
926 wdev->wext.ie = NULL;
927 wdev->wext.ie_len = 0;
928 wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC;
929#endif
930 __cfg80211_disconnect(rdev, dev,
931 WLAN_REASON_DEAUTH_LEAVING, true);
932 cfg80211_mlme_down(rdev, dev);
933 wdev_unlock(wdev);
934 break;
935 case NL80211_IFTYPE_MESH_POINT:
936 cfg80211_leave_mesh(rdev, dev);
937 break;
938 case NL80211_IFTYPE_AP:
939 cfg80211_stop_ap(rdev, dev);
940 break;
941 default:
942 break;
943 }
944 wdev->beacon_interval = 0;
945 break; 954 break;
946 case NETDEV_DOWN: 955 case NETDEV_DOWN:
947 cfg80211_update_iface_num(rdev, wdev->iftype, -1); 956 cfg80211_update_iface_num(rdev, wdev->iftype, -1);
@@ -1117,8 +1126,10 @@ static int __init cfg80211_init(void)
1117 goto out_fail_reg; 1126 goto out_fail_reg;
1118 1127
1119 cfg80211_wq = create_singlethread_workqueue("cfg80211"); 1128 cfg80211_wq = create_singlethread_workqueue("cfg80211");
1120 if (!cfg80211_wq) 1129 if (!cfg80211_wq) {
1130 err = -ENOMEM;
1121 goto out_fail_wq; 1131 goto out_fail_wq;
1132 }
1122 1133
1123 return 0; 1134 return 0;
1124 1135
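
The core.c refactor above lifts the per-iftype teardown switch out of the netdev notifier into cfg80211_leave(), so other paths can reuse it. A sketch of the shape of that refactor with stand-in interface types and handlers (none of the names below are cfg80211 symbols):

#include <stdio.h>

enum iftype { IF_ADHOC, IF_STATION, IF_AP, IF_OTHER };

struct wdev {
        enum iftype iftype;
        int beacon_interval;
};

static void leave(struct wdev *w)
{
        switch (w->iftype) {
        case IF_ADHOC:
                puts("leave ibss");
                break;
        case IF_STATION:
                puts("disconnect + mlme down");
                break;
        case IF_AP:
                puts("stop ap");
                break;
        default:
                break;
        }
        w->beacon_interval = 0;
}

/* the notifier now just delegates to the shared helper */
static void netdev_going_down(struct wdev *w)
{
        leave(w);
}

int main(void)
{
        struct wdev w = { .iftype = IF_AP, .beacon_interval = 100 };

        netdev_going_down(&w);
        printf("beacon_interval = %d\n", w.beacon_interval); /* 0 */
        return 0;
}
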
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 5845c2b37aa8..124e5e773fbc 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -330,20 +330,15 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
330int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, 330int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
331 struct net_device *dev, 331 struct net_device *dev,
332 struct ieee80211_channel *chan, 332 struct ieee80211_channel *chan,
333 const u8 *bssid, const u8 *prev_bssid, 333 const u8 *bssid,
334 const u8 *ssid, int ssid_len, 334 const u8 *ssid, int ssid_len,
335 const u8 *ie, int ie_len, bool use_mfp, 335 struct cfg80211_assoc_request *req);
336 struct cfg80211_crypto_settings *crypt,
337 u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
338 struct ieee80211_ht_cap *ht_capa_mask);
339int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, 336int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
340 struct net_device *dev, struct ieee80211_channel *chan, 337 struct net_device *dev,
341 const u8 *bssid, const u8 *prev_bssid, 338 struct ieee80211_channel *chan,
339 const u8 *bssid,
342 const u8 *ssid, int ssid_len, 340 const u8 *ssid, int ssid_len,
343 const u8 *ie, int ie_len, bool use_mfp, 341 struct cfg80211_assoc_request *req);
344 struct cfg80211_crypto_settings *crypt,
345 u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
346 struct ieee80211_ht_cap *ht_capa_mask);
347int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, 342int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
348 struct net_device *dev, const u8 *bssid, 343 struct net_device *dev, const u8 *bssid,
349 const u8 *ie, int ie_len, u16 reason, 344 const u8 *ie, int ie_len, u16 reason,
@@ -375,6 +370,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
375 bool no_cck, bool dont_wait_for_ack, u64 *cookie); 370 bool no_cck, bool dont_wait_for_ack, u64 *cookie);
376void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, 371void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
377 const struct ieee80211_ht_cap *ht_capa_mask); 372 const struct ieee80211_ht_cap *ht_capa_mask);
373void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
374 const struct ieee80211_vht_cap *vht_capa_mask);
378 375
379/* SME */ 376/* SME */
380int __cfg80211_connect(struct cfg80211_registered_device *rdev, 377int __cfg80211_connect(struct cfg80211_registered_device *rdev,
@@ -503,6 +500,9 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
503void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, 500void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
504 enum nl80211_iftype iftype, int num); 501 enum nl80211_iftype iftype, int num);
505 502
503void cfg80211_leave(struct cfg80211_registered_device *rdev,
504 struct wireless_dev *wdev);
505
506void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, 506void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
507 struct wireless_dev *wdev); 507 struct wireless_dev *wdev);
508 508
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 55957a284f6c..0bb93f3061a4 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -85,6 +85,7 @@ const struct mesh_setup default_mesh_setup = {
85 .ie = NULL, 85 .ie = NULL,
86 .ie_len = 0, 86 .ie_len = 0,
87 .is_secure = false, 87 .is_secure = false,
88 .user_mpm = false,
88 .beacon_interval = MESH_DEFAULT_BEACON_INTERVAL, 89 .beacon_interval = MESH_DEFAULT_BEACON_INTERVAL,
89 .dtim_period = MESH_DEFAULT_DTIM_PERIOD, 90 .dtim_period = MESH_DEFAULT_DTIM_PERIOD,
90}; 91};
@@ -233,20 +234,6 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
233 return 0; 234 return 0;
234} 235}
235 236
236void cfg80211_notify_new_peer_candidate(struct net_device *dev,
237 const u8 *macaddr, const u8* ie, u8 ie_len, gfp_t gfp)
238{
239 struct wireless_dev *wdev = dev->ieee80211_ptr;
240
241 trace_cfg80211_notify_new_peer_candidate(dev, macaddr);
242 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT))
243 return;
244
245 nl80211_send_new_peer_candidate(wiphy_to_dev(wdev->wiphy), dev,
246 macaddr, ie, ie_len, gfp);
247}
248EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate);
249
250static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, 237static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
251 struct net_device *dev) 238 struct net_device *dev)
252{ 239{
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index caddca35d686..390198bf4b36 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -187,30 +187,6 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len)
187} 187}
188EXPORT_SYMBOL(cfg80211_send_disassoc); 188EXPORT_SYMBOL(cfg80211_send_disassoc);
189 189
190void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf,
191 size_t len)
192{
193 struct wireless_dev *wdev = dev->ieee80211_ptr;
194 struct wiphy *wiphy = wdev->wiphy;
195 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
196
197 trace_cfg80211_send_unprot_deauth(dev);
198 nl80211_send_unprot_deauth(rdev, dev, buf, len, GFP_ATOMIC);
199}
200EXPORT_SYMBOL(cfg80211_send_unprot_deauth);
201
202void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf,
203 size_t len)
204{
205 struct wireless_dev *wdev = dev->ieee80211_ptr;
206 struct wiphy *wiphy = wdev->wiphy;
207 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
208
209 trace_cfg80211_send_unprot_disassoc(dev);
210 nl80211_send_unprot_disassoc(rdev, dev, buf, len, GFP_ATOMIC);
211}
212EXPORT_SYMBOL(cfg80211_send_unprot_disassoc);
213
214void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) 190void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr)
215{ 191{
216 struct wireless_dev *wdev = dev->ieee80211_ptr; 192 struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -367,27 +343,38 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
367 p1[i] &= p2[i]; 343 p1[i] &= p2[i];
368} 344}
369 345
346 /* Do a logical vht_capa &= vht_capa_mask. */
347void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
348 const struct ieee80211_vht_cap *vht_capa_mask)
349{
350 int i;
351 u8 *p1, *p2;
352 if (!vht_capa_mask) {
353 memset(vht_capa, 0, sizeof(*vht_capa));
354 return;
355 }
356
357 p1 = (u8 *)vht_capa;
358 p2 = (u8 *)vht_capa_mask;
359 for (i = 0; i < sizeof(*vht_capa); i++)
360 p1[i] &= p2[i];
361}
362
370int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, 363int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
371 struct net_device *dev, 364 struct net_device *dev,
372 struct ieee80211_channel *chan, 365 struct ieee80211_channel *chan,
373 const u8 *bssid, const u8 *prev_bssid, 366 const u8 *bssid,
374 const u8 *ssid, int ssid_len, 367 const u8 *ssid, int ssid_len,
375 const u8 *ie, int ie_len, bool use_mfp, 368 struct cfg80211_assoc_request *req)
376 struct cfg80211_crypto_settings *crypt,
377 u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
378 struct ieee80211_ht_cap *ht_capa_mask)
379{ 369{
380 struct wireless_dev *wdev = dev->ieee80211_ptr; 370 struct wireless_dev *wdev = dev->ieee80211_ptr;
381 struct cfg80211_assoc_request req;
382 int err; 371 int err;
383 bool was_connected = false; 372 bool was_connected = false;
384 373
385 ASSERT_WDEV_LOCK(wdev); 374 ASSERT_WDEV_LOCK(wdev);
386 375
387 memset(&req, 0, sizeof(req)); 376 if (wdev->current_bss && req->prev_bssid &&
388 377 ether_addr_equal(wdev->current_bss->pub.bssid, req->prev_bssid)) {
389 if (wdev->current_bss && prev_bssid &&
390 ether_addr_equal(wdev->current_bss->pub.bssid, prev_bssid)) {
391 /* 378 /*
392 * Trying to reassociate: Allow this to proceed and let the old 379 * Trying to reassociate: Allow this to proceed and let the old
393 * association to be dropped when the new one is completed. 380 * association to be dropped when the new one is completed.
@@ -399,40 +386,30 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
399 } else if (wdev->current_bss) 386 } else if (wdev->current_bss)
400 return -EALREADY; 387 return -EALREADY;
401 388
402 req.ie = ie; 389 cfg80211_oper_and_ht_capa(&req->ht_capa_mask,
403 req.ie_len = ie_len;
404 memcpy(&req.crypto, crypt, sizeof(req.crypto));
405 req.use_mfp = use_mfp;
406 req.prev_bssid = prev_bssid;
407 req.flags = assoc_flags;
408 if (ht_capa)
409 memcpy(&req.ht_capa, ht_capa, sizeof(req.ht_capa));
410 if (ht_capa_mask)
411 memcpy(&req.ht_capa_mask, ht_capa_mask,
412 sizeof(req.ht_capa_mask));
413 cfg80211_oper_and_ht_capa(&req.ht_capa_mask,
414 rdev->wiphy.ht_capa_mod_mask); 390 rdev->wiphy.ht_capa_mod_mask);
391 cfg80211_oper_and_vht_capa(&req->vht_capa_mask,
392 rdev->wiphy.vht_capa_mod_mask);
415 393
416 req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, 394 req->bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
417 WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); 395 WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
418 if (!req.bss) { 396 if (!req->bss) {
419 if (was_connected) 397 if (was_connected)
420 wdev->sme_state = CFG80211_SME_CONNECTED; 398 wdev->sme_state = CFG80211_SME_CONNECTED;
421 return -ENOENT; 399 return -ENOENT;
422 } 400 }
423 401
424 err = cfg80211_can_use_chan(rdev, wdev, req.bss->channel, 402 err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED);
425 CHAN_MODE_SHARED);
426 if (err) 403 if (err)
427 goto out; 404 goto out;
428 405
429 err = rdev_assoc(rdev, dev, &req); 406 err = rdev_assoc(rdev, dev, req);
430 407
431out: 408out:
432 if (err) { 409 if (err) {
433 if (was_connected) 410 if (was_connected)
434 wdev->sme_state = CFG80211_SME_CONNECTED; 411 wdev->sme_state = CFG80211_SME_CONNECTED;
435 cfg80211_put_bss(&rdev->wiphy, req.bss); 412 cfg80211_put_bss(&rdev->wiphy, req->bss);
436 } 413 }
437 414
438 return err; 415 return err;
@@ -441,21 +418,17 @@ out:
441int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, 418int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
442 struct net_device *dev, 419 struct net_device *dev,
443 struct ieee80211_channel *chan, 420 struct ieee80211_channel *chan,
444 const u8 *bssid, const u8 *prev_bssid, 421 const u8 *bssid,
445 const u8 *ssid, int ssid_len, 422 const u8 *ssid, int ssid_len,
446 const u8 *ie, int ie_len, bool use_mfp, 423 struct cfg80211_assoc_request *req)
447 struct cfg80211_crypto_settings *crypt,
448 u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
449 struct ieee80211_ht_cap *ht_capa_mask)
450{ 424{
451 struct wireless_dev *wdev = dev->ieee80211_ptr; 425 struct wireless_dev *wdev = dev->ieee80211_ptr;
452 int err; 426 int err;
453 427
454 mutex_lock(&rdev->devlist_mtx); 428 mutex_lock(&rdev->devlist_mtx);
455 wdev_lock(wdev); 429 wdev_lock(wdev);
456 err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, 430 err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid,
457 ssid, ssid_len, ie, ie_len, use_mfp, crypt, 431 ssid, ssid_len, req);
458 assoc_flags, ht_capa, ht_capa_mask);
459 wdev_unlock(wdev); 432 wdev_unlock(wdev);
460 mutex_unlock(&rdev->devlist_mtx); 433 mutex_unlock(&rdev->devlist_mtx);
461 434
@@ -577,62 +550,6 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
577 } 550 }
578} 551}
579 552
580void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie,
581 struct ieee80211_channel *chan,
582 unsigned int duration, gfp_t gfp)
583{
584 struct wiphy *wiphy = wdev->wiphy;
585 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
586
587 trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration);
588 nl80211_send_remain_on_channel(rdev, wdev, cookie, chan, duration, gfp);
589}
590EXPORT_SYMBOL(cfg80211_ready_on_channel);
591
592void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie,
593 struct ieee80211_channel *chan,
594 gfp_t gfp)
595{
596 struct wiphy *wiphy = wdev->wiphy;
597 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
598
599 trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan);
600 nl80211_send_remain_on_channel_cancel(rdev, wdev, cookie, chan, gfp);
601}
602EXPORT_SYMBOL(cfg80211_remain_on_channel_expired);
603
604void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
605 struct station_info *sinfo, gfp_t gfp)
606{
607 struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
608 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
609
610 trace_cfg80211_new_sta(dev, mac_addr, sinfo);
611 nl80211_send_sta_event(rdev, dev, mac_addr, sinfo, gfp);
612}
613EXPORT_SYMBOL(cfg80211_new_sta);
614
615void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp)
616{
617 struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
618 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
619
620 trace_cfg80211_del_sta(dev, mac_addr);
621 nl80211_send_sta_del_event(rdev, dev, mac_addr, gfp);
622}
623EXPORT_SYMBOL(cfg80211_del_sta);
624
625void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
626 enum nl80211_connect_failed_reason reason,
627 gfp_t gfp)
628{
629 struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
630 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
631
632 nl80211_send_conn_failed_event(rdev, dev, mac_addr, reason, gfp);
633}
634EXPORT_SYMBOL(cfg80211_conn_failed);
635
636struct cfg80211_mgmt_registration { 553struct cfg80211_mgmt_registration {
637 struct list_head list; 554 struct list_head list;
638 555
@@ -909,85 +826,6 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
909} 826}
910EXPORT_SYMBOL(cfg80211_rx_mgmt); 827EXPORT_SYMBOL(cfg80211_rx_mgmt);
911 828
912void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
913 const u8 *buf, size_t len, bool ack, gfp_t gfp)
914{
915 struct wiphy *wiphy = wdev->wiphy;
916 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
917
918 trace_cfg80211_mgmt_tx_status(wdev, cookie, ack);
919
920 /* Indicate TX status of the Action frame to user space */
921 nl80211_send_mgmt_tx_status(rdev, wdev, cookie, buf, len, ack, gfp);
922}
923EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
924
925void cfg80211_cqm_rssi_notify(struct net_device *dev,
926 enum nl80211_cqm_rssi_threshold_event rssi_event,
927 gfp_t gfp)
928{
929 struct wireless_dev *wdev = dev->ieee80211_ptr;
930 struct wiphy *wiphy = wdev->wiphy;
931 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
932
933 trace_cfg80211_cqm_rssi_notify(dev, rssi_event);
934
935 /* Indicate roaming trigger event to user space */
936 nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp);
937}
938EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
939
940void cfg80211_cqm_pktloss_notify(struct net_device *dev,
941 const u8 *peer, u32 num_packets, gfp_t gfp)
942{
943 struct wireless_dev *wdev = dev->ieee80211_ptr;
944 struct wiphy *wiphy = wdev->wiphy;
945 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
946
947 trace_cfg80211_cqm_pktloss_notify(dev, peer, num_packets);
948
949 /* Indicate roaming trigger event to user space */
950 nl80211_send_cqm_pktloss_notify(rdev, dev, peer, num_packets, gfp);
951}
952EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify);
953
954void cfg80211_cqm_txe_notify(struct net_device *dev,
955 const u8 *peer, u32 num_packets,
956 u32 rate, u32 intvl, gfp_t gfp)
957{
958 struct wireless_dev *wdev = dev->ieee80211_ptr;
959 struct wiphy *wiphy = wdev->wiphy;
960 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
961
962 nl80211_send_cqm_txe_notify(rdev, dev, peer, num_packets,
963 rate, intvl, gfp);
964}
965EXPORT_SYMBOL(cfg80211_cqm_txe_notify);
966
967void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid,
968 const u8 *replay_ctr, gfp_t gfp)
969{
970 struct wireless_dev *wdev = dev->ieee80211_ptr;
971 struct wiphy *wiphy = wdev->wiphy;
972 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
973
974 trace_cfg80211_gtk_rekey_notify(dev, bssid);
975 nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp);
976}
977EXPORT_SYMBOL(cfg80211_gtk_rekey_notify);
978
979void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
980 const u8 *bssid, bool preauth, gfp_t gfp)
981{
982 struct wireless_dev *wdev = dev->ieee80211_ptr;
983 struct wiphy *wiphy = wdev->wiphy;
984 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
985
986 trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth);
987 nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp);
988}
989EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify);
990
991void cfg80211_dfs_channels_update_work(struct work_struct *work) 829void cfg80211_dfs_channels_update_work(struct work_struct *work)
992{ 830{
993 struct delayed_work *delayed_work; 831 struct delayed_work *delayed_work;
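
cfg80211_oper_and_vht_capa() above ANDs a driver-provided modification mask over the user-requested VHT capabilities byte by byte, and treats a missing mask as "nothing may be overridden". A stand-alone sketch with a 12-byte struct standing in for ieee80211_vht_cap:

#include <stdio.h>
#include <string.h>

struct vht_cap {
        unsigned char b[12];
};

static void oper_and_vht_capa(struct vht_cap *capa,
                              const struct vht_cap *mask)
{
        unsigned char *p1 = (unsigned char *)capa;
        const unsigned char *p2 = (const unsigned char *)mask;
        size_t i;

        if (!mask) {
                /* no mask from the driver: clear every bit */
                memset(capa, 0, sizeof(*capa));
                return;
        }
        for (i = 0; i < sizeof(*capa); i++)
                p1[i] &= p2[i];
}

int main(void)
{
        struct vht_cap capa, mask;

        memset(&capa, 0xff, sizeof(capa));
        memset(&mask, 0x0f, sizeof(mask));
        oper_and_vht_capa(&capa, &mask);
        printf("capa.b[0] = 0x%02x\n", capa.b[0]);      /* 0x0f */

        oper_and_vht_capa(&capa, NULL);
        printf("capa.b[0] = 0x%02x\n", capa.b[0]);      /* 0x00 */
        return 0;
}
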
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 58e13a8c95f9..671b69a3c136 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -370,6 +370,14 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
370 [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED }, 370 [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED },
371 [NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 }, 371 [NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 },
372 [NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, }, 372 [NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, },
373 [NL80211_ATTR_SPLIT_WIPHY_DUMP] = { .type = NLA_FLAG, },
374 [NL80211_ATTR_DISABLE_VHT] = { .type = NLA_FLAG },
375 [NL80211_ATTR_VHT_CAPABILITY_MASK] = {
376 .len = NL80211_VHT_CAPABILITY_LEN,
377 },
378 [NL80211_ATTR_MDID] = { .type = NLA_U16 },
379 [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY,
380 .len = IEEE80211_MAX_DATA_LEN },
373}; 381};
374 382
375/* policy for the key attributes */ 383/* policy for the key attributes */
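
Each nla_policy entry added above declares how an attribute is validated before any handler runs: flags carry no payload, u16s are two bytes, and NLA_BINARY payloads are capped at .len. A toy user-space validator sketch of that contract (the types and table below are illustrative, not the kernel or libnl API):

#include <stdio.h>

enum { T_FLAG, T_U16, T_BINARY };

struct policy { int type; int maxlen; };

static const struct policy policy[] = {
        [0] = { T_FLAG,   0 },   /* e.g. NL80211_ATTR_DISABLE_VHT */
        [1] = { T_U16,    0 },   /* e.g. NL80211_ATTR_MDID */
        [2] = { T_BINARY, 8 },   /* e.g. NL80211_ATTR_IE_RIC, capped length */
};

static int validate(int attr, int len)
{
        switch (policy[attr].type) {
        case T_FLAG:   return len == 0;
        case T_U16:    return len == 2;
        case T_BINARY: return len <= policy[attr].maxlen;
        }
        return 0;
}

int main(void)
{
        printf("flag, len 0:   %d\n", validate(0, 0));  /* accept */
        printf("u16, len 4:    %d\n", validate(1, 4));  /* reject */
        printf("binary, len 6: %d\n", validate(2, 6));  /* accept */
        return 0;
}
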
@@ -539,7 +547,8 @@ static inline void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
539} 547}
540 548
541static int nl80211_msg_put_channel(struct sk_buff *msg, 549static int nl80211_msg_put_channel(struct sk_buff *msg,
542 struct ieee80211_channel *chan) 550 struct ieee80211_channel *chan,
551 bool large)
543{ 552{
544 if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_FREQ, 553 if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_FREQ,
545 chan->center_freq)) 554 chan->center_freq))
@@ -554,9 +563,37 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,
554 if ((chan->flags & IEEE80211_CHAN_NO_IBSS) && 563 if ((chan->flags & IEEE80211_CHAN_NO_IBSS) &&
555 nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IBSS)) 564 nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IBSS))
556 goto nla_put_failure; 565 goto nla_put_failure;
557 if ((chan->flags & IEEE80211_CHAN_RADAR) && 566 if (chan->flags & IEEE80211_CHAN_RADAR) {
558 nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) 567 if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR))
559 goto nla_put_failure; 568 goto nla_put_failure;
569 if (large) {
570 u32 time;
571
572 time = elapsed_jiffies_msecs(chan->dfs_state_entered);
573
574 if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_STATE,
575 chan->dfs_state))
576 goto nla_put_failure;
577 if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME,
578 time))
579 goto nla_put_failure;
580 }
581 }
582
583 if (large) {
584 if ((chan->flags & IEEE80211_CHAN_NO_HT40MINUS) &&
585 nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_MINUS))
586 goto nla_put_failure;
587 if ((chan->flags & IEEE80211_CHAN_NO_HT40PLUS) &&
588 nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_PLUS))
589 goto nla_put_failure;
590 if ((chan->flags & IEEE80211_CHAN_NO_80MHZ) &&
591 nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_80MHZ))
592 goto nla_put_failure;
593 if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) &&
594 nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ))
595 goto nla_put_failure;
596 }
560 597
561 if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, 598 if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
562 DBM_TO_MBM(chan->max_power))) 599 DBM_TO_MBM(chan->max_power)))
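
The new large parameter gates the extended channel attributes (DFS state and time, bandwidth restrictions) so that only clients which advertised split-dump support receive them, keeping legacy messages within their old size. A sketch of that gating with attribute emission modelled as printf (all names below are stand-ins):

#include <stdio.h>
#include <stdbool.h>

struct chan { bool radar; unsigned dfs_state; unsigned dfs_time_ms; };

static void put_channel(const struct chan *c, bool large)
{
        printf("FREQ ...\n");
        if (c->radar) {
                printf("RADAR\n");
                if (large) {    /* new, split-dump-only attributes */
                        printf("DFS_STATE=%u\n", c->dfs_state);
                        printf("DFS_TIME=%u\n", c->dfs_time_ms);
                }
        }
}

int main(void)
{
        struct chan c = { .radar = true, .dfs_state = 1, .dfs_time_ms = 5000 };

        puts("-- legacy client --");
        put_channel(&c, false);
        puts("-- split-dump client --");
        put_channel(&c, true);
        return 0;
}
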
@@ -832,7 +869,8 @@ nla_put_failure:
832} 869}
833 870
834static int nl80211_put_iface_combinations(struct wiphy *wiphy, 871static int nl80211_put_iface_combinations(struct wiphy *wiphy,
835 struct sk_buff *msg) 872 struct sk_buff *msg,
873 bool large)
836{ 874{
837 struct nlattr *nl_combis; 875 struct nlattr *nl_combis;
838 int i, j; 876 int i, j;
@@ -881,6 +919,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy,
881 nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM, 919 nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM,
882 c->max_interfaces)) 920 c->max_interfaces))
883 goto nla_put_failure; 921 goto nla_put_failure;
922 if (large &&
923 nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
924 c->radar_detect_widths))
925 goto nla_put_failure;
884 926
885 nla_nest_end(msg, nl_combi); 927 nla_nest_end(msg, nl_combi);
886 } 928 }
@@ -892,412 +934,611 @@ nla_put_failure:
892 return -ENOBUFS; 934 return -ENOBUFS;
893} 935}
894 936
895static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags, 937#ifdef CONFIG_PM
896 struct cfg80211_registered_device *dev) 938static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev,
939 struct sk_buff *msg)
897{ 940{
898 void *hdr; 941 const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan.tcp;
899 struct nlattr *nl_bands, *nl_band; 942 struct nlattr *nl_tcp;
900 struct nlattr *nl_freqs, *nl_freq;
901 struct nlattr *nl_rates, *nl_rate;
902 struct nlattr *nl_cmds;
903 enum ieee80211_band band;
904 struct ieee80211_channel *chan;
905 struct ieee80211_rate *rate;
906 int i;
907 const struct ieee80211_txrx_stypes *mgmt_stypes =
908 dev->wiphy.mgmt_stypes;
909 943
910 hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); 944 if (!tcp)
911 if (!hdr) 945 return 0;
912 return -1;
913 946
914 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) || 947 nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION);
915 nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)) || 948 if (!nl_tcp)
916 nla_put_u32(msg, NL80211_ATTR_GENERATION, 949 return -ENOBUFS;
917 cfg80211_rdev_list_generation) ||
918 nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT,
919 dev->wiphy.retry_short) ||
920 nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG,
921 dev->wiphy.retry_long) ||
922 nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
923 dev->wiphy.frag_threshold) ||
924 nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD,
925 dev->wiphy.rts_threshold) ||
926 nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS,
927 dev->wiphy.coverage_class) ||
928 nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
929 dev->wiphy.max_scan_ssids) ||
930 nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS,
931 dev->wiphy.max_sched_scan_ssids) ||
932 nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
933 dev->wiphy.max_scan_ie_len) ||
934 nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN,
935 dev->wiphy.max_sched_scan_ie_len) ||
936 nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS,
937 dev->wiphy.max_match_sets))
938 goto nla_put_failure;
939 950
940 if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && 951 if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD,
941 nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN)) 952 tcp->data_payload_max))
942 goto nla_put_failure; 953 return -ENOBUFS;
943 if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) &&
944 nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH))
945 goto nla_put_failure;
946 if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
947 nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD))
948 goto nla_put_failure;
949 if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) &&
950 nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT))
951 goto nla_put_failure;
952 if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) &&
953 nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT))
954 goto nla_put_failure;
955 if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) &&
956 nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP))
957 goto nla_put_failure;
958 954
959 if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, 955 if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD,
960 sizeof(u32) * dev->wiphy.n_cipher_suites, 956 tcp->data_payload_max))
961 dev->wiphy.cipher_suites)) 957 return -ENOBUFS;
962 goto nla_put_failure;
963 958
964 if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, 959 if (tcp->seq && nla_put_flag(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ))
965 dev->wiphy.max_num_pmkids)) 960 return -ENOBUFS;
966 goto nla_put_failure;
967 961
968 if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && 962 if (tcp->tok && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN,
969 nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE)) 963 sizeof(*tcp->tok), tcp->tok))
970 goto nla_put_failure; 964 return -ENOBUFS;
971 965
972 if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, 966 if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL,
973 dev->wiphy.available_antennas_tx) || 967 tcp->data_interval_max))
974 nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, 968 return -ENOBUFS;
975 dev->wiphy.available_antennas_rx))
976 goto nla_put_failure;
977 969
978 if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && 970 if (nla_put_u32(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD,
979 nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD, 971 tcp->wake_payload_max))
980 dev->wiphy.probe_resp_offload)) 972 return -ENOBUFS;
981 goto nla_put_failure;
982 973
983 if ((dev->wiphy.available_antennas_tx || 974 nla_nest_end(msg, nl_tcp);
984 dev->wiphy.available_antennas_rx) && dev->ops->get_antenna) { 975 return 0;
985 u32 tx_ant = 0, rx_ant = 0; 976}
986 int res; 977
987 res = rdev_get_antenna(dev, &tx_ant, &rx_ant); 978static int nl80211_send_wowlan(struct sk_buff *msg,
988 if (!res) { 979 struct cfg80211_registered_device *dev,
989 if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, 980 bool large)
990 tx_ant) || 981{
991 nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX, 982 struct nlattr *nl_wowlan;
992 rx_ant)) 983
993 goto nla_put_failure; 984 if (!dev->wiphy.wowlan.flags && !dev->wiphy.wowlan.n_patterns)
994 } 985 return 0;
986
987 nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED);
988 if (!nl_wowlan)
989 return -ENOBUFS;
990
991 if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) &&
992 nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) ||
993 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) &&
994 nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) ||
995 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) &&
996 nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) ||
997 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) &&
998 nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) ||
999 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) &&
1000 nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) ||
1001 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) &&
1002 nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) ||
1003 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) &&
1004 nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) ||
1005 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) &&
1006 nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)))
1007 return -ENOBUFS;
1008
1009 if (dev->wiphy.wowlan.n_patterns) {
1010 struct nl80211_wowlan_pattern_support pat = {
1011 .max_patterns = dev->wiphy.wowlan.n_patterns,
1012 .min_pattern_len = dev->wiphy.wowlan.pattern_min_len,
1013 .max_pattern_len = dev->wiphy.wowlan.pattern_max_len,
1014 .max_pkt_offset = dev->wiphy.wowlan.max_pkt_offset,
1015 };
1016
1017 if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN,
1018 sizeof(pat), &pat))
1019 return -ENOBUFS;
995		}
996
997		if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES,
998					dev->wiphy.interface_modes))
999			goto nla_put_failure;
1000
1001		nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS);
1002		if (!nl_bands)
1003			goto nla_put_failure;
1004
1005		for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
1006			if (!dev->wiphy.bands[band])
1007				continue;
1008
1020		}
1021
1022		if (large && nl80211_send_wowlan_tcp_caps(dev, msg))
1023			return -ENOBUFS;
1024
1025		nla_nest_end(msg, nl_wowlan);
1026
1027		return 0;
1028	}
1029	#endif
1030
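Both wowlan helpers above use the stock netlink nesting idiom: open a container, emit attributes into it, fail with -ENOBUFS when the skb tailroom runs out, and close the container so its final length is patched in. A minimal sketch of that pattern (the MY_ATTR_* ids are hypothetical, not nl80211 attributes):

	enum { MY_ATTR_UNSPEC, MY_ATTR_CAPS, MY_ATTR_MAX_FOO };	/* hypothetical ids */

	static int send_caps(struct sk_buff *msg)
	{
		struct nlattr *nest;

		nest = nla_nest_start(msg, MY_ATTR_CAPS);	/* open the container */
		if (!nest)
			return -ENOBUFS;			/* out of tailroom */

		if (nla_put_u32(msg, MY_ATTR_MAX_FOO, 16))
			return -ENOBUFS;	/* caller may grow the skb and retry */

		nla_nest_end(msg, nest);	/* fixes up the container length */
		return 0;
	}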
1009		nl_band = nla_nest_start(msg, band);
1010		if (!nl_band)
1011			goto nla_put_failure;
1012
1013		/* add HT info */
1014		if (dev->wiphy.bands[band]->ht_cap.ht_supported &&
1015		    (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET,
1016			     sizeof(dev->wiphy.bands[band]->ht_cap.mcs),
1017			     &dev->wiphy.bands[band]->ht_cap.mcs) ||
1018		     nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA,
1019				 dev->wiphy.bands[band]->ht_cap.cap) ||
1020		     nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR,
1021				dev->wiphy.bands[band]->ht_cap.ampdu_factor) ||
1022		     nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY,
1023				dev->wiphy.bands[band]->ht_cap.ampdu_density)))
1024			goto nla_put_failure;
1025
1026		/* add VHT info */
1027		if (dev->wiphy.bands[band]->vht_cap.vht_supported &&
1028		    (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET,
1029			     sizeof(dev->wiphy.bands[band]->vht_cap.vht_mcs),
1030			     &dev->wiphy.bands[band]->vht_cap.vht_mcs) ||
1031		     nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA,
1032				 dev->wiphy.bands[band]->vht_cap.cap)))
1033			goto nla_put_failure;
1034
1035		/* add frequencies */
1036		nl_freqs = nla_nest_start(msg, NL80211_BAND_ATTR_FREQS);
1037		if (!nl_freqs)
1038			goto nla_put_failure;
1039
1040		for (i = 0; i < dev->wiphy.bands[band]->n_channels; i++) {
1041			nl_freq = nla_nest_start(msg, i);
1042			if (!nl_freq)
1043				goto nla_put_failure;
1044
1045			chan = &dev->wiphy.bands[band]->channels[i];
1046
1047			if (nl80211_msg_put_channel(msg, chan))
1048				goto nla_put_failure;
1049
1050			nla_nest_end(msg, nl_freq);
1051		}
1052
1053		nla_nest_end(msg, nl_freqs);
1054
1055		/* add bitrates */
1056		nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES);
1057		if (!nl_rates)
1058			goto nla_put_failure;
1059
1060		for (i = 0; i < dev->wiphy.bands[band]->n_bitrates; i++) {
1061			nl_rate = nla_nest_start(msg, i);
1062			if (!nl_rate)
1063				goto nla_put_failure;
1064
1065			rate = &dev->wiphy.bands[band]->bitrates[i];
1066			if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE,
1067					rate->bitrate))
1068				goto nla_put_failure;
1069			if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) &&
1070			    nla_put_flag(msg,
1071					 NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE))
1072				goto nla_put_failure;
1073
1074			nla_nest_end(msg, nl_rate);
1075		}
1076
1077		nla_nest_end(msg, nl_rates);
1078
1079		nla_nest_end(msg, nl_band);
1080	}
1081	nla_nest_end(msg, nl_bands);
1082
1083	nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS);
1084	if (!nl_cmds)
1085		goto nla_put_failure;
1086
1031	static int nl80211_send_band_rateinfo(struct sk_buff *msg,
1032					      struct ieee80211_supported_band *sband)
1033	{
1034		struct nlattr *nl_rates, *nl_rate;
1035		struct ieee80211_rate *rate;
1036		int i;
1037
1038		/* add HT info */
1039		if (sband->ht_cap.ht_supported &&
1040		    (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET,
1041			     sizeof(sband->ht_cap.mcs),
1042			     &sband->ht_cap.mcs) ||
1043		     nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA,
1044				 sband->ht_cap.cap) ||
1045		     nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR,
1046				sband->ht_cap.ampdu_factor) ||
1047		     nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY,
1048				sband->ht_cap.ampdu_density)))
1049			return -ENOBUFS;
1050
1051		/* add VHT info */
1052		if (sband->vht_cap.vht_supported &&
1053		    (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET,
1054			     sizeof(sband->vht_cap.vht_mcs),
1055			     &sband->vht_cap.vht_mcs) ||
1056		     nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA,
1057				 sband->vht_cap.cap)))
1058			return -ENOBUFS;
1059
1060		/* add bitrates */
1061		nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES);
1062		if (!nl_rates)
1063			return -ENOBUFS;
1064
1065		for (i = 0; i < sband->n_bitrates; i++) {
1066			nl_rate = nla_nest_start(msg, i);
1067			if (!nl_rate)
1068				return -ENOBUFS;
1069
1070			rate = &sband->bitrates[i];
1071			if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE,
1072					rate->bitrate))
1073				return -ENOBUFS;
1074			if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) &&
1075			    nla_put_flag(msg,
1076					 NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE))
1077				return -ENOBUFS;
1078
1079			nla_nest_end(msg, nl_rate);
1080		}
1081
1082		nla_nest_end(msg, nl_rates);
1083
1084		return 0;
1085	}
1086
1087	static int
1088	nl80211_send_mgmt_stypes(struct sk_buff *msg,
1089				 const struct ieee80211_txrx_stypes *mgmt_stypes)
1090	{
1091		u16 stypes;
1092		struct nlattr *nl_ftypes, *nl_ifs;
1093		enum nl80211_iftype ift;
1094		int i;
1095
1096		if (!mgmt_stypes)
1097			return 0;
1098
1099		nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES);
1100		if (!nl_ifs)
1101			return -ENOBUFS;
1102
1103		for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
1104			nl_ftypes = nla_nest_start(msg, ift);
1105			if (!nl_ftypes)
1106				return -ENOBUFS;
1107			i = 0;
1108			stypes = mgmt_stypes[ift].tx;
1109			while (stypes) {
1110				if ((stypes & 1) &&
1111				    nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE,
1112						(i << 4) | IEEE80211_FTYPE_MGMT))
1113					return -ENOBUFS;
1114				stypes >>= 1;
1115				i++;
1116			}
1117			nla_nest_end(msg, nl_ftypes);
1118		}
1119
1120		nla_nest_end(msg, nl_ifs);
1121
1122		nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES);
1123		if (!nl_ifs)
1124			return -ENOBUFS;
1125
1126		for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
1127			nl_ftypes = nla_nest_start(msg, ift);
1128			if (!nl_ftypes)
1129				return -ENOBUFS;
1130			i = 0;
1131			stypes = mgmt_stypes[ift].rx;
1132			while (stypes) {
1133				if ((stypes & 1) &&
1134				    nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE,
1135						(i << 4) | IEEE80211_FTYPE_MGMT))
1136					return -ENOBUFS;
1137				stypes >>= 1;
1138				i++;
1139			}
1140			nla_nest_end(msg, nl_ftypes);
1141		}
1142		nla_nest_end(msg, nl_ifs);
1143
1144		return 0;
1145	}
1146
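The stype walk above exploits the 802.11 frame-control layout: the management subtype lives in bits 4-7, so bit i of the bitmap corresponds to frame type (i << 4) | IEEE80211_FTYPE_MGMT (IEEE80211_FTYPE_MGMT is 0, so the OR only keeps the type field explicit). A condensed sketch of the loop:

	u16 stypes = mgmt_stypes[ift].tx;	/* one bit per management subtype */
	int i = 0;

	while (stypes) {
		if (stypes & 1)			/* subtype i is supported */
			pr_debug("can TX frame type 0x%x\n",
				 (i << 4) | IEEE80211_FTYPE_MGMT);
		stypes >>= 1;
		i++;
	}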
1087 i = 0; 1147static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1088#define CMD(op, n) \ 1148 struct sk_buff *msg, u32 portid, u32 seq,
1089 do { \ 1149 int flags, bool split, long *split_start,
1090 if (dev->ops->op) { \ 1150 long *band_start, long *chan_start)
1091 i++; \ 1151{
1092 if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ 1152 void *hdr;
1093 goto nla_put_failure; \ 1153 struct nlattr *nl_bands, *nl_band;
1094 } \ 1154 struct nlattr *nl_freqs, *nl_freq;
1095 } while (0) 1155 struct nlattr *nl_cmds;
1096 1156 enum ieee80211_band band;
1097 CMD(add_virtual_intf, NEW_INTERFACE); 1157 struct ieee80211_channel *chan;
1098 CMD(change_virtual_intf, SET_INTERFACE); 1158 int i;
1099 CMD(add_key, NEW_KEY); 1159 const struct ieee80211_txrx_stypes *mgmt_stypes =
1100 CMD(start_ap, START_AP); 1160 dev->wiphy.mgmt_stypes;
1101 CMD(add_station, NEW_STATION); 1161 long start = 0, start_chan = 0, start_band = 0;
1102 CMD(add_mpath, NEW_MPATH); 1162 u32 features;
1103 CMD(update_mesh_config, SET_MESH_CONFIG); 1163
1104 CMD(change_bss, SET_BSS); 1164 hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY);
1105 CMD(auth, AUTHENTICATE); 1165 if (!hdr)
1106 CMD(assoc, ASSOCIATE); 1166 return -ENOBUFS;
1107 CMD(deauth, DEAUTHENTICATE); 1167
1108 CMD(disassoc, DISASSOCIATE); 1168 /* allow always using the variables */
1109 CMD(join_ibss, JOIN_IBSS); 1169 if (!split) {
1110 CMD(join_mesh, JOIN_MESH); 1170 split_start = &start;
1111 CMD(set_pmksa, SET_PMKSA); 1171 band_start = &start_band;
1112 CMD(del_pmksa, DEL_PMKSA); 1172 chan_start = &start_chan;
1113 CMD(flush_pmksa, FLUSH_PMKSA);
1114 if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
1115 CMD(remain_on_channel, REMAIN_ON_CHANNEL);
1116 CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
1117 CMD(mgmt_tx, FRAME);
1118 CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
1119 if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
1120 i++;
1121 if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))
1122 goto nla_put_failure;
1123 } 1173 }
1124 if (dev->ops->set_monitor_channel || dev->ops->start_ap || 1174
1125 dev->ops->join_mesh) { 1175 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) ||
1126 i++; 1176 nla_put_string(msg, NL80211_ATTR_WIPHY_NAME,
1127 if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) 1177 wiphy_name(&dev->wiphy)) ||
1178 nla_put_u32(msg, NL80211_ATTR_GENERATION,
1179 cfg80211_rdev_list_generation))
1180 goto nla_put_failure;
1181
1182 switch (*split_start) {
1183 case 0:
1184 if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT,
1185 dev->wiphy.retry_short) ||
1186 nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG,
1187 dev->wiphy.retry_long) ||
1188 nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
1189 dev->wiphy.frag_threshold) ||
1190 nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD,
1191 dev->wiphy.rts_threshold) ||
1192 nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS,
1193 dev->wiphy.coverage_class) ||
1194 nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
1195 dev->wiphy.max_scan_ssids) ||
1196 nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS,
1197 dev->wiphy.max_sched_scan_ssids) ||
1198 nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
1199 dev->wiphy.max_scan_ie_len) ||
1200 nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN,
1201 dev->wiphy.max_sched_scan_ie_len) ||
1202 nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS,
1203 dev->wiphy.max_match_sets))
1128 goto nla_put_failure; 1204 goto nla_put_failure;
1129 } 1205
1130 CMD(set_wds_peer, SET_WDS_PEER); 1206 if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) &&
1131 if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { 1207 nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN))
1132 CMD(tdls_mgmt, TDLS_MGMT); 1208 goto nla_put_failure;
1133 CMD(tdls_oper, TDLS_OPER); 1209 if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) &&
1134 } 1210 nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH))
1135 if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) 1211 goto nla_put_failure;
1136 CMD(sched_scan_start, START_SCHED_SCAN); 1212 if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
1137 CMD(probe_client, PROBE_CLIENT); 1213 nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD))
1138 CMD(set_noack_map, SET_NOACK_MAP); 1214 goto nla_put_failure;
1139 if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { 1215 if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) &&
1140 i++; 1216 nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT))
1141 if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) 1217 goto nla_put_failure;
1218 if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) &&
1219 nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT))
1220 goto nla_put_failure;
1221 if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) &&
1222 nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP))
1142 goto nla_put_failure; 1223 goto nla_put_failure;
1143 }
1144 CMD(start_p2p_device, START_P2P_DEVICE);
1145 CMD(set_mcast_rate, SET_MCAST_RATE);
1146 1224
1147#ifdef CONFIG_NL80211_TESTMODE 1225 (*split_start)++;
1148 CMD(testmode_cmd, TESTMODE); 1226 if (split)
1149#endif 1227 break;
1228 case 1:
1229 if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES,
1230 sizeof(u32) * dev->wiphy.n_cipher_suites,
1231 dev->wiphy.cipher_suites))
1232 goto nla_put_failure;
1150 1233
1151#undef CMD 1234 if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS,
1235 dev->wiphy.max_num_pmkids))
1236 goto nla_put_failure;
1152 1237
1153 if (dev->ops->connect || dev->ops->auth) { 1238 if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
1154 i++; 1239 nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE))
1155 if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))
1156 goto nla_put_failure; 1240 goto nla_put_failure;
1157 }
1158 1241
1159 if (dev->ops->disconnect || dev->ops->deauth) { 1242 if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX,
1160 i++; 1243 dev->wiphy.available_antennas_tx) ||
1161 if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) 1244 nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
1245 dev->wiphy.available_antennas_rx))
1162 goto nla_put_failure; 1246 goto nla_put_failure;
1163 }
1164 1247
1165 nla_nest_end(msg, nl_cmds); 1248 if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) &&
1249 nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD,
1250 dev->wiphy.probe_resp_offload))
1251 goto nla_put_failure;
1166 1252
1167 if (dev->ops->remain_on_channel && 1253 if ((dev->wiphy.available_antennas_tx ||
1168 (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && 1254 dev->wiphy.available_antennas_rx) &&
1169 nla_put_u32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, 1255 dev->ops->get_antenna) {
1170 dev->wiphy.max_remain_on_channel_duration)) 1256 u32 tx_ant = 0, rx_ant = 0;
1171 goto nla_put_failure; 1257 int res;
1258 res = rdev_get_antenna(dev, &tx_ant, &rx_ant);
1259 if (!res) {
1260 if (nla_put_u32(msg,
1261 NL80211_ATTR_WIPHY_ANTENNA_TX,
1262 tx_ant) ||
1263 nla_put_u32(msg,
1264 NL80211_ATTR_WIPHY_ANTENNA_RX,
1265 rx_ant))
1266 goto nla_put_failure;
1267 }
1268 }
1172 1269
1173 if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && 1270 (*split_start)++;
1174 nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK)) 1271 if (split)
1175 goto nla_put_failure; 1272 break;
1273 case 2:
1274 if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES,
1275 dev->wiphy.interface_modes))
1276 goto nla_put_failure;
1277 (*split_start)++;
1278 if (split)
1279 break;
1280 case 3:
1281 nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS);
1282 if (!nl_bands)
1283 goto nla_put_failure;
1176 1284
1177 if (mgmt_stypes) { 1285 for (band = *band_start; band < IEEE80211_NUM_BANDS; band++) {
1178 u16 stypes; 1286 struct ieee80211_supported_band *sband;
1179 struct nlattr *nl_ftypes, *nl_ifs;
1180 enum nl80211_iftype ift;
1181 1287
1182 nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES); 1288 sband = dev->wiphy.bands[band];
1183 if (!nl_ifs)
1184 goto nla_put_failure;
1185 1289
1186 for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { 1290 if (!sband)
1187 nl_ftypes = nla_nest_start(msg, ift); 1291 continue;
1188 if (!nl_ftypes) 1292
1293 nl_band = nla_nest_start(msg, band);
1294 if (!nl_band)
1189 goto nla_put_failure; 1295 goto nla_put_failure;
1190 i = 0; 1296
1191 stypes = mgmt_stypes[ift].tx; 1297 switch (*chan_start) {
1192 while (stypes) { 1298 case 0:
1193 if ((stypes & 1) && 1299 if (nl80211_send_band_rateinfo(msg, sband))
1194 nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE,
1195 (i << 4) | IEEE80211_FTYPE_MGMT))
1196 goto nla_put_failure; 1300 goto nla_put_failure;
1197 stypes >>= 1; 1301 (*chan_start)++;
1198 i++; 1302 if (split)
1303 break;
1304 default:
1305 /* add frequencies */
1306 nl_freqs = nla_nest_start(
1307 msg, NL80211_BAND_ATTR_FREQS);
1308 if (!nl_freqs)
1309 goto nla_put_failure;
1310
1311 for (i = *chan_start - 1;
1312 i < sband->n_channels;
1313 i++) {
1314 nl_freq = nla_nest_start(msg, i);
1315 if (!nl_freq)
1316 goto nla_put_failure;
1317
1318 chan = &sband->channels[i];
1319
1320 if (nl80211_msg_put_channel(msg, chan,
1321 split))
1322 goto nla_put_failure;
1323
1324 nla_nest_end(msg, nl_freq);
1325 if (split)
1326 break;
1327 }
1328 if (i < sband->n_channels)
1329 *chan_start = i + 2;
1330 else
1331 *chan_start = 0;
1332 nla_nest_end(msg, nl_freqs);
1333 }
1334
1335 nla_nest_end(msg, nl_band);
1336
1337 if (split) {
1338 /* start again here */
1339 if (*chan_start)
1340 band--;
1341 break;
1199 } 1342 }
1200 nla_nest_end(msg, nl_ftypes);
1201 } 1343 }
1344 nla_nest_end(msg, nl_bands);
1202 1345
1203 nla_nest_end(msg, nl_ifs); 1346 if (band < IEEE80211_NUM_BANDS)
1347 *band_start = band + 1;
1348 else
1349 *band_start = 0;
1204 1350
1205 nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES); 1351 /* if bands & channels are done, continue outside */
1206 if (!nl_ifs) 1352 if (*band_start == 0 && *chan_start == 0)
1353 (*split_start)++;
1354 if (split)
1355 break;
1356 case 4:
1357 nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS);
1358 if (!nl_cmds)
1207 goto nla_put_failure; 1359 goto nla_put_failure;
1208 1360
1209 for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { 1361 i = 0;
1210 nl_ftypes = nla_nest_start(msg, ift); 1362#define CMD(op, n) \
1211 if (!nl_ftypes) 1363 do { \
1364 if (dev->ops->op) { \
1365 i++; \
1366 if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \
1367 goto nla_put_failure; \
1368 } \
1369 } while (0)
1370
1371 CMD(add_virtual_intf, NEW_INTERFACE);
1372 CMD(change_virtual_intf, SET_INTERFACE);
1373 CMD(add_key, NEW_KEY);
1374 CMD(start_ap, START_AP);
1375 CMD(add_station, NEW_STATION);
1376 CMD(add_mpath, NEW_MPATH);
1377 CMD(update_mesh_config, SET_MESH_CONFIG);
1378 CMD(change_bss, SET_BSS);
1379 CMD(auth, AUTHENTICATE);
1380 CMD(assoc, ASSOCIATE);
1381 CMD(deauth, DEAUTHENTICATE);
1382 CMD(disassoc, DISASSOCIATE);
1383 CMD(join_ibss, JOIN_IBSS);
1384 CMD(join_mesh, JOIN_MESH);
1385 CMD(set_pmksa, SET_PMKSA);
1386 CMD(del_pmksa, DEL_PMKSA);
1387 CMD(flush_pmksa, FLUSH_PMKSA);
1388 if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
1389 CMD(remain_on_channel, REMAIN_ON_CHANNEL);
1390 CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
1391 CMD(mgmt_tx, FRAME);
1392 CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
1393 if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
1394 i++;
1395 if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))
1212 goto nla_put_failure; 1396 goto nla_put_failure;
1213 i = 0;
1214 stypes = mgmt_stypes[ift].rx;
1215 while (stypes) {
1216 if ((stypes & 1) &&
1217 nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE,
1218 (i << 4) | IEEE80211_FTYPE_MGMT))
1219 goto nla_put_failure;
1220 stypes >>= 1;
1221 i++;
1222 }
1223 nla_nest_end(msg, nl_ftypes);
1224 } 1397 }
1225 nla_nest_end(msg, nl_ifs); 1398 if (dev->ops->set_monitor_channel || dev->ops->start_ap ||
1226 } 1399 dev->ops->join_mesh) {
1400 i++;
1401 if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL))
1402 goto nla_put_failure;
1403 }
1404 CMD(set_wds_peer, SET_WDS_PEER);
1405 if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
1406 CMD(tdls_mgmt, TDLS_MGMT);
1407 CMD(tdls_oper, TDLS_OPER);
1408 }
1409 if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
1410 CMD(sched_scan_start, START_SCHED_SCAN);
1411 CMD(probe_client, PROBE_CLIENT);
1412 CMD(set_noack_map, SET_NOACK_MAP);
1413 if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
1414 i++;
1415 if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS))
1416 goto nla_put_failure;
1417 }
1418 CMD(start_p2p_device, START_P2P_DEVICE);
1419 CMD(set_mcast_rate, SET_MCAST_RATE);
1227 1420
1228#ifdef CONFIG_PM 1421#ifdef CONFIG_NL80211_TESTMODE
1229 if (dev->wiphy.wowlan.flags || dev->wiphy.wowlan.n_patterns) { 1422 CMD(testmode_cmd, TESTMODE);
1230 struct nlattr *nl_wowlan; 1423#endif
1231 1424
1232 nl_wowlan = nla_nest_start(msg, 1425#undef CMD
1233 NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED);
1234 if (!nl_wowlan)
1235 goto nla_put_failure;
1236 1426
1237 if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) && 1427 if (dev->ops->connect || dev->ops->auth) {
1238 nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || 1428 i++;
1239 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) && 1429 if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))
1240 nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) ||
1241 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) &&
1242 nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) ||
1243 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) &&
1244 nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) ||
1245 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) &&
1246 nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) ||
1247 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) &&
1248 nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) ||
1249 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) &&
1250 nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) ||
1251 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) &&
1252 nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)))
1253 goto nla_put_failure;
1254 if (dev->wiphy.wowlan.n_patterns) {
1255 struct nl80211_wowlan_pattern_support pat = {
1256 .max_patterns = dev->wiphy.wowlan.n_patterns,
1257 .min_pattern_len =
1258 dev->wiphy.wowlan.pattern_min_len,
1259 .max_pattern_len =
1260 dev->wiphy.wowlan.pattern_max_len,
1261 .max_pkt_offset =
1262 dev->wiphy.wowlan.max_pkt_offset,
1263 };
1264 if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN,
1265 sizeof(pat), &pat))
1266 goto nla_put_failure; 1430 goto nla_put_failure;
1267 } 1431 }
1268 1432
1269 nla_nest_end(msg, nl_wowlan); 1433 if (dev->ops->disconnect || dev->ops->deauth) {
1270 } 1434 i++;
1435 if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT))
1436 goto nla_put_failure;
1437 }
1438
1439 nla_nest_end(msg, nl_cmds);
1440 (*split_start)++;
1441 if (split)
1442 break;
1443 case 5:
1444 if (dev->ops->remain_on_channel &&
1445 (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) &&
1446 nla_put_u32(msg,
1447 NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION,
1448 dev->wiphy.max_remain_on_channel_duration))
1449 goto nla_put_failure;
1450
1451 if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) &&
1452 nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK))
1453 goto nla_put_failure;
1454
1455 if (nl80211_send_mgmt_stypes(msg, mgmt_stypes))
1456 goto nla_put_failure;
1457 (*split_start)++;
1458 if (split)
1459 break;
1460 case 6:
1461#ifdef CONFIG_PM
1462 if (nl80211_send_wowlan(msg, dev, split))
1463 goto nla_put_failure;
1464 (*split_start)++;
1465 if (split)
1466 break;
1467#else
1468 (*split_start)++;
1271#endif 1469#endif
1470 case 7:
1471 if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES,
1472 dev->wiphy.software_iftypes))
1473 goto nla_put_failure;
1272 1474
1273 if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, 1475 if (nl80211_put_iface_combinations(&dev->wiphy, msg, split))
1274 dev->wiphy.software_iftypes)) 1476 goto nla_put_failure;
1275 goto nla_put_failure;
1276 1477
1277 if (nl80211_put_iface_combinations(&dev->wiphy, msg)) 1478 (*split_start)++;
1278 goto nla_put_failure; 1479 if (split)
1480 break;
1481 case 8:
1482 if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) &&
1483 nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME,
1484 dev->wiphy.ap_sme_capa))
1485 goto nla_put_failure;
1279 1486
1280 if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && 1487 features = dev->wiphy.features;
1281 nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME, 1488 /*
1282 dev->wiphy.ap_sme_capa)) 1489 * We can only add the per-channel limit information if the
1283 goto nla_put_failure; 1490 * dump is split, otherwise it makes it too big. Therefore
1491 * only advertise it in that case.
1492 */
1493 if (split)
1494 features |= NL80211_FEATURE_ADVERTISE_CHAN_LIMITS;
1495 if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features))
1496 goto nla_put_failure;
1284 1497
1285 if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, 1498 if (dev->wiphy.ht_capa_mod_mask &&
1286 dev->wiphy.features)) 1499 nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK,
1287 goto nla_put_failure; 1500 sizeof(*dev->wiphy.ht_capa_mod_mask),
1501 dev->wiphy.ht_capa_mod_mask))
1502 goto nla_put_failure;
1288 1503
1289 if (dev->wiphy.ht_capa_mod_mask && 1504 if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME &&
1290 nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK, 1505 dev->wiphy.max_acl_mac_addrs &&
1291 sizeof(*dev->wiphy.ht_capa_mod_mask), 1506 nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX,
1292 dev->wiphy.ht_capa_mod_mask)) 1507 dev->wiphy.max_acl_mac_addrs))
1293 goto nla_put_failure; 1508 goto nla_put_failure;
1294 1509
1295 if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && 1510 /*
1296 dev->wiphy.max_acl_mac_addrs && 1511 * Any information below this point is only available to
1297 nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, 1512 * applications that can deal with it being split. This
1298 dev->wiphy.max_acl_mac_addrs)) 1513 * helps ensure that newly added capabilities don't break
1299 goto nla_put_failure; 1514 * older tools by overrunning their buffers.
1515 *
1516 * We still increment split_start so that in the split
1517 * case we'll continue with more data in the next round,
1518 * but break unconditionally so unsplit data stops here.
1519 */
1520 (*split_start)++;
1521 break;
1522 case 9:
1523 if (dev->wiphy.extended_capabilities &&
1524 (nla_put(msg, NL80211_ATTR_EXT_CAPA,
1525 dev->wiphy.extended_capabilities_len,
1526 dev->wiphy.extended_capabilities) ||
1527 nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK,
1528 dev->wiphy.extended_capabilities_len,
1529 dev->wiphy.extended_capabilities_mask)))
1530 goto nla_put_failure;
1300 1531
1532 if (dev->wiphy.vht_capa_mod_mask &&
1533 nla_put(msg, NL80211_ATTR_VHT_CAPABILITY_MASK,
1534 sizeof(*dev->wiphy.vht_capa_mod_mask),
1535 dev->wiphy.vht_capa_mod_mask))
1536 goto nla_put_failure;
1537
1538 /* done */
1539 *split_start = 0;
1540 break;
1541 }
1301 return genlmsg_end(msg, hdr); 1542 return genlmsg_end(msg, hdr);
1302 1543
1303 nla_put_failure: 1544 nla_put_failure:
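The switch above turns nl80211_send_wiphy() into a resumable generator: each case emits one chunk of data, advances *split_start, and stops early only when the peer asked for a split dump; legacy (unsplit) requests fall through every case in one pass, and case 8 breaks unconditionally so nothing beyond it is ever sent unsplit. A hedged sketch of the pattern, with hypothetical emit_chunk_*() helpers standing in for the real attribute writers:

	static int send_all(struct sk_buff *msg, bool split, long *state)
	{
		switch (*state) {
		case 0:
			if (emit_chunk_a(msg))		/* hypothetical emitter */
				return -ENOBUFS;
			(*state)++;
			if (split)
				break;	/* a split dump resumes at case 1 */
			/* fall through: unsplit callers emit everything at once */
		case 1:
			if (emit_chunk_b(msg))
				return -ENOBUFS;
			*state = 0;	/* done, reset for the next object */
			break;
		}
		return 0;
	}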
@@ -1310,39 +1551,80 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
1310 int idx = 0, ret; 1551 int idx = 0, ret;
1311 int start = cb->args[0]; 1552 int start = cb->args[0];
1312 struct cfg80211_registered_device *dev; 1553 struct cfg80211_registered_device *dev;
1554 s64 filter_wiphy = -1;
1555 bool split = false;
1556 struct nlattr **tb = nl80211_fam.attrbuf;
1557 int res;
1313 1558
1314 mutex_lock(&cfg80211_mutex); 1559 mutex_lock(&cfg80211_mutex);
1560 res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
1561 tb, nl80211_fam.maxattr, nl80211_policy);
1562 if (res == 0) {
1563 split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP];
1564 if (tb[NL80211_ATTR_WIPHY])
1565 filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]);
1566 if (tb[NL80211_ATTR_WDEV])
1567 filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32;
1568 if (tb[NL80211_ATTR_IFINDEX]) {
1569 struct net_device *netdev;
1570 int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]);
1571
1572 netdev = dev_get_by_index(sock_net(skb->sk), ifidx);
1573 if (!netdev) {
1574 mutex_unlock(&cfg80211_mutex);
1575 return -ENODEV;
1576 }
1577 if (netdev->ieee80211_ptr) {
1578 dev = wiphy_to_dev(
1579 netdev->ieee80211_ptr->wiphy);
1580 filter_wiphy = dev->wiphy_idx;
1581 }
1582 dev_put(netdev);
1583 }
1584 }
1585
1315 list_for_each_entry(dev, &cfg80211_rdev_list, list) { 1586 list_for_each_entry(dev, &cfg80211_rdev_list, list) {
1316 if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) 1587 if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk)))
1317 continue; 1588 continue;
1318 if (++idx <= start) 1589 if (++idx <= start)
1319 continue; 1590 continue;
1320 ret = nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid, 1591 if (filter_wiphy != -1 && dev->wiphy_idx != filter_wiphy)
1321 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1592 continue;
1322 dev); 1593 /* attempt to fit multiple wiphy data chunks into the skb */
1323 if (ret < 0) { 1594 do {
1324 /* 1595 ret = nl80211_send_wiphy(dev, skb,
1325 * If sending the wiphy data didn't fit (ENOBUFS or 1596 NETLINK_CB(cb->skb).portid,
1326 * EMSGSIZE returned), this SKB is still empty (so 1597 cb->nlh->nlmsg_seq,
1327 * it's not too big because another wiphy dataset is 1598 NLM_F_MULTI,
1328 * already in the skb) and we've not tried to adjust 1599 split, &cb->args[1],
1329 * the dump allocation yet ... then adjust the alloc 1600 &cb->args[2],
1330 * size to be bigger, and return 1 but with the empty 1601 &cb->args[3]);
1331 * skb. This results in an empty message being RX'ed 1602 if (ret < 0) {
1332 * in userspace, but that is ignored. 1603 /*
1333 * 1604 * If sending the wiphy data didn't fit (ENOBUFS
1334 * We can then retry with the larger buffer. 1605 * or EMSGSIZE returned), this SKB is still
1335 */ 1606 * empty (so it's not too big because another
1336 if ((ret == -ENOBUFS || ret == -EMSGSIZE) && 1607 * wiphy dataset is already in the skb) and
1337 !skb->len && 1608 * we've not tried to adjust the dump allocation
1338 cb->min_dump_alloc < 4096) { 1609 * yet ... then adjust the alloc size to be
1339 cb->min_dump_alloc = 4096; 1610 * bigger, and return 1 but with the empty skb.
1340 mutex_unlock(&cfg80211_mutex); 1611 * This results in an empty message being RX'ed
1341 return 1; 1612 * in userspace, but that is ignored.
1613 *
1614 * We can then retry with the larger buffer.
1615 */
1616 if ((ret == -ENOBUFS || ret == -EMSGSIZE) &&
1617 !skb->len &&
1618 cb->min_dump_alloc < 4096) {
1619 cb->min_dump_alloc = 4096;
1620 mutex_unlock(&cfg80211_mutex);
1621 return 1;
1622 }
1623 idx--;
1624 break;
1342 } 1625 }
1343 idx--; 1626 } while (cb->args[1] > 0);
1344 break; 1627 break;
1345 }
1346 } 1628 }
1347 mutex_unlock(&cfg80211_mutex); 1629 mutex_unlock(&cfg80211_mutex);
1348 1630
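The retry logic in the dump above relies on a generic netlink property: a dump callback that returns a positive value with an empty skb yields a zero-payload message that userspace quietly discards, so the kernel invokes ->dumpit() again with the enlarged allocation. A stripped-down sketch, with my_fill() as a hypothetical per-object serializer:

	static int my_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
	{
		int ret = my_fill(skb, cb->args[0]);	/* hypothetical */

		if ((ret == -ENOBUFS || ret == -EMSGSIZE) && !skb->len &&
		    cb->min_dump_alloc < 4096) {
			cb->min_dump_alloc = 4096;	/* ask for a bigger skb */
			return 1;	/* empty message, dump stays alive */
		}
		if (ret < 0)
			return ret;
		cb->args[0]++;			/* resume point for next round */
		return skb->len;		/* returning 0 would end the dump */
	}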
@@ -1360,7 +1642,8 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info)
1360 if (!msg) 1642 if (!msg)
1361 return -ENOMEM; 1643 return -ENOMEM;
1362 1644
1363 if (nl80211_send_wiphy(msg, info->snd_portid, info->snd_seq, 0, dev) < 0) { 1645 if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0,
1646 false, NULL, NULL, NULL) < 0) {
1364 nlmsg_free(msg); 1647 nlmsg_free(msg);
1365 return -ENOBUFS; 1648 return -ENOBUFS;
1366 } 1649 }
@@ -2967,6 +3250,7 @@ static int parse_station_flags(struct genl_info *info,
2967 sta_flags = nla_data(nla); 3250 sta_flags = nla_data(nla);
2968 params->sta_flags_mask = sta_flags->mask; 3251 params->sta_flags_mask = sta_flags->mask;
2969 params->sta_flags_set = sta_flags->set; 3252 params->sta_flags_set = sta_flags->set;
3253 params->sta_flags_set &= params->sta_flags_mask;
2970 if ((params->sta_flags_mask | 3254 if ((params->sta_flags_mask |
2971 params->sta_flags_set) & BIT(__NL80211_STA_FLAG_INVALID)) 3255 params->sta_flags_set) & BIT(__NL80211_STA_FLAG_INVALID))
2972 return -EINVAL; 3256 return -EINVAL;
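The added masking line makes the set/mask contract explicit: a flag may only be changed if userspace also listed it in the mask, so stray set bits are silently dropped instead of acted on. A worked example with illustrative values:

	/* userspace sent: mask = AUTHORIZED|WME, set = AUTHORIZED|MFP */
	u32 mask = BIT(NL80211_STA_FLAG_AUTHORIZED) | BIT(NL80211_STA_FLAG_WME);
	u32 set  = BIT(NL80211_STA_FLAG_AUTHORIZED) | BIT(NL80211_STA_FLAG_MFP);

	set &= mask;	/* MFP is dropped: it was never named in the mask */
	/* drivers now only ever see changes that were explicitly masked in */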
@@ -3320,6 +3604,136 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
3320 return genlmsg_reply(msg, info); 3604 return genlmsg_reply(msg, info);
3321} 3605}
3322 3606
3607int cfg80211_check_station_change(struct wiphy *wiphy,
3608 struct station_parameters *params,
3609 enum cfg80211_station_type statype)
3610{
3611 if (params->listen_interval != -1)
3612 return -EINVAL;
3613 if (params->aid)
3614 return -EINVAL;
3615
3616 /* When you run into this, adjust the code below for the new flag */
3617 BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
3618
3619 switch (statype) {
3620 case CFG80211_STA_MESH_PEER_KERNEL:
3621 case CFG80211_STA_MESH_PEER_USER:
3622 /*
3623 * No ignoring the TDLS flag here -- the userspace mesh
3624 * code doesn't have the bug of including TDLS in the
3625 * mask everywhere.
3626 */
3627 if (params->sta_flags_mask &
3628 ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
3629 BIT(NL80211_STA_FLAG_MFP) |
3630 BIT(NL80211_STA_FLAG_AUTHORIZED)))
3631 return -EINVAL;
3632 break;
3633 case CFG80211_STA_TDLS_PEER_SETUP:
3634 case CFG80211_STA_TDLS_PEER_ACTIVE:
3635 if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
3636 return -EINVAL;
3637 /* ignore since it can't change */
3638 params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
3639 break;
3640 default:
3641 /* disallow mesh-specific things */
3642 if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION)
3643 return -EINVAL;
3644 if (params->local_pm)
3645 return -EINVAL;
3646 if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
3647 return -EINVAL;
3648 }
3649
3650 if (statype != CFG80211_STA_TDLS_PEER_SETUP &&
3651 statype != CFG80211_STA_TDLS_PEER_ACTIVE) {
3652 /* TDLS can't be set, ... */
3653 if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
3654 return -EINVAL;
3655 /*
3656 * ... but don't bother the driver with it. This works around
3657 * a hostapd/wpa_supplicant issue -- it always includes the
3658 * TDLS_PEER flag in the mask even for AP mode.
3659 */
3660 params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
3661 }
3662
3663 if (statype != CFG80211_STA_TDLS_PEER_SETUP) {
3664 /* reject other things that can't change */
3665 if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD)
3666 return -EINVAL;
3667 if (params->sta_modify_mask & STATION_PARAM_APPLY_CAPABILITY)
3668 return -EINVAL;
3669 if (params->supported_rates)
3670 return -EINVAL;
3671 if (params->ext_capab || params->ht_capa || params->vht_capa)
3672 return -EINVAL;
3673 }
3674
3675 if (statype != CFG80211_STA_AP_CLIENT) {
3676 if (params->vlan)
3677 return -EINVAL;
3678 }
3679
3680 switch (statype) {
3681 case CFG80211_STA_AP_MLME_CLIENT:
3682 /* Use this only for authorizing/unauthorizing a station */
3683 if (!(params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)))
3684 return -EOPNOTSUPP;
3685 break;
3686 case CFG80211_STA_AP_CLIENT:
3687 /* accept only the listed bits */
3688 if (params->sta_flags_mask &
3689 ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
3690 BIT(NL80211_STA_FLAG_AUTHENTICATED) |
3691 BIT(NL80211_STA_FLAG_ASSOCIATED) |
3692 BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
3693 BIT(NL80211_STA_FLAG_WME) |
3694 BIT(NL80211_STA_FLAG_MFP)))
3695 return -EINVAL;
3696
3697 /* but authenticated/associated only if driver handles it */
3698 if (!(wiphy->features & NL80211_FEATURE_FULL_AP_CLIENT_STATE) &&
3699 params->sta_flags_mask &
3700 (BIT(NL80211_STA_FLAG_AUTHENTICATED) |
3701 BIT(NL80211_STA_FLAG_ASSOCIATED)))
3702 return -EINVAL;
3703 break;
3704 case CFG80211_STA_IBSS:
3705 case CFG80211_STA_AP_STA:
3706 /* reject any changes other than AUTHORIZED */
3707 if (params->sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED))
3708 return -EINVAL;
3709 break;
3710 case CFG80211_STA_TDLS_PEER_SETUP:
3711 /* reject any changes other than AUTHORIZED or WME */
3712 if (params->sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
3713 BIT(NL80211_STA_FLAG_WME)))
3714 return -EINVAL;
3715 /* force (at least) rates when authorizing */
3716 if (params->sta_flags_set & BIT(NL80211_STA_FLAG_AUTHORIZED) &&
3717 !params->supported_rates)
3718 return -EINVAL;
3719 break;
3720 case CFG80211_STA_TDLS_PEER_ACTIVE:
3721 /* reject any changes */
3722 return -EINVAL;
3723 case CFG80211_STA_MESH_PEER_KERNEL:
3724 if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
3725 return -EINVAL;
3726 break;
3727 case CFG80211_STA_MESH_PEER_USER:
3728 if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION)
3729 return -EINVAL;
3730 break;
3731 }
3732
3733 return 0;
3734}
3735EXPORT_SYMBOL(cfg80211_check_station_change);
3736
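With the per-iftype validation centralised in this exported helper, a driver's ->change_station() is expected to derive the peer's current type and let the helper reject impossible transitions. A hedged sketch (the statype choice is hard-coded for brevity; a real driver computes it from its station state):

	static int drv_change_station(struct wiphy *wiphy, struct net_device *dev,
				      u8 *mac, struct station_parameters *params)
	{
		enum cfg80211_station_type statype = CFG80211_STA_AP_CLIENT;
		int err;

		err = cfg80211_check_station_change(wiphy, params, statype);
		if (err)
			return err;	/* change is invalid for this peer type */

		/* apply the now-sanitized params to the station entry */
		return 0;
	}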
3323/* 3737/*
3324 * Get vlan interface making sure it is running and on the right wiphy. 3738 * Get vlan interface making sure it is running and on the right wiphy.
3325 */ 3739 */
@@ -3342,6 +3756,13 @@ static struct net_device *get_vlan(struct genl_info *info,
3342 goto error; 3756 goto error;
3343 } 3757 }
3344 3758
3759 if (v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
3760 v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
3761 v->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
3762 ret = -EINVAL;
3763 goto error;
3764 }
3765
3345 if (!netif_running(v)) { 3766 if (!netif_running(v)) {
3346 ret = -ENETDOWN; 3767 ret = -ENETDOWN;
3347 goto error; 3768 goto error;
@@ -3359,21 +3780,13 @@ nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] __read_mostly = {
3359 [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 }, 3780 [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 },
3360}; 3781};
3361 3782
3362static int nl80211_set_station_tdls(struct genl_info *info, 3783static int nl80211_parse_sta_wme(struct genl_info *info,
3363 struct station_parameters *params) 3784 struct station_parameters *params)
3364{ 3785{
3365 struct nlattr *tb[NL80211_STA_WME_MAX + 1]; 3786 struct nlattr *tb[NL80211_STA_WME_MAX + 1];
3366 struct nlattr *nla; 3787 struct nlattr *nla;
3367 int err; 3788 int err;
3368 3789
3369 /* Dummy STA entry gets updated once the peer capabilities are known */
3370 if (info->attrs[NL80211_ATTR_HT_CAPABILITY])
3371 params->ht_capa =
3372 nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
3373 if (info->attrs[NL80211_ATTR_VHT_CAPABILITY])
3374 params->vht_capa =
3375 nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
3376
3377 /* parse WME attributes if present */ 3790 /* parse WME attributes if present */
3378 if (!info->attrs[NL80211_ATTR_STA_WME]) 3791 if (!info->attrs[NL80211_ATTR_STA_WME])
3379 return 0; 3792 return 0;
@@ -3401,18 +3814,34 @@ static int nl80211_set_station_tdls(struct genl_info *info,
3401 return 0; 3814 return 0;
3402} 3815}
3403 3816
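The factored-out nl80211_parse_sta_wme() (its body is not shown in this hunk) is built on the usual nested-parse idiom; a condensed sketch, with the range validation of the parsed values elided:

	static int parse_wme_sketch(struct genl_info *info,
				    struct station_parameters *params)
	{
		struct nlattr *tb[NL80211_STA_WME_MAX + 1];
		int err;

		if (!info->attrs[NL80211_ATTR_STA_WME])
			return 0;		/* the attribute is optional */

		err = nla_parse_nested(tb, NL80211_STA_WME_MAX,
				       info->attrs[NL80211_ATTR_STA_WME],
				       nl80211_sta_wme_policy);
		if (err)
			return err;

		if (tb[NL80211_STA_WME_UAPSD_QUEUES])
			params->uapsd_queues =
				nla_get_u8(tb[NL80211_STA_WME_UAPSD_QUEUES]);
		return 0;
	}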
3817static int nl80211_set_station_tdls(struct genl_info *info,
3818 struct station_parameters *params)
3819{
3820 /* Dummy STA entry gets updated once the peer capabilities are known */
3821 if (info->attrs[NL80211_ATTR_HT_CAPABILITY])
3822 params->ht_capa =
3823 nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
3824 if (info->attrs[NL80211_ATTR_VHT_CAPABILITY])
3825 params->vht_capa =
3826 nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
3827
3828 return nl80211_parse_sta_wme(info, params);
3829}
3830
3404static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) 3831static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
3405{ 3832{
3406 struct cfg80211_registered_device *rdev = info->user_ptr[0]; 3833 struct cfg80211_registered_device *rdev = info->user_ptr[0];
3407 int err;
3408 struct net_device *dev = info->user_ptr[1]; 3834 struct net_device *dev = info->user_ptr[1];
3409 struct station_parameters params; 3835 struct station_parameters params;
3410 u8 *mac_addr = NULL; 3836 u8 *mac_addr;
3837 int err;
3411 3838
3412 memset(&params, 0, sizeof(params)); 3839 memset(&params, 0, sizeof(params));
3413 3840
3414 params.listen_interval = -1; 3841 params.listen_interval = -1;
3415 params.plink_state = -1; 3842
3843 if (!rdev->ops->change_station)
3844 return -EOPNOTSUPP;
3416 3845
3417 if (info->attrs[NL80211_ATTR_STA_AID]) 3846 if (info->attrs[NL80211_ATTR_STA_AID])
3418 return -EINVAL; 3847 return -EINVAL;
@@ -3445,19 +3874,23 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
3445 if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) 3874 if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
3446 return -EINVAL; 3875 return -EINVAL;
3447 3876
3448 if (!rdev->ops->change_station)
3449 return -EOPNOTSUPP;
3450
3451 if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params)) 3877 if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
3452 return -EINVAL; 3878 return -EINVAL;
3453 3879
3454 if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) 3880 if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) {
3455 params.plink_action = 3881 params.plink_action =
3456 nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); 3882 nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
3883 if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS)
3884 return -EINVAL;
3885 }
3457 3886
3458 if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) 3887 if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) {
3459 params.plink_state = 3888 params.plink_state =
3460 nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); 3889 nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]);
3890 if (params.plink_state >= NUM_NL80211_PLINK_STATES)
3891 return -EINVAL;
3892 params.sta_modify_mask |= STATION_PARAM_APPLY_PLINK_STATE;
3893 }
3461 3894
3462 if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) { 3895 if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) {
3463 enum nl80211_mesh_power_mode pm = nla_get_u32( 3896 enum nl80211_mesh_power_mode pm = nla_get_u32(
@@ -3470,127 +3903,33 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
3470 params.local_pm = pm; 3903 params.local_pm = pm;
3471 } 3904 }
3472 3905
3906 /* Include parameters for TDLS peer (will check later) */
3907 err = nl80211_set_station_tdls(info, &params);
3908 if (err)
3909 return err;
3910
3911 params.vlan = get_vlan(info, rdev);
3912 if (IS_ERR(params.vlan))
3913 return PTR_ERR(params.vlan);
3914
3473 switch (dev->ieee80211_ptr->iftype) { 3915 switch (dev->ieee80211_ptr->iftype) {
3474 case NL80211_IFTYPE_AP: 3916 case NL80211_IFTYPE_AP:
3475 case NL80211_IFTYPE_AP_VLAN: 3917 case NL80211_IFTYPE_AP_VLAN:
3476 case NL80211_IFTYPE_P2P_GO: 3918 case NL80211_IFTYPE_P2P_GO:
3477 /* disallow mesh-specific things */
3478 if (params.plink_action)
3479 return -EINVAL;
3480 if (params.local_pm)
3481 return -EINVAL;
3482
3483 /* TDLS can't be set, ... */
3484 if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
3485 return -EINVAL;
3486 /*
3487 * ... but don't bother the driver with it. This works around
3488 * a hostapd/wpa_supplicant issue -- it always includes the
3489 * TDLS_PEER flag in the mask even for AP mode.
3490 */
3491 params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
3492
3493 /* accept only the listed bits */
3494 if (params.sta_flags_mask &
3495 ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
3496 BIT(NL80211_STA_FLAG_AUTHENTICATED) |
3497 BIT(NL80211_STA_FLAG_ASSOCIATED) |
3498 BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
3499 BIT(NL80211_STA_FLAG_WME) |
3500 BIT(NL80211_STA_FLAG_MFP)))
3501 return -EINVAL;
3502
3503 /* but authenticated/associated only if driver handles it */
3504 if (!(rdev->wiphy.features &
3505 NL80211_FEATURE_FULL_AP_CLIENT_STATE) &&
3506 params.sta_flags_mask &
3507 (BIT(NL80211_STA_FLAG_AUTHENTICATED) |
3508 BIT(NL80211_STA_FLAG_ASSOCIATED)))
3509 return -EINVAL;
3510
3511 /* reject other things that can't change */
3512 if (params.supported_rates)
3513 return -EINVAL;
3514 if (info->attrs[NL80211_ATTR_STA_CAPABILITY])
3515 return -EINVAL;
3516 if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY])
3517 return -EINVAL;
3518 if (info->attrs[NL80211_ATTR_HT_CAPABILITY] ||
3519 info->attrs[NL80211_ATTR_VHT_CAPABILITY])
3520 return -EINVAL;
3521
3522 /* must be last in here for error handling */
3523 params.vlan = get_vlan(info, rdev);
3524 if (IS_ERR(params.vlan))
3525 return PTR_ERR(params.vlan);
3526 break;
3527 case NL80211_IFTYPE_P2P_CLIENT: 3919 case NL80211_IFTYPE_P2P_CLIENT:
3528 case NL80211_IFTYPE_STATION: 3920 case NL80211_IFTYPE_STATION:
3529 /*
3530 * Don't allow userspace to change the TDLS_PEER flag,
3531 * but silently ignore attempts to change it since we
3532 * don't have state here to verify that it doesn't try
3533 * to change the flag.
3534 */
3535 params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
3536 /* Include parameters for TDLS peer (driver will check) */
3537 err = nl80211_set_station_tdls(info, &params);
3538 if (err)
3539 return err;
3540 /* disallow things sta doesn't support */
3541 if (params.plink_action)
3542 return -EINVAL;
3543 if (params.local_pm)
3544 return -EINVAL;
3545 /* reject any changes other than AUTHORIZED or WME (for TDLS) */
3546 if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
3547 BIT(NL80211_STA_FLAG_WME)))
3548 return -EINVAL;
3549 break;
3550 case NL80211_IFTYPE_ADHOC: 3921 case NL80211_IFTYPE_ADHOC:
3551 /* disallow things sta doesn't support */
3552 if (params.plink_action)
3553 return -EINVAL;
3554 if (params.local_pm)
3555 return -EINVAL;
3556 if (info->attrs[NL80211_ATTR_HT_CAPABILITY] ||
3557 info->attrs[NL80211_ATTR_VHT_CAPABILITY])
3558 return -EINVAL;
3559 /* reject any changes other than AUTHORIZED */
3560 if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED))
3561 return -EINVAL;
3562 break;
3563 case NL80211_IFTYPE_MESH_POINT: 3922 case NL80211_IFTYPE_MESH_POINT:
3564 /* disallow things mesh doesn't support */
3565 if (params.vlan)
3566 return -EINVAL;
3567 if (params.supported_rates)
3568 return -EINVAL;
3569 if (info->attrs[NL80211_ATTR_STA_CAPABILITY])
3570 return -EINVAL;
3571 if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY])
3572 return -EINVAL;
3573 if (info->attrs[NL80211_ATTR_HT_CAPABILITY] ||
3574 info->attrs[NL80211_ATTR_VHT_CAPABILITY])
3575 return -EINVAL;
3576 /*
3577 * No special handling for TDLS here -- the userspace
3578 * mesh code doesn't have this bug.
3579 */
3580 if (params.sta_flags_mask &
3581 ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
3582 BIT(NL80211_STA_FLAG_MFP) |
3583 BIT(NL80211_STA_FLAG_AUTHORIZED)))
3584 return -EINVAL;
3585 break; 3923 break;
3586 default: 3924 default:
3587 return -EOPNOTSUPP; 3925 err = -EOPNOTSUPP;
3926 goto out_put_vlan;
3588 } 3927 }
3589 3928
3590 /* be aware of params.vlan when changing code here */ 3929 /* driver will call cfg80211_check_station_change() */
3591
3592 err = rdev_change_station(rdev, dev, mac_addr, &params); 3930 err = rdev_change_station(rdev, dev, mac_addr, &params);
3593 3931
3932 out_put_vlan:
3594 if (params.vlan) 3933 if (params.vlan)
3595 dev_put(params.vlan); 3934 dev_put(params.vlan);
3596 3935
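Hoisting get_vlan() ahead of the iftype switch leans on the ERR_PTR convention: the one returned pointer either carries a valid reference, NULL (attribute absent), or an encoded errno. A sketch of the calling pattern:

	struct net_device *vlan;

	vlan = get_vlan(info, rdev);	/* may return ERR_PTR(-errno) */
	if (IS_ERR(vlan))
		return PTR_ERR(vlan);	/* no reference was taken */

	/* ... use vlan ... */

	if (vlan)
		dev_put(vlan);		/* drop the reference get_vlan() took */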
@@ -3607,6 +3946,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
3607 3946
3608 memset(&params, 0, sizeof(params)); 3947 memset(&params, 0, sizeof(params));
3609 3948
3949 if (!rdev->ops->add_station)
3950 return -EOPNOTSUPP;
3951
3610 if (!info->attrs[NL80211_ATTR_MAC]) 3952 if (!info->attrs[NL80211_ATTR_MAC])
3611 return -EINVAL; 3953 return -EINVAL;
3612 3954
@@ -3652,50 +3994,32 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
3652 params.vht_capa = 3994 params.vht_capa =
3653 nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); 3995 nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
3654 3996
3655 if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) 3997 if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) {
3656 params.plink_action = 3998 params.plink_action =
3657 nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); 3999 nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
4000 if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS)
4001 return -EINVAL;
4002 }
3658 4003
3659 if (!rdev->ops->add_station) 4004 err = nl80211_parse_sta_wme(info, &params);
3660 return -EOPNOTSUPP; 4005 if (err)
4006 return err;
3661 4007
3662 if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params)) 4008 if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
3663 return -EINVAL; 4009 return -EINVAL;
3664 4010
4011 /* When you run into this, adjust the code below for the new flag */
4012 BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
4013
3665 switch (dev->ieee80211_ptr->iftype) { 4014 switch (dev->ieee80211_ptr->iftype) {
3666 case NL80211_IFTYPE_AP: 4015 case NL80211_IFTYPE_AP:
3667 case NL80211_IFTYPE_AP_VLAN: 4016 case NL80211_IFTYPE_AP_VLAN:
3668 case NL80211_IFTYPE_P2P_GO: 4017 case NL80211_IFTYPE_P2P_GO:
3669 /* parse WME attributes if sta is WME capable */ 4018 /* ignore WME attributes if iface/sta is not capable */
3670 if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && 4019 if (!(rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) ||
3671 (params.sta_flags_set & BIT(NL80211_STA_FLAG_WME)) && 4020 !(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME)))
3672 info->attrs[NL80211_ATTR_STA_WME]) { 4021 params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD;
3673 struct nlattr *tb[NL80211_STA_WME_MAX + 1];
3674 struct nlattr *nla;
3675
3676 nla = info->attrs[NL80211_ATTR_STA_WME];
3677 err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla,
3678 nl80211_sta_wme_policy);
3679 if (err)
3680 return err;
3681 4022
3682 if (tb[NL80211_STA_WME_UAPSD_QUEUES])
3683 params.uapsd_queues =
3684 nla_get_u8(tb[NL80211_STA_WME_UAPSD_QUEUES]);
3685 if (params.uapsd_queues &
3686 ~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK)
3687 return -EINVAL;
3688
3689 if (tb[NL80211_STA_WME_MAX_SP])
3690 params.max_sp =
3691 nla_get_u8(tb[NL80211_STA_WME_MAX_SP]);
3692
3693 if (params.max_sp &
3694 ~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK)
3695 return -EINVAL;
3696
3697 params.sta_modify_mask |= STATION_PARAM_APPLY_UAPSD;
3698 }
3699 /* TDLS peers cannot be added */ 4023 /* TDLS peers cannot be added */
3700 if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) 4024 if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
3701 return -EINVAL; 4025 return -EINVAL;
@@ -3716,6 +4040,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
3716 return PTR_ERR(params.vlan); 4040 return PTR_ERR(params.vlan);
3717 break; 4041 break;
3718 case NL80211_IFTYPE_MESH_POINT: 4042 case NL80211_IFTYPE_MESH_POINT:
4043 /* ignore uAPSD data */
4044 params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD;
4045
3719 /* associated is disallowed */ 4046 /* associated is disallowed */
3720 if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) 4047 if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED))
3721 return -EINVAL; 4048 return -EINVAL;
@@ -3724,8 +4051,14 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
3724 return -EINVAL; 4051 return -EINVAL;
3725 break; 4052 break;
3726 case NL80211_IFTYPE_STATION: 4053 case NL80211_IFTYPE_STATION:
3727 /* associated is disallowed */ 4054 case NL80211_IFTYPE_P2P_CLIENT:
3728 if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) 4055 /* ignore uAPSD data */
4056 params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD;
4057
4058 /* these are disallowed */
4059 if (params.sta_flags_mask &
4060 (BIT(NL80211_STA_FLAG_ASSOCIATED) |
4061 BIT(NL80211_STA_FLAG_AUTHENTICATED)))
3729 return -EINVAL; 4062 return -EINVAL;
3730 /* Only TDLS peers can be added */ 4063 /* Only TDLS peers can be added */
3731 if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) 4064 if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
@@ -3736,6 +4069,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
3736 /* ... with external setup is supported */ 4069 /* ... with external setup is supported */
3737 if (!(rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP)) 4070 if (!(rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP))
3738 return -EOPNOTSUPP; 4071 return -EOPNOTSUPP;
4072 /*
4073 * Older wpa_supplicant versions always mark the TDLS peer
4074 * as authorized, but it shouldn't yet be.
4075 */
4076 params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_AUTHORIZED);
3739 break; 4077 break;
3740 default: 4078 default:
3741 return -EOPNOTSUPP; 4079 return -EOPNOTSUPP;
@@ -4280,6 +4618,7 @@ static const struct nla_policy
4280 [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, 4618 [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 },
4281 [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, 4619 [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 },
4282 [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, 4620 [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG },
4621 [NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG },
4283 [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, 4622 [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY,
4284 .len = IEEE80211_MAX_DATA_LEN }, 4623 .len = IEEE80211_MAX_DATA_LEN },
4285 [NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG }, 4624 [NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG },
@@ -4418,6 +4757,7 @@ do { \
4418static int nl80211_parse_mesh_setup(struct genl_info *info, 4757static int nl80211_parse_mesh_setup(struct genl_info *info,
4419 struct mesh_setup *setup) 4758 struct mesh_setup *setup)
4420{ 4759{
4760 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4421 struct nlattr *tb[NL80211_MESH_SETUP_ATTR_MAX + 1]; 4761 struct nlattr *tb[NL80211_MESH_SETUP_ATTR_MAX + 1];
4422 4762
4423 if (!info->attrs[NL80211_ATTR_MESH_SETUP]) 4763 if (!info->attrs[NL80211_ATTR_MESH_SETUP])
@@ -4454,8 +4794,14 @@ static int nl80211_parse_mesh_setup(struct genl_info *info,
4454 setup->ie = nla_data(ieattr); 4794 setup->ie = nla_data(ieattr);
4455 setup->ie_len = nla_len(ieattr); 4795 setup->ie_len = nla_len(ieattr);
4456 } 4796 }
4797 if (tb[NL80211_MESH_SETUP_USERSPACE_MPM] &&
4798 !(rdev->wiphy.features & NL80211_FEATURE_USERSPACE_MPM))
4799 return -EINVAL;
4800 setup->user_mpm = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_MPM]);
4457 setup->is_authenticated = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]); 4801 setup->is_authenticated = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]);
4458 setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AMPE]); 4802 setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AMPE]);
4803 if (setup->is_secure)
4804 setup->user_mpm = true;
4459 4805
4460 return 0; 4806 return 0;
4461} 4807}
@@ -5663,14 +6009,10 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
5663{ 6009{
5664 struct cfg80211_registered_device *rdev = info->user_ptr[0]; 6010 struct cfg80211_registered_device *rdev = info->user_ptr[0];
5665 struct net_device *dev = info->user_ptr[1]; 6011 struct net_device *dev = info->user_ptr[1];
5666 struct cfg80211_crypto_settings crypto;
5667 struct ieee80211_channel *chan; 6012 struct ieee80211_channel *chan;
5668 const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL; 6013 struct cfg80211_assoc_request req = {};
5669 int err, ssid_len, ie_len = 0; 6014 const u8 *bssid, *ssid;
5670 bool use_mfp = false; 6015 int err, ssid_len = 0;
5671 u32 flags = 0;
5672 struct ieee80211_ht_cap *ht_capa = NULL;
5673 struct ieee80211_ht_cap *ht_capa_mask = NULL;
5674 6016
5675 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) 6017 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
5676 return -EINVAL; 6018 return -EINVAL;
@@ -5698,41 +6040,58 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
5698 ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); 6040 ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
5699 6041
5700 if (info->attrs[NL80211_ATTR_IE]) { 6042 if (info->attrs[NL80211_ATTR_IE]) {
5701 ie = nla_data(info->attrs[NL80211_ATTR_IE]); 6043 req.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
5702 ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); 6044 req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
5703 } 6045 }
5704 6046
5705 if (info->attrs[NL80211_ATTR_USE_MFP]) { 6047 if (info->attrs[NL80211_ATTR_USE_MFP]) {
5706 enum nl80211_mfp mfp = 6048 enum nl80211_mfp mfp =
5707 nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); 6049 nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
5708 if (mfp == NL80211_MFP_REQUIRED) 6050 if (mfp == NL80211_MFP_REQUIRED)
5709 use_mfp = true; 6051 req.use_mfp = true;
5710 else if (mfp != NL80211_MFP_NO) 6052 else if (mfp != NL80211_MFP_NO)
5711 return -EINVAL; 6053 return -EINVAL;
5712 } 6054 }
5713 6055
5714 if (info->attrs[NL80211_ATTR_PREV_BSSID]) 6056 if (info->attrs[NL80211_ATTR_PREV_BSSID])
5715 prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); 6057 req.prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);
5716 6058
5717 if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT])) 6059 if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT]))
5718 flags |= ASSOC_REQ_DISABLE_HT; 6060 req.flags |= ASSOC_REQ_DISABLE_HT;
5719 6061
5720 if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) 6062 if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
5721 ht_capa_mask = 6063 memcpy(&req.ht_capa_mask,
5722 nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]); 6064 nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]),
6065 sizeof(req.ht_capa_mask));
5723 6066
5724 if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) { 6067 if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
5725 if (!ht_capa_mask) 6068 if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
5726 return -EINVAL; 6069 return -EINVAL;
5727 ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); 6070 memcpy(&req.ht_capa,
6071 nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]),
6072 sizeof(req.ht_capa));
5728 } 6073 }
5729 6074
5730 err = nl80211_crypto_settings(rdev, info, &crypto, 1); 6075 if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT]))
6076 req.flags |= ASSOC_REQ_DISABLE_VHT;
6077
6078 if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
6079 memcpy(&req.vht_capa_mask,
6080 nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
6081 sizeof(req.vht_capa_mask));
6082
6083 if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) {
6084 if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
6085 return -EINVAL;
6086 memcpy(&req.vht_capa,
6087 nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]),
6088 sizeof(req.vht_capa));
6089 }
6090
6091 err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
5731 if (!err) 6092 if (!err)
5732 err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, 6093 err = cfg80211_mlme_assoc(rdev, dev, chan, bssid,
5733 ssid, ssid_len, ie, ie_len, use_mfp, 6094 ssid, ssid_len, &req);
5734 &crypto, flags, ht_capa,
5735 ht_capa_mask);
5736 6095
5737 return err; 6096 return err;
5738} 6097}
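The capa/capa_mask pairs copied into the request follow an "only bits named in the mask may be overridden" contract. A hedged sketch of how a lower layer might apply the HT override (helper name hypothetical; the VHT case is analogous):

	static void apply_ht_override(struct ieee80211_ht_cap *ht,
				      const struct ieee80211_ht_cap *ovr,
				      const struct ieee80211_ht_cap *mask)
	{
		/* clear the maskable bits, then copy the override in */
		ht->cap_info &= ~mask->cap_info;
		ht->cap_info |= ovr->cap_info & mask->cap_info;
	}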
@@ -6312,6 +6671,24 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
6312 sizeof(connect.ht_capa)); 6671 sizeof(connect.ht_capa));
6313 } 6672 }
6314 6673
6674 if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT]))
6675 connect.flags |= ASSOC_REQ_DISABLE_VHT;
6676
6677 if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
6678 memcpy(&connect.vht_capa_mask,
6679 nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
6680 sizeof(connect.vht_capa_mask));
6681
6682 if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) {
6683 if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) {
6684 kfree(connkeys);
6685 return -EINVAL;
6686 }
6687 memcpy(&connect.vht_capa,
6688 nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]),
6689 sizeof(connect.vht_capa));
6690 }
6691
6315 err = cfg80211_connect(rdev, dev, &connect, connkeys); 6692 err = cfg80211_connect(rdev, dev, &connect, connkeys);
6316 if (err) 6693 if (err)
6317 kfree(connkeys); 6694 kfree(connkeys);
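
The same validation rule as for HT applies to the new VHT attributes: an override without a mask is rejected, and because nl80211_connect() has already allocated connkeys at this point, the error path must free it first. A small sketch of that shape, using libc names in place of the kernel's:

    #include <errno.h>
    #include <stdlib.h>

    /* A capability override is only accepted together with a mask saying
     * which bits the driver may apply; reject one without the other and
     * release anything allocated earlier (connkeys in the patch).
     */
    static int check_override(int have_capa, int have_mask, void *to_free)
    {
            if (have_capa && !have_mask) {
                    free(to_free);          /* mirrors kfree(connkeys) */
                    return -22;             /* -EINVAL */
            }
            return 0;
    }
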
@@ -7085,6 +7462,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
7085 return err; 7462 return err;
7086 } 7463 }
7087 7464
7465 if (setup.user_mpm)
7466 cfg.auto_open_plinks = false;
7467
7088 if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { 7468 if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
7089 err = nl80211_parse_chandef(rdev, info, &setup.chandef); 7469 err = nl80211_parse_chandef(rdev, info, &setup.chandef);
7090 if (err) 7470 if (err)
@@ -7284,7 +7664,8 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,
7284 return -EINVAL; 7664 return -EINVAL;
7285 7665
7286 if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) > 7666 if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) >
7287 rdev->wiphy.wowlan.tcp->data_interval_max) 7667 rdev->wiphy.wowlan.tcp->data_interval_max ||
7668 nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) == 0)
7288 return -EINVAL; 7669 return -EINVAL;
7289 7670
7290 wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]); 7671 wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]);
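
The widened check above closes a corner case: a data interval of zero would disable the TCP keepalive entirely, so it is now rejected along with values above the device's advertised maximum. The rule in isolation:

    #include <errno.h>

    /* nonzero and within the driver limit, or the request is invalid */
    static int check_data_interval(unsigned int val, unsigned int max)
    {
            if (val == 0 || val > max)
                    return -EINVAL;
            return 0;
    }
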
@@ -7769,6 +8150,54 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info)
7769 return 0; 8150 return 0;
7770} 8151}
7771 8152
8153static int nl80211_get_protocol_features(struct sk_buff *skb,
8154 struct genl_info *info)
8155{
8156 void *hdr;
8157 struct sk_buff *msg;
8158
8159 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
8160 if (!msg)
8161 return -ENOMEM;
8162
8163 hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
8164 NL80211_CMD_GET_PROTOCOL_FEATURES);
8165 if (!hdr)
8166 goto nla_put_failure;
8167
8168 if (nla_put_u32(msg, NL80211_ATTR_PROTOCOL_FEATURES,
8169 NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP))
8170 goto nla_put_failure;
8171
8172 genlmsg_end(msg, hdr);
8173 return genlmsg_reply(msg, info);
8174
8175 nla_put_failure:
8176 kfree_skb(msg);
8177 return -ENOBUFS;
8178}
8179
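
NL80211_CMD_GET_PROTOCOL_FEATURES gives userspace a u32 bitmap to probe before relying on newer protocol behaviour; the only bit advertised here, SPLIT_WIPHY_DUMP, says wiphy information may be delivered split across several messages instead of one oversized one. A hypothetical client-side check (the constant is redeclared purely for illustration; a real client takes it from nl80211.h):

    #define PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP (1U << 0)   /* illustrative */

    static int can_use_split_dump(unsigned int feature_bitmap)
    {
            return (feature_bitmap & PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP) != 0;
    }
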
8180static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info)
8181{
8182 struct cfg80211_registered_device *rdev = info->user_ptr[0];
8183 struct cfg80211_update_ft_ies_params ft_params;
8184 struct net_device *dev = info->user_ptr[1];
8185
8186 if (!rdev->ops->update_ft_ies)
8187 return -EOPNOTSUPP;
8188
8189 if (!info->attrs[NL80211_ATTR_MDID] ||
8190 !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
8191 return -EINVAL;
8192
8193 memset(&ft_params, 0, sizeof(ft_params));
8194 ft_params.md = nla_get_u16(info->attrs[NL80211_ATTR_MDID]);
8195 ft_params.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
8196 ft_params.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
8197
8198 return rdev_update_ft_ies(rdev, dev, &ft_params);
8199}
8200
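
nl80211_update_ft_ies() follows the usual doit shape: refuse early when the driver lacks the op, validate the mandatory attributes (MDID plus well-formed IEs), fill a params struct, and dispatch through the traced rdev wrapper. The control flow reduced to a standalone sketch with stand-in names:

    #include <errno.h>
    #include <stddef.h>

    struct ft_params { unsigned short md; const void *ie; size_t ie_len; };

    static int update_ft_ies(int (*op)(const struct ft_params *),
                             const struct ft_params *p)
    {
            if (!op)
                    return -EOPNOTSUPP;   /* driver does not implement it */
            if (!p->ie || !p->ie_len)
                    return -EINVAL;       /* the FT IEs are mandatory */
            return op(p);
    }
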
7772#define NL80211_FLAG_NEED_WIPHY 0x01 8201#define NL80211_FLAG_NEED_WIPHY 0x01
7773#define NL80211_FLAG_NEED_NETDEV 0x02 8202#define NL80211_FLAG_NEED_NETDEV 0x02
7774#define NL80211_FLAG_NEED_RTNL 0x04 8203#define NL80211_FLAG_NEED_RTNL 0x04
@@ -8445,6 +8874,19 @@ static struct genl_ops nl80211_ops[] = {
8445 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | 8874 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
8446 NL80211_FLAG_NEED_RTNL, 8875 NL80211_FLAG_NEED_RTNL,
8447 }, 8876 },
8877 {
8878 .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES,
8879 .doit = nl80211_get_protocol_features,
8880 .policy = nl80211_policy,
8881 },
8882 {
8883 .cmd = NL80211_CMD_UPDATE_FT_IES,
8884 .doit = nl80211_update_ft_ies,
8885 .policy = nl80211_policy,
8886 .flags = GENL_ADMIN_PERM,
8887 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
8888 NL80211_FLAG_NEED_RTNL,
8889 },
8448}; 8890};
8449 8891
8450static struct genl_multicast_group nl80211_mlme_mcgrp = { 8892static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -8472,7 +8914,8 @@ void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev)
8472 if (!msg) 8914 if (!msg)
8473 return; 8915 return;
8474 8916
8475 if (nl80211_send_wiphy(msg, 0, 0, 0, rdev) < 0) { 8917 if (nl80211_send_wiphy(rdev, msg, 0, 0, 0,
8918 false, NULL, NULL, NULL) < 0) {
8476 nlmsg_free(msg); 8919 nlmsg_free(msg);
8477 return; 8920 return;
8478 } 8921 }
@@ -8796,21 +9239,31 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
8796 NL80211_CMD_DISASSOCIATE, gfp); 9239 NL80211_CMD_DISASSOCIATE, gfp);
8797} 9240}
8798 9241
8799void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev, 9242void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf,
8800 struct net_device *netdev, const u8 *buf, 9243 size_t len)
8801 size_t len, gfp_t gfp)
8802{ 9244{
8803 nl80211_send_mlme_event(rdev, netdev, buf, len, 9245 struct wireless_dev *wdev = dev->ieee80211_ptr;
8804 NL80211_CMD_UNPROT_DEAUTHENTICATE, gfp); 9246 struct wiphy *wiphy = wdev->wiphy;
9247 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9248
9249 trace_cfg80211_send_unprot_deauth(dev);
9250 nl80211_send_mlme_event(rdev, dev, buf, len,
9251 NL80211_CMD_UNPROT_DEAUTHENTICATE, GFP_ATOMIC);
8805} 9252}
9253EXPORT_SYMBOL(cfg80211_send_unprot_deauth);
8806 9254
8807void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev, 9255void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf,
8808 struct net_device *netdev, const u8 *buf, 9256 size_t len)
8809 size_t len, gfp_t gfp)
8810{ 9257{
8811 nl80211_send_mlme_event(rdev, netdev, buf, len, 9258 struct wireless_dev *wdev = dev->ieee80211_ptr;
8812 NL80211_CMD_UNPROT_DISASSOCIATE, gfp); 9259 struct wiphy *wiphy = wdev->wiphy;
9260 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9261
9262 trace_cfg80211_send_unprot_disassoc(dev);
9263 nl80211_send_mlme_event(rdev, dev, buf, len,
9264 NL80211_CMD_UNPROT_DISASSOCIATE, GFP_ATOMIC);
8813} 9265}
9266EXPORT_SYMBOL(cfg80211_send_unprot_disassoc);
8814 9267
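
This pair of conversions sets the pattern for the rest of the file: the old nl80211_send_*() helpers took an rdev that only cfg80211 internals could provide, while the new exported cfg80211_*() entry points derive it from the net_device themselves and hard-code GFP_ATOMIC, since drivers may report these frames from atomic RX context. The lookup is the classic container_of() walk; a stub version that compiles on its own:

    #include <stddef.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct wiphy { int dummy; };
    struct registered_device {          /* stand-in for the cfg80211 type */
            int idx;
            struct wiphy wiphy;         /* embedded, hence container_of */
    };

    static struct registered_device *wiphy_to_rdev(struct wiphy *wiphy)
    {
            return container_of(wiphy, struct registered_device, wiphy);
    }
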
8815static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, 9268static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev,
8816 struct net_device *netdev, int cmd, 9269 struct net_device *netdev, int cmd,
@@ -9013,14 +9466,19 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
9013 nlmsg_free(msg); 9466 nlmsg_free(msg);
9014} 9467}
9015 9468
9016void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev, 9469void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr,
9017 struct net_device *netdev, 9470 const u8* ie, u8 ie_len, gfp_t gfp)
9018 const u8 *macaddr, const u8* ie, u8 ie_len,
9019 gfp_t gfp)
9020{ 9471{
9472 struct wireless_dev *wdev = dev->ieee80211_ptr;
9473 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
9021 struct sk_buff *msg; 9474 struct sk_buff *msg;
9022 void *hdr; 9475 void *hdr;
9023 9476
9477 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT))
9478 return;
9479
9480 trace_cfg80211_notify_new_peer_candidate(dev, addr);
9481
9024 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); 9482 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
9025 if (!msg) 9483 if (!msg)
9026 return; 9484 return;
@@ -9032,8 +9490,8 @@ void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev,
9032 } 9490 }
9033 9491
9034 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || 9492 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
9035 nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || 9493 nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
9036 nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, macaddr) || 9494 nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
9037 (ie_len && ie && 9495 (ie_len && ie &&
9038 nla_put(msg, NL80211_ATTR_IE, ie_len , ie))) 9496 nla_put(msg, NL80211_ATTR_IE, ie_len , ie)))
9039 goto nla_put_failure; 9497 goto nla_put_failure;
@@ -9048,6 +9506,7 @@ void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev,
9048 genlmsg_cancel(msg, hdr); 9506 genlmsg_cancel(msg, hdr);
9049 nlmsg_free(msg); 9507 nlmsg_free(msg);
9050} 9508}
9509EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate);
9051 9510
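
Note the new WARN_ON(): once the notifier is exported to drivers, cfg80211 validates the interface mode itself rather than trusting every call site. Reduced to its skeleton:

    enum iftype { IFTYPE_STATION, IFTYPE_MESH_POINT };   /* stub values */

    static int notify_new_peer(enum iftype t)
    {
            if (t != IFTYPE_MESH_POINT)   /* WARN_ON() in the kernel code */
                    return 0;             /* drop bogus notifications */
            /* ... build and multicast the netlink event ... */
            return 1;
    }
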
9052void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, 9511void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
9053 struct net_device *netdev, const u8 *addr, 9512 struct net_device *netdev, const u8 *addr,
@@ -9116,7 +9575,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy,
9116 nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_BEFORE); 9575 nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_BEFORE);
9117 if (!nl_freq) 9576 if (!nl_freq)
9118 goto nla_put_failure; 9577 goto nla_put_failure;
9119 if (nl80211_msg_put_channel(msg, channel_before)) 9578 if (nl80211_msg_put_channel(msg, channel_before, false))
9120 goto nla_put_failure; 9579 goto nla_put_failure;
9121 nla_nest_end(msg, nl_freq); 9580 nla_nest_end(msg, nl_freq);
9122 9581
@@ -9124,7 +9583,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy,
9124 nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_AFTER); 9583 nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_AFTER);
9125 if (!nl_freq) 9584 if (!nl_freq)
9126 goto nla_put_failure; 9585 goto nla_put_failure;
9127 if (nl80211_msg_put_channel(msg, channel_after)) 9586 if (nl80211_msg_put_channel(msg, channel_after, false))
9128 goto nla_put_failure; 9587 goto nla_put_failure;
9129 nla_nest_end(msg, nl_freq); 9588 nla_nest_end(msg, nl_freq);
9130 9589
@@ -9186,31 +9645,42 @@ static void nl80211_send_remain_on_chan_event(
9186 nlmsg_free(msg); 9645 nlmsg_free(msg);
9187} 9646}
9188 9647
9189void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, 9648void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie,
9190 struct wireless_dev *wdev, u64 cookie, 9649 struct ieee80211_channel *chan,
9191 struct ieee80211_channel *chan, 9650 unsigned int duration, gfp_t gfp)
9192 unsigned int duration, gfp_t gfp)
9193{ 9651{
9652 struct wiphy *wiphy = wdev->wiphy;
9653 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9654
9655 trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration);
9194 nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL, 9656 nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL,
9195 rdev, wdev, cookie, chan, 9657 rdev, wdev, cookie, chan,
9196 duration, gfp); 9658 duration, gfp);
9197} 9659}
9660EXPORT_SYMBOL(cfg80211_ready_on_channel);
9198 9661
9199void nl80211_send_remain_on_channel_cancel( 9662void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie,
9200 struct cfg80211_registered_device *rdev, 9663 struct ieee80211_channel *chan,
9201 struct wireless_dev *wdev, 9664 gfp_t gfp)
9202 u64 cookie, struct ieee80211_channel *chan, gfp_t gfp)
9203{ 9665{
9666 struct wiphy *wiphy = wdev->wiphy;
9667 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9668
9669 trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan);
9204 nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, 9670 nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
9205 rdev, wdev, cookie, chan, 0, gfp); 9671 rdev, wdev, cookie, chan, 0, gfp);
9206} 9672}
9673EXPORT_SYMBOL(cfg80211_remain_on_channel_expired);
9207 9674
9208void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, 9675void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
9209 struct net_device *dev, const u8 *mac_addr, 9676 struct station_info *sinfo, gfp_t gfp)
9210 struct station_info *sinfo, gfp_t gfp)
9211{ 9677{
9678 struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
9679 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9212 struct sk_buff *msg; 9680 struct sk_buff *msg;
9213 9681
9682 trace_cfg80211_new_sta(dev, mac_addr, sinfo);
9683
9214 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); 9684 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
9215 if (!msg) 9685 if (!msg)
9216 return; 9686 return;
@@ -9224,14 +9694,17 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
9224 genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, 9694 genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
9225 nl80211_mlme_mcgrp.id, gfp); 9695 nl80211_mlme_mcgrp.id, gfp);
9226} 9696}
9697EXPORT_SYMBOL(cfg80211_new_sta);
9227 9698
9228void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, 9699void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp)
9229 struct net_device *dev, const u8 *mac_addr,
9230 gfp_t gfp)
9231{ 9700{
9701 struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
9702 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9232 struct sk_buff *msg; 9703 struct sk_buff *msg;
9233 void *hdr; 9704 void *hdr;
9234 9705
9706 trace_cfg80211_del_sta(dev, mac_addr);
9707
9235 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); 9708 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
9236 if (!msg) 9709 if (!msg)
9237 return; 9710 return;
@@ -9256,12 +9729,14 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
9256 genlmsg_cancel(msg, hdr); 9729 genlmsg_cancel(msg, hdr);
9257 nlmsg_free(msg); 9730 nlmsg_free(msg);
9258} 9731}
9732EXPORT_SYMBOL(cfg80211_del_sta);
9259 9733
9260void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev, 9734void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
9261 struct net_device *dev, const u8 *mac_addr, 9735 enum nl80211_connect_failed_reason reason,
9262 enum nl80211_connect_failed_reason reason, 9736 gfp_t gfp)
9263 gfp_t gfp)
9264{ 9737{
9738 struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
9739 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9265 struct sk_buff *msg; 9740 struct sk_buff *msg;
9266 void *hdr; 9741 void *hdr;
9267 9742
@@ -9290,6 +9765,7 @@ void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev,
9290 genlmsg_cancel(msg, hdr); 9765 genlmsg_cancel(msg, hdr);
9291 nlmsg_free(msg); 9766 nlmsg_free(msg);
9292} 9767}
9768EXPORT_SYMBOL(cfg80211_conn_failed);
9293 9769
9294static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, 9770static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
9295 const u8 *addr, gfp_t gfp) 9771 const u8 *addr, gfp_t gfp)
@@ -9334,19 +9810,47 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
9334 return true; 9810 return true;
9335} 9811}
9336 9812
9337bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp) 9813bool cfg80211_rx_spurious_frame(struct net_device *dev,
9814 const u8 *addr, gfp_t gfp)
9338{ 9815{
9339 return __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME, 9816 struct wireless_dev *wdev = dev->ieee80211_ptr;
9340 addr, gfp); 9817 bool ret;
9818
9819 trace_cfg80211_rx_spurious_frame(dev, addr);
9820
9821 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
9822 wdev->iftype != NL80211_IFTYPE_P2P_GO)) {
9823 trace_cfg80211_return_bool(false);
9824 return false;
9825 }
9826 ret = __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME,
9827 addr, gfp);
9828 trace_cfg80211_return_bool(ret);
9829 return ret;
9341} 9830}
9831EXPORT_SYMBOL(cfg80211_rx_spurious_frame);
9342 9832
9343bool nl80211_unexpected_4addr_frame(struct net_device *dev, 9833bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
9344 const u8 *addr, gfp_t gfp) 9834 const u8 *addr, gfp_t gfp)
9345{ 9835{
9346 return __nl80211_unexpected_frame(dev, 9836 struct wireless_dev *wdev = dev->ieee80211_ptr;
9347 NL80211_CMD_UNEXPECTED_4ADDR_FRAME, 9837 bool ret;
9348 addr, gfp); 9838
9839 trace_cfg80211_rx_unexpected_4addr_frame(dev, addr);
9840
9841 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
9842 wdev->iftype != NL80211_IFTYPE_P2P_GO &&
9843 wdev->iftype != NL80211_IFTYPE_AP_VLAN)) {
9844 trace_cfg80211_return_bool(false);
9845 return false;
9846 }
9847 ret = __nl80211_unexpected_frame(dev,
9848 NL80211_CMD_UNEXPECTED_4ADDR_FRAME,
9849 addr, gfp);
9850 trace_cfg80211_return_bool(ret);
9851 return ret;
9349} 9852}
9853EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame);
9350 9854
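
Both spurious-frame helpers trace their boolean result on every exit path, which is easiest to keep correct by funnelling all returns through one wrapper. A compilable sketch of that shape, with printf standing in for the tracepoint:

    #include <stdbool.h>
    #include <stdio.h>

    static bool trace_return_bool(bool ret)
    {
            printf("cfg80211_return_bool: %d\n", ret);  /* tracepoint stand-in */
            return ret;
    }

    static bool rx_spurious_frame(int iftype_ok, bool (*send_event)(void))
    {
            if (!iftype_ok)                     /* the WARN_ON() case */
                    return trace_return_bool(false);
            return trace_return_bool(send_event());
    }
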
9351int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, 9855int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
9352 struct wireless_dev *wdev, u32 nlportid, 9856 struct wireless_dev *wdev, u32 nlportid,
@@ -9386,15 +9890,17 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
9386 return -ENOBUFS; 9890 return -ENOBUFS;
9387} 9891}
9388 9892
9389void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, 9893void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
9390 struct wireless_dev *wdev, u64 cookie, 9894 const u8 *buf, size_t len, bool ack, gfp_t gfp)
9391 const u8 *buf, size_t len, bool ack,
9392 gfp_t gfp)
9393{ 9895{
9896 struct wiphy *wiphy = wdev->wiphy;
9897 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9394 struct net_device *netdev = wdev->netdev; 9898 struct net_device *netdev = wdev->netdev;
9395 struct sk_buff *msg; 9899 struct sk_buff *msg;
9396 void *hdr; 9900 void *hdr;
9397 9901
9902 trace_cfg80211_mgmt_tx_status(wdev, cookie, ack);
9903
9398 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); 9904 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
9399 if (!msg) 9905 if (!msg)
9400 return; 9906 return;
@@ -9422,17 +9928,21 @@ void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
9422 genlmsg_cancel(msg, hdr); 9928 genlmsg_cancel(msg, hdr);
9423 nlmsg_free(msg); 9929 nlmsg_free(msg);
9424} 9930}
9931EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
9425 9932
9426void 9933void cfg80211_cqm_rssi_notify(struct net_device *dev,
9427nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, 9934 enum nl80211_cqm_rssi_threshold_event rssi_event,
9428 struct net_device *netdev, 9935 gfp_t gfp)
9429 enum nl80211_cqm_rssi_threshold_event rssi_event,
9430 gfp_t gfp)
9431{ 9936{
9937 struct wireless_dev *wdev = dev->ieee80211_ptr;
9938 struct wiphy *wiphy = wdev->wiphy;
9939 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9432 struct sk_buff *msg; 9940 struct sk_buff *msg;
9433 struct nlattr *pinfoattr; 9941 struct nlattr *pinfoattr;
9434 void *hdr; 9942 void *hdr;
9435 9943
9944 trace_cfg80211_cqm_rssi_notify(dev, rssi_event);
9945
9436 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); 9946 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
9437 if (!msg) 9947 if (!msg)
9438 return; 9948 return;
@@ -9444,7 +9954,7 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
9444 } 9954 }
9445 9955
9446 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || 9956 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
9447 nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) 9957 nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
9448 goto nla_put_failure; 9958 goto nla_put_failure;
9449 9959
9450 pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM); 9960 pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM);
@@ -9467,10 +9977,11 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
9467 genlmsg_cancel(msg, hdr); 9977 genlmsg_cancel(msg, hdr);
9468 nlmsg_free(msg); 9978 nlmsg_free(msg);
9469} 9979}
9980EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
9470 9981
9471void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, 9982static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
9472 struct net_device *netdev, const u8 *bssid, 9983 struct net_device *netdev, const u8 *bssid,
9473 const u8 *replay_ctr, gfp_t gfp) 9984 const u8 *replay_ctr, gfp_t gfp)
9474{ 9985{
9475 struct sk_buff *msg; 9986 struct sk_buff *msg;
9476 struct nlattr *rekey_attr; 9987 struct nlattr *rekey_attr;
@@ -9512,9 +10023,22 @@ void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
9512 nlmsg_free(msg); 10023 nlmsg_free(msg);
9513} 10024}
9514 10025
9515void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, 10026void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid,
9516 struct net_device *netdev, int index, 10027 const u8 *replay_ctr, gfp_t gfp)
9517 const u8 *bssid, bool preauth, gfp_t gfp) 10028{
10029 struct wireless_dev *wdev = dev->ieee80211_ptr;
10030 struct wiphy *wiphy = wdev->wiphy;
10031 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
10032
10033 trace_cfg80211_gtk_rekey_notify(dev, bssid);
10034 nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp);
10035}
10036EXPORT_SYMBOL(cfg80211_gtk_rekey_notify);
10037
10038static void
10039nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
10040 struct net_device *netdev, int index,
10041 const u8 *bssid, bool preauth, gfp_t gfp)
9518{ 10042{
9519 struct sk_buff *msg; 10043 struct sk_buff *msg;
9520 struct nlattr *attr; 10044 struct nlattr *attr;
@@ -9557,9 +10081,22 @@ void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
9557 nlmsg_free(msg); 10081 nlmsg_free(msg);
9558} 10082}
9559 10083
9560void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, 10084void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
9561 struct net_device *netdev, 10085 const u8 *bssid, bool preauth, gfp_t gfp)
9562 struct cfg80211_chan_def *chandef, gfp_t gfp) 10086{
10087 struct wireless_dev *wdev = dev->ieee80211_ptr;
10088 struct wiphy *wiphy = wdev->wiphy;
10089 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
10090
10091 trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth);
10092 nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp);
10093}
10094EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify);
10095
10096static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
10097 struct net_device *netdev,
10098 struct cfg80211_chan_def *chandef,
10099 gfp_t gfp)
9563{ 10100{
9564 struct sk_buff *msg; 10101 struct sk_buff *msg;
9565 void *hdr; 10102 void *hdr;
@@ -9591,11 +10128,36 @@ void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
9591 nlmsg_free(msg); 10128 nlmsg_free(msg);
9592} 10129}
9593 10130
9594void 10131void cfg80211_ch_switch_notify(struct net_device *dev,
9595nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, 10132 struct cfg80211_chan_def *chandef)
9596 struct net_device *netdev, const u8 *peer,
9597 u32 num_packets, u32 rate, u32 intvl, gfp_t gfp)
9598{ 10133{
10134 struct wireless_dev *wdev = dev->ieee80211_ptr;
10135 struct wiphy *wiphy = wdev->wiphy;
10136 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
10137
10138 trace_cfg80211_ch_switch_notify(dev, chandef);
10139
10140 wdev_lock(wdev);
10141
10142 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
10143 wdev->iftype != NL80211_IFTYPE_P2P_GO))
10144 goto out;
10145
10146 wdev->channel = chandef->chan;
10147 nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL);
10148out:
10149 wdev_unlock(wdev);
10150 return;
10151}
10152EXPORT_SYMBOL(cfg80211_ch_switch_notify);
10153
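
cfg80211_ch_switch_notify() is stricter than a plain forwarder: it takes the wdev lock, refuses non-AP/P2P-GO modes, and records the new channel in wdev->channel before emitting the event, so cfg80211's view never lags the notification. The ordering, with a pthread mutex standing in for wdev_lock():

    #include <pthread.h>

    struct chan { int freq; };
    struct wdev_s {
            pthread_mutex_t lock;
            int is_ap;                 /* stand-in for the iftype check */
            struct chan *channel;
    };

    static void ch_switch_notify(struct wdev_s *w, struct chan *new_chan)
    {
            pthread_mutex_lock(&w->lock);
            if (!w->is_ap)
                    goto out;              /* WARN_ON() in the kernel code */
            w->channel = new_chan;         /* update state, then notify */
            /* ... send NL80211_CMD_CH_SWITCH_NOTIFY here ... */
    out:
            pthread_mutex_unlock(&w->lock);
    }
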
10154void cfg80211_cqm_txe_notify(struct net_device *dev,
10155 const u8 *peer, u32 num_packets,
10156 u32 rate, u32 intvl, gfp_t gfp)
10157{
10158 struct wireless_dev *wdev = dev->ieee80211_ptr;
10159 struct wiphy *wiphy = wdev->wiphy;
10160 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9599 struct sk_buff *msg; 10161 struct sk_buff *msg;
9600 struct nlattr *pinfoattr; 10162 struct nlattr *pinfoattr;
9601 void *hdr; 10163 void *hdr;
@@ -9611,7 +10173,7 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev,
9611 } 10173 }
9612 10174
9613 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || 10175 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
9614 nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || 10176 nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
9615 nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer)) 10177 nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer))
9616 goto nla_put_failure; 10178 goto nla_put_failure;
9617 10179
@@ -9640,6 +10202,7 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev,
9640 genlmsg_cancel(msg, hdr); 10202 genlmsg_cancel(msg, hdr);
9641 nlmsg_free(msg); 10203 nlmsg_free(msg);
9642} 10204}
10205EXPORT_SYMBOL(cfg80211_cqm_txe_notify);
9643 10206
9644void 10207void
9645nl80211_radar_notify(struct cfg80211_registered_device *rdev, 10208nl80211_radar_notify(struct cfg80211_registered_device *rdev,
@@ -9692,15 +10255,18 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
9692 nlmsg_free(msg); 10255 nlmsg_free(msg);
9693} 10256}
9694 10257
9695void 10258void cfg80211_cqm_pktloss_notify(struct net_device *dev,
9696nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, 10259 const u8 *peer, u32 num_packets, gfp_t gfp)
9697 struct net_device *netdev, const u8 *peer,
9698 u32 num_packets, gfp_t gfp)
9699{ 10260{
10261 struct wireless_dev *wdev = dev->ieee80211_ptr;
10262 struct wiphy *wiphy = wdev->wiphy;
10263 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9700 struct sk_buff *msg; 10264 struct sk_buff *msg;
9701 struct nlattr *pinfoattr; 10265 struct nlattr *pinfoattr;
9702 void *hdr; 10266 void *hdr;
9703 10267
10268 trace_cfg80211_cqm_pktloss_notify(dev, peer, num_packets);
10269
9704 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); 10270 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
9705 if (!msg) 10271 if (!msg)
9706 return; 10272 return;
@@ -9712,7 +10278,7 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
9712 } 10278 }
9713 10279
9714 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || 10280 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
9715 nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || 10281 nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
9716 nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer)) 10282 nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer))
9717 goto nla_put_failure; 10283 goto nla_put_failure;
9718 10284
@@ -9735,6 +10301,7 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
9735 genlmsg_cancel(msg, hdr); 10301 genlmsg_cancel(msg, hdr);
9736 nlmsg_free(msg); 10302 nlmsg_free(msg);
9737} 10303}
10304EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify);
9738 10305
9739void cfg80211_probe_status(struct net_device *dev, const u8 *addr, 10306void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
9740 u64 cookie, bool acked, gfp_t gfp) 10307 u64 cookie, bool acked, gfp_t gfp)
@@ -10021,6 +10588,50 @@ static struct notifier_block nl80211_netlink_notifier = {
10021 .notifier_call = nl80211_netlink_notify, 10588 .notifier_call = nl80211_netlink_notify,
10022}; 10589};
10023 10590
10591void cfg80211_ft_event(struct net_device *netdev,
10592 struct cfg80211_ft_event_params *ft_event)
10593{
10594 struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy;
10595 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
10596 struct sk_buff *msg;
10597 void *hdr;
10598 int err;
10599
10600 trace_cfg80211_ft_event(wiphy, netdev, ft_event);
10601
10602 if (!ft_event->target_ap)
10603 return;
10604
10605 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
10606 if (!msg)
10607 return;
10608
10609 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FT_EVENT);
10610 if (!hdr) {
10611 nlmsg_free(msg);
10612 return;
10613 }
10614
10615 nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
10616 nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
10617 nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap);
10618 if (ft_event->ies)
10619 nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies);
10620 if (ft_event->ric_ies)
10621 nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len,
10622 ft_event->ric_ies);
10623
10624 err = genlmsg_end(msg, hdr);
10625 if (err < 0) {
10626 nlmsg_free(msg);
10627 return;
10628 }
10629
10630 genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
10631 nl80211_mlme_mcgrp.id, GFP_KERNEL);
10632}
10633EXPORT_SYMBOL(cfg80211_ft_event);
10634
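
cfg80211_ft_event() treats target_ap as mandatory and the two IE blobs as optional attributes. Unlike most builders in this file, the nla_put() return values go unchecked here, so a too-small message would silently lose attributes; a fresh NLMSG_DEFAULT_SIZE allocation makes that unlikely in practice. The optional-attribute emission pattern in isolation, with a stub put() in place of nla_put():

    #include <stddef.h>

    struct ft_ev {
            const void *ies;      size_t ies_len;
            const void *ric_ies;  size_t ric_ies_len;
    };

    static int put_ft_event(int (*put)(int attr, const void *d, size_t n),
                            const struct ft_ev *ev)
    {
            enum { ATTR_IE = 1, ATTR_IE_RIC = 2 };   /* illustrative ids */

            if (ev->ies && put(ATTR_IE, ev->ies, ev->ies_len))
                    return -1;
            if (ev->ric_ies && put(ATTR_IE_RIC, ev->ric_ies, ev->ric_ies_len))
                    return -1;
            return 0;
    }
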
10024/* initialisation/exit functions */ 10635/* initialisation/exit functions */
10025 10636
10026int nl80211_init(void) 10637int nl80211_init(void)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index b061da4919e1..a4073e808c13 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -29,12 +29,6 @@ void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
29void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, 29void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
30 struct net_device *netdev, 30 struct net_device *netdev,
31 const u8 *buf, size_t len, gfp_t gfp); 31 const u8 *buf, size_t len, gfp_t gfp);
32void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev,
33 struct net_device *netdev,
34 const u8 *buf, size_t len, gfp_t gfp);
35void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev,
36 struct net_device *netdev,
37 const u8 *buf, size_t len, gfp_t gfp);
38void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev, 32void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev,
39 struct net_device *netdev, 33 struct net_device *netdev,
40 const u8 *addr, gfp_t gfp); 34 const u8 *addr, gfp_t gfp);
@@ -54,10 +48,6 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
54 struct net_device *netdev, u16 reason, 48 struct net_device *netdev, u16 reason,
55 const u8 *ie, size_t ie_len, bool from_ap); 49 const u8 *ie, size_t ie_len, bool from_ap);
56 50
57void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev,
58 struct net_device *netdev,
59 const u8 *macaddr, const u8* ie, u8 ie_len,
60 gfp_t gfp);
61void 51void
62nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, 52nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
63 struct net_device *netdev, const u8 *addr, 53 struct net_device *netdev, const u8 *addr,
@@ -73,41 +63,10 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
73 struct net_device *netdev, const u8 *bssid, 63 struct net_device *netdev, const u8 *bssid,
74 gfp_t gfp); 64 gfp_t gfp);
75 65
76void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev,
77 struct wireless_dev *wdev, u64 cookie,
78 struct ieee80211_channel *chan,
79 unsigned int duration, gfp_t gfp);
80void nl80211_send_remain_on_channel_cancel(
81 struct cfg80211_registered_device *rdev,
82 struct wireless_dev *wdev,
83 u64 cookie, struct ieee80211_channel *chan, gfp_t gfp);
84
85void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
86 struct net_device *dev, const u8 *mac_addr,
87 struct station_info *sinfo, gfp_t gfp);
88void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
89 struct net_device *dev, const u8 *mac_addr,
90 gfp_t gfp);
91
92void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev,
93 struct net_device *dev, const u8 *mac_addr,
94 enum nl80211_connect_failed_reason reason,
95 gfp_t gfp);
96
97int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, 66int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
98 struct wireless_dev *wdev, u32 nlpid, 67 struct wireless_dev *wdev, u32 nlpid,
99 int freq, int sig_dbm, 68 int freq, int sig_dbm,
100 const u8 *buf, size_t len, gfp_t gfp); 69 const u8 *buf, size_t len, gfp_t gfp);
101void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
102 struct wireless_dev *wdev, u64 cookie,
103 const u8 *buf, size_t len, bool ack,
104 gfp_t gfp);
105
106void
107nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
108 struct net_device *netdev,
109 enum nl80211_cqm_rssi_threshold_event rssi_event,
110 gfp_t gfp);
111 70
112void 71void
113nl80211_radar_notify(struct cfg80211_registered_device *rdev, 72nl80211_radar_notify(struct cfg80211_registered_device *rdev,
@@ -115,31 +74,4 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
115 enum nl80211_radar_event event, 74 enum nl80211_radar_event event,
116 struct net_device *netdev, gfp_t gfp); 75 struct net_device *netdev, gfp_t gfp);
117 76
118void
119nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
120 struct net_device *netdev, const u8 *peer,
121 u32 num_packets, gfp_t gfp);
122
123void
124nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev,
125 struct net_device *netdev, const u8 *peer,
126 u32 num_packets, u32 rate, u32 intvl, gfp_t gfp);
127
128void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
129 struct net_device *netdev, const u8 *bssid,
130 const u8 *replay_ctr, gfp_t gfp);
131
132void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
133 struct net_device *netdev, int index,
134 const u8 *bssid, bool preauth, gfp_t gfp);
135
136void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
137 struct net_device *dev,
138 struct cfg80211_chan_def *chandef, gfp_t gfp);
139
140bool nl80211_unexpected_frame(struct net_device *dev,
141 const u8 *addr, gfp_t gfp);
142bool nl80211_unexpected_4addr_frame(struct net_device *dev,
143 const u8 *addr, gfp_t gfp);
144
145#endif /* __NET_WIRELESS_NL80211_H */ 77#endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 422d38291d66..d77e1c1d3a0e 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -6,11 +6,12 @@
6#include "core.h" 6#include "core.h"
7#include "trace.h" 7#include "trace.h"
8 8
9static inline int rdev_suspend(struct cfg80211_registered_device *rdev) 9static inline int rdev_suspend(struct cfg80211_registered_device *rdev,
10 struct cfg80211_wowlan *wowlan)
10{ 11{
11 int ret; 12 int ret;
12 trace_rdev_suspend(&rdev->wiphy, rdev->wowlan); 13 trace_rdev_suspend(&rdev->wiphy, wowlan);
13 ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan); 14 ret = rdev->ops->suspend(&rdev->wiphy, wowlan);
14 trace_rdev_return_int(&rdev->wiphy, ret); 15 trace_rdev_return_int(&rdev->wiphy, ret);
15 return ret; 16 return ret;
16} 17}
@@ -887,4 +888,17 @@ static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev,
887 trace_rdev_return_int(&rdev->wiphy, ret); 888 trace_rdev_return_int(&rdev->wiphy, ret);
888 return ret; 889 return ret;
889} 890}
891
892static inline int rdev_update_ft_ies(struct cfg80211_registered_device *rdev,
893 struct net_device *dev,
894 struct cfg80211_update_ft_ies_params *ftie)
895{
896 int ret;
897
898 trace_rdev_update_ft_ies(&rdev->wiphy, dev, ftie);
899 ret = rdev->ops->update_ft_ies(&rdev->wiphy, dev, ftie);
900 trace_rdev_return_int(&rdev->wiphy, ret);
901 return ret;
902}
903
890#endif /* __CFG80211_RDEV_OPS */ 904#endif /* __CFG80211_RDEV_OPS */
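
Every entry in rdev-ops.h follows the same template that rdev_update_ft_ies() adds here: trace the arguments, call the driver op, trace the return value. Keeping the tracepoints inside one inline wrapper means no call site can forget half the pair. The shape as a standalone stub, printf standing in for the tracepoints:

    #include <stdio.h>

    struct ops { int (*update_ft_ies)(int md); };

    static int rdev_update_ft_ies_sketch(const struct ops *ops, int md)
    {
            int ret;

            printf("trace: update_ft_ies md=0x%x\n", md);  /* entry trace */
            ret = ops->update_ft_ies(md);
            printf("trace: return %d\n", ret);             /* exit trace */
            return ret;
    }
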
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 98532c00242d..e6df52dc8c69 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -184,14 +184,14 @@ static const struct ieee80211_regdomain world_regdom = {
184 NL80211_RRF_NO_IBSS | 184 NL80211_RRF_NO_IBSS |
185 NL80211_RRF_NO_OFDM), 185 NL80211_RRF_NO_OFDM),
186 /* IEEE 802.11a, channel 36..48 */ 186 /* IEEE 802.11a, channel 36..48 */
187 REG_RULE(5180-10, 5240+10, 40, 6, 20, 187 REG_RULE(5180-10, 5240+10, 80, 6, 20,
188 NL80211_RRF_PASSIVE_SCAN | 188 NL80211_RRF_PASSIVE_SCAN |
189 NL80211_RRF_NO_IBSS), 189 NL80211_RRF_NO_IBSS),
190 190
191 /* NB: 5260 MHz - 5700 MHz requies DFS */ 191 /* NB: 5260 MHz - 5700 MHz requires DFS */
192 192
193 /* IEEE 802.11a, channel 149..165 */ 193 /* IEEE 802.11a, channel 149..165 */
194 REG_RULE(5745-10, 5825+10, 40, 6, 20, 194 REG_RULE(5745-10, 5825+10, 80, 6, 20,
195 NL80211_RRF_PASSIVE_SCAN | 195 NL80211_RRF_PASSIVE_SCAN |
196 NL80211_RRF_NO_IBSS), 196 NL80211_RRF_NO_IBSS),
197 197
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 482c70e70127..a9dc5c736df0 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -160,7 +160,7 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
160{ 160{
161 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); 161 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
162 struct cfg80211_connect_params *params; 162 struct cfg80211_connect_params *params;
163 const u8 *prev_bssid = NULL; 163 struct cfg80211_assoc_request req = {};
164 int err; 164 int err;
165 165
166 ASSERT_WDEV_LOCK(wdev); 166 ASSERT_WDEV_LOCK(wdev);
@@ -187,16 +187,20 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
187 BUG_ON(!rdev->ops->assoc); 187 BUG_ON(!rdev->ops->assoc);
188 wdev->conn->state = CFG80211_CONN_ASSOCIATING; 188 wdev->conn->state = CFG80211_CONN_ASSOCIATING;
189 if (wdev->conn->prev_bssid_valid) 189 if (wdev->conn->prev_bssid_valid)
190 prev_bssid = wdev->conn->prev_bssid; 190 req.prev_bssid = wdev->conn->prev_bssid;
191 err = __cfg80211_mlme_assoc(rdev, wdev->netdev, 191 req.ie = params->ie;
192 params->channel, params->bssid, 192 req.ie_len = params->ie_len;
193 prev_bssid, 193 req.use_mfp = params->mfp != NL80211_MFP_NO;
194 params->ssid, params->ssid_len, 194 req.crypto = params->crypto;
195 params->ie, params->ie_len, 195 req.flags = params->flags;
196 params->mfp != NL80211_MFP_NO, 196 req.ht_capa = params->ht_capa;
197 &params->crypto, 197 req.ht_capa_mask = params->ht_capa_mask;
198 params->flags, &params->ht_capa, 198 req.vht_capa = params->vht_capa;
199 &params->ht_capa_mask); 199 req.vht_capa_mask = params->vht_capa_mask;
200
201 err = __cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel,
202 params->bssid, params->ssid,
203 params->ssid_len, &req);
200 if (err) 204 if (err)
201 __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, 205 __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
202 NULL, 0, 206 NULL, 0,
@@ -232,7 +236,7 @@ void cfg80211_conn_work(struct work_struct *work)
232 wdev_unlock(wdev); 236 wdev_unlock(wdev);
233 continue; 237 continue;
234 } 238 }
235 if (wdev->sme_state != CFG80211_SME_CONNECTING) { 239 if (wdev->sme_state != CFG80211_SME_CONNECTING || !wdev->conn) {
236 wdev_unlock(wdev); 240 wdev_unlock(wdev);
237 continue; 241 continue;
238 } 242 }
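
The extra !wdev->conn test guards the connect worker against a race: the connection attempt can be torn down between the work being queued and the wdev lock being taken, so both the SME state and the object's existence are re-checked under the lock before anything is dereferenced. The pattern in userspace terms, with a pthread mutex in place of wdev_lock():

    #include <pthread.h>
    #include <stddef.h>

    struct conn { int step; };
    struct wdev_s {
            pthread_mutex_t lock;
            int connecting;            /* stand-in for sme_state */
            struct conn *conn;
    };

    /* returns 0 when the work was skipped because the attempt vanished */
    static int conn_work_one(struct wdev_s *w)
    {
            pthread_mutex_lock(&w->lock);
            if (!w->connecting || !w->conn) {   /* the added re-check */
                    pthread_mutex_unlock(&w->lock);
                    return 0;
            }
            /* ... safe to dereference w->conn here ... */
            pthread_mutex_unlock(&w->lock);
            return 1;
    }
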
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 238ee49b3868..8f28b9f798d8 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -83,6 +83,14 @@ static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env)
83 return 0; 83 return 0;
84} 84}
85 85
86static void cfg80211_leave_all(struct cfg80211_registered_device *rdev)
87{
88 struct wireless_dev *wdev;
89
90 list_for_each_entry(wdev, &rdev->wdev_list, list)
91 cfg80211_leave(rdev, wdev);
92}
93
86static int wiphy_suspend(struct device *dev, pm_message_t state) 94static int wiphy_suspend(struct device *dev, pm_message_t state)
87{ 95{
88 struct cfg80211_registered_device *rdev = dev_to_rdev(dev); 96 struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
@@ -90,12 +98,19 @@ static int wiphy_suspend(struct device *dev, pm_message_t state)
90 98
91 rdev->suspend_at = get_seconds(); 99 rdev->suspend_at = get_seconds();
92 100
93 if (rdev->ops->suspend) { 101 rtnl_lock();
94 rtnl_lock(); 102 if (rdev->wiphy.registered) {
95 if (rdev->wiphy.registered) 103 if (!rdev->wowlan)
96 ret = rdev_suspend(rdev); 104 cfg80211_leave_all(rdev);
97 rtnl_unlock(); 105 if (rdev->ops->suspend)
106 ret = rdev_suspend(rdev, rdev->wowlan);
107 if (ret == 1) {
 108 /* Driver refused to configure wowlan */
109 cfg80211_leave_all(rdev);
110 ret = rdev_suspend(rdev, NULL);
111 }
98 } 112 }
113 rtnl_unlock();
99 114
100 return ret; 115 return ret;
101} 116}
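
The suspend rework encodes a three-step policy: with no WoWLAN configured, disconnect everything up front; otherwise offer the configuration to the driver; and if the driver answers 1 ("cannot honour this WoWLAN setup"), fall back to disconnecting and suspending without WoWLAN. The decision tree as a compilable sketch:

    struct dev_s { int has_wowlan; };

    /* stand-ins: a driver that refuses any wowlan config, and teardown */
    static int drv_suspend(struct dev_s *d, int wowlan)
    { (void)d; return wowlan ? 1 : 0; }
    static void leave_all(struct dev_s *d) { (void)d; /* disconnect all */ }

    static int wiphy_suspend_sketch(struct dev_s *d)
    {
            int ret;

            if (!d->has_wowlan)
                    leave_all(d);
            ret = drv_suspend(d, d->has_wowlan);
            if (ret == 1) {                /* driver refused the config */
                    leave_all(d);
                    ret = drv_suspend(d, 0);
            }
            return ret;
    }
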
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 7586de77a2f8..3c2033b8f596 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1786,6 +1786,26 @@ TRACE_EVENT(rdev_set_mac_acl,
1786 WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy) 1786 WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy)
1787); 1787);
1788 1788
1789TRACE_EVENT(rdev_update_ft_ies,
1790 TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
1791 struct cfg80211_update_ft_ies_params *ftie),
1792 TP_ARGS(wiphy, netdev, ftie),
1793 TP_STRUCT__entry(
1794 WIPHY_ENTRY
1795 NETDEV_ENTRY
1796 __field(u16, md)
1797 __dynamic_array(u8, ie, ftie->ie_len)
1798 ),
1799 TP_fast_assign(
1800 WIPHY_ASSIGN;
1801 NETDEV_ASSIGN;
1802 __entry->md = ftie->md;
1803 memcpy(__get_dynamic_array(ie), ftie->ie, ftie->ie_len);
1804 ),
1805 TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", md: 0x%x",
1806 WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->md)
1807);
1808
1789/************************************************************* 1809/*************************************************************
1790 * cfg80211 exported functions traces * 1810 * cfg80211 exported functions traces *
1791 *************************************************************/ 1811 *************************************************************/
@@ -2414,6 +2434,32 @@ TRACE_EVENT(cfg80211_report_wowlan_wakeup,
2414 TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) 2434 TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG)
2415); 2435);
2416 2436
2437TRACE_EVENT(cfg80211_ft_event,
2438 TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
2439 struct cfg80211_ft_event_params *ft_event),
2440 TP_ARGS(wiphy, netdev, ft_event),
2441 TP_STRUCT__entry(
2442 WIPHY_ENTRY
2443 NETDEV_ENTRY
2444 __dynamic_array(u8, ies, ft_event->ies_len)
2445 MAC_ENTRY(target_ap)
2446 __dynamic_array(u8, ric_ies, ft_event->ric_ies_len)
2447 ),
2448 TP_fast_assign(
2449 WIPHY_ASSIGN;
2450 NETDEV_ASSIGN;
2451 if (ft_event->ies)
2452 memcpy(__get_dynamic_array(ies), ft_event->ies,
2453 ft_event->ies_len);
2454 MAC_ASSIGN(target_ap, ft_event->target_ap);
2455 if (ft_event->ric_ies)
2456 memcpy(__get_dynamic_array(ric_ies), ft_event->ric_ies,
2457 ft_event->ric_ies_len);
2458 ),
2459 TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", target_ap: " MAC_PR_FMT,
2460 WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap))
2461);
2462
2417#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ 2463#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
2418 2464
2419#undef TRACE_INCLUDE_PATH 2465#undef TRACE_INCLUDE_PATH
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 37a56ee1e1ed..6cbac99ae03d 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -511,7 +511,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
511 encaps_data = bridge_tunnel_header; 511 encaps_data = bridge_tunnel_header;
512 encaps_len = sizeof(bridge_tunnel_header); 512 encaps_len = sizeof(bridge_tunnel_header);
513 skip_header_bytes -= 2; 513 skip_header_bytes -= 2;
514 } else if (ethertype > 0x600) { 514 } else if (ethertype >= ETH_P_802_3_MIN) {
515 encaps_data = rfc1042_header; 515 encaps_data = rfc1042_header;
516 encaps_len = sizeof(rfc1042_header); 516 encaps_len = sizeof(rfc1042_header);
517 skip_header_bytes -= 2; 517 skip_header_bytes -= 2;
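
The util.c change is a genuine off-by-one fix, not just a readability one: 0x0600 is the inclusive boundary between 802.3 length fields and EtherType values (ETH_P_802_3_MIN), so the old "> 0x600" misclassified the boundary value itself. Stated on its own:

    #define ETH_P_802_3_MIN_SKETCH 0x0600   /* mirrors ETH_P_802_3_MIN */

    static int is_ethertype(unsigned int field)
    {
            /* >=, not >: 0x0600 itself is already an EtherType */
            return field >= ETH_P_802_3_MIN_SKETCH;
    }
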
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 167c67d46c6a..23cea0f74336 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1037,6 +1037,24 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir
1037 return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); 1037 return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
1038} 1038}
1039 1039
1040static int flow_to_policy_dir(int dir)
1041{
1042 if (XFRM_POLICY_IN == FLOW_DIR_IN &&
1043 XFRM_POLICY_OUT == FLOW_DIR_OUT &&
1044 XFRM_POLICY_FWD == FLOW_DIR_FWD)
1045 return dir;
1046
1047 switch (dir) {
1048 default:
1049 case FLOW_DIR_IN:
1050 return XFRM_POLICY_IN;
1051 case FLOW_DIR_OUT:
1052 return XFRM_POLICY_OUT;
1053 case FLOW_DIR_FWD:
1054 return XFRM_POLICY_FWD;
1055 }
1056}
1057
1040static struct flow_cache_object * 1058static struct flow_cache_object *
1041xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, 1059xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
1042 u8 dir, struct flow_cache_object *old_obj, void *ctx) 1060 u8 dir, struct flow_cache_object *old_obj, void *ctx)
@@ -1046,7 +1064,7 @@ xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
1046 if (old_obj) 1064 if (old_obj)
1047 xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); 1065 xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
1048 1066
1049 pol = __xfrm_policy_lookup(net, fl, family, dir); 1067 pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
1050 if (IS_ERR_OR_NULL(pol)) 1068 if (IS_ERR_OR_NULL(pol))
1051 return ERR_CAST(pol); 1069 return ERR_CAST(pol);
1052 1070
@@ -1932,7 +1950,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
1932 * previous cache entry */ 1950 * previous cache entry */
1933 if (xdst == NULL) { 1951 if (xdst == NULL) {
1934 num_pols = 1; 1952 num_pols = 1;
1935 pols[0] = __xfrm_policy_lookup(net, fl, family, dir); 1953 pols[0] = __xfrm_policy_lookup(net, fl, family,
1954 flow_to_policy_dir(dir));
1936 err = xfrm_expand_policies(fl, family, pols, 1955 err = xfrm_expand_policies(fl, family, pols,
1937 &num_pols, &num_xfrms); 1956 &num_pols, &num_xfrms);
1938 if (err < 0) 1957 if (err < 0)
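
flow_to_policy_dir() is written so it costs nothing on today's kernels: the first if compares enum constants, so when the FLOW_DIR_* and XFRM_POLICY_* values coincide the compiler folds the whole helper into an identity, and only if the two enum families ever diverge does the switch do real work. The trick with stub enums:

    enum { FLOW_IN = 0, FLOW_OUT = 1, FLOW_FWD = 2 };   /* stand-ins */
    enum { POL_IN  = 0, POL_OUT  = 1, POL_FWD  = 2 };

    static int flow_to_policy(int dir)
    {
            if (POL_IN == FLOW_IN && POL_OUT == FLOW_OUT && POL_FWD == FLOW_FWD)
                    return dir;        /* constant-folded on matching enums */

            switch (dir) {
            default:
            case FLOW_IN:  return POL_IN;
            case FLOW_OUT: return POL_OUT;
            case FLOW_FWD: return POL_FWD;
            }
    }
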
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 2c341bdaf47c..78f66fa92449 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1187,6 +1187,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
1187 goto error; 1187 goto error;
1188 1188
1189 x->props.flags = orig->props.flags; 1189 x->props.flags = orig->props.flags;
1190 x->props.extra_flags = orig->props.extra_flags;
1190 1191
1191 x->curlft.add_time = orig->curlft.add_time; 1192 x->curlft.add_time = orig->curlft.add_time;
1192 x->km.state = orig->km.state; 1193 x->km.state = orig->km.state;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index fbd9e6cd0fd7..aa778748c565 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -515,6 +515,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
515 515
516 copy_from_user_state(x, p); 516 copy_from_user_state(x, p);
517 517
518 if (attrs[XFRMA_SA_EXTRA_FLAGS])
519 x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
520
518 if ((err = attach_aead(&x->aead, &x->props.ealgo, 521 if ((err = attach_aead(&x->aead, &x->props.ealgo,
519 attrs[XFRMA_ALG_AEAD]))) 522 attrs[XFRMA_ALG_AEAD])))
520 goto error; 523 goto error;
@@ -779,6 +782,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
779 782
780 copy_to_user_state(x, p); 783 copy_to_user_state(x, p);
781 784
785 if (x->props.extra_flags) {
786 ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS,
787 x->props.extra_flags);
788 if (ret)
789 goto out;
790 }
791
782 if (x->coaddr) { 792 if (x->coaddr) {
783 ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); 793 ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr);
784 if (ret) 794 if (ret)
@@ -2302,9 +2312,10 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
2302 [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) }, 2312 [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) },
2303 [XFRMA_TFCPAD] = { .type = NLA_U32 }, 2313 [XFRMA_TFCPAD] = { .type = NLA_U32 },
2304 [XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) }, 2314 [XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) },
2315 [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 },
2305}; 2316};
2306 2317
2307static struct xfrm_link { 2318static const struct xfrm_link {
2308 int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); 2319 int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
2309 int (*dump)(struct sk_buff *, struct netlink_callback *); 2320 int (*dump)(struct sk_buff *, struct netlink_callback *);
2310 int (*done)(struct netlink_callback *); 2321 int (*done)(struct netlink_callback *);
@@ -2338,7 +2349,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
2338{ 2349{
2339 struct net *net = sock_net(skb->sk); 2350 struct net *net = sock_net(skb->sk);
2340 struct nlattr *attrs[XFRMA_MAX+1]; 2351 struct nlattr *attrs[XFRMA_MAX+1];
2341 struct xfrm_link *link; 2352 const struct xfrm_link *link;
2342 int type, err; 2353 int type, err;
2343 2354
2344 type = nlh->nlmsg_type; 2355 type = nlh->nlmsg_type;
@@ -2495,6 +2506,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
2495 x->security->ctx_len); 2506 x->security->ctx_len);
2496 if (x->coaddr) 2507 if (x->coaddr)
2497 l += nla_total_size(sizeof(*x->coaddr)); 2508 l += nla_total_size(sizeof(*x->coaddr));
2509 if (x->props.extra_flags)
2510 l += nla_total_size(sizeof(x->props.extra_flags));
2498 2511
2499 /* Must count x->lastused as it may become non-zero behind our back. */ 2512 /* Must count x->lastused as it may become non-zero behind our back. */
2500 l += nla_total_size(sizeof(u64)); 2513 l += nla_total_size(sizeof(u64));
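
The XFRMA_SA_EXTRA_FLAGS attribute shows all the pieces a new xfrm netlink attribute must keep in sync: the policy table declares it NLA_U32, xfrm_state_construct() parses it, copy_to_user_state_extra() emits it (only when nonzero), xfrm_state_clone() copies it across migrations, and xfrm_sa_len() counts it so the dump buffer is sized correctly. The sizing arithmetic sketched with a stand-in for nla_total_size() (the real header aligns an NLA_HDRLEN-prefixed payload to 4 bytes):

    #include <stddef.h>

    #define NLA_HDRLEN_SKETCH   4                           /* illustrative */
    #define NLA_ALIGN_SKETCH(n) (((n) + 3) & ~(size_t)3)

    static size_t nla_total_size_sketch(size_t payload)
    {
            return NLA_ALIGN_SKETCH(NLA_HDRLEN_SKETCH + payload);
    }

    static size_t sa_len_sketch(unsigned int extra_flags, size_t base)
    {
            if (extra_flags)            /* counted only when it is emitted */
                    base += nla_total_size_sketch(sizeof(extra_flags));
            return base;
    }
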